diff options
| author | 2016-09-21 11:29:48 -0700 | |
|---|---|---|
| committer | 2016-09-21 11:29:48 -0700 | |
| commit | d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a (patch) | |
| tree | 8a22ca73ff838f3f0090b29a548ae81087fc90ed /src/video_core | |
| parent | README: Specify master branch for Travis CI badge (diff) | |
| parent | Fix Travis clang-format check (diff) | |
| download | yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.gz yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.xz yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.zip | |
Merge pull request #2086 from linkmauve/clang-format
Add clang-format as part of our {commit,travis}-time checks
Diffstat (limited to 'src/video_core')
42 files changed, 2927 insertions, 2610 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index db99ce666..05b5cea73 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp | |||
| @@ -5,15 +5,12 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | |||
| 9 | #include <boost/container/static_vector.hpp> | 8 | #include <boost/container/static_vector.hpp> |
| 10 | #include <boost/container/vector.hpp> | 9 | #include <boost/container/vector.hpp> |
| 11 | |||
| 12 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 13 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 14 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 15 | #include "common/vector_math.h" | 13 | #include "common/vector_math.h" |
| 16 | |||
| 17 | #include "video_core/clipper.h" | 14 | #include "video_core/clipper.h" |
| 18 | #include "video_core/pica.h" | 15 | #include "video_core/pica.h" |
| 19 | #include "video_core/pica_state.h" | 16 | #include "video_core/pica_state.h" |
| @@ -27,15 +24,10 @@ namespace Clipper { | |||
| 27 | 24 | ||
| 28 | struct ClippingEdge { | 25 | struct ClippingEdge { |
| 29 | public: | 26 | public: |
| 30 | ClippingEdge(Math::Vec4<float24> coeffs, | 27 | ClippingEdge(Math::Vec4<float24> coeffs, Math::Vec4<float24> bias = Math::Vec4<float24>( |
| 31 | Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0), | 28 | float24::FromFloat32(0), float24::FromFloat32(0), |
| 32 | float24::FromFloat32(0), | 29 | float24::FromFloat32(0), float24::FromFloat32(0))) |
| 33 | float24::FromFloat32(0), | 30 | : coeffs(coeffs), bias(bias) {} |
| 34 | float24::FromFloat32(0))) | ||
| 35 | : coeffs(coeffs), | ||
| 36 | bias(bias) | ||
| 37 | { | ||
| 38 | } | ||
| 39 | 31 | ||
| 40 | bool IsInside(const OutputVertex& vertex) const { | 32 | bool IsInside(const OutputVertex& vertex) const { |
| 41 | return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); | 33 | return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); |
| @@ -59,8 +51,7 @@ private: | |||
| 59 | Math::Vec4<float24> bias; | 51 | Math::Vec4<float24> bias; |
| 60 | }; | 52 | }; |
| 61 | 53 | ||
| 62 | static void InitScreenCoordinates(OutputVertex& vtx) | 54 | static void InitScreenCoordinates(OutputVertex& vtx) { |
| 63 | { | ||
| 64 | struct { | 55 | struct { |
| 65 | float24 halfsize_x; | 56 | float24 halfsize_x; |
| 66 | float24 offset_x; | 57 | float24 offset_x; |
| @@ -73,8 +64,8 @@ static void InitScreenCoordinates(OutputVertex& vtx) | |||
| 73 | const auto& regs = g_state.regs; | 64 | const auto& regs = g_state.regs; |
| 74 | viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); | 65 | viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); |
| 75 | viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); | 66 | viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); |
| 76 | viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); | 67 | viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); |
| 77 | viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); | 68 | viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); |
| 78 | 69 | ||
| 79 | float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; | 70 | float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; |
| 80 | vtx.color *= inv_w; | 71 | vtx.color *= inv_w; |
| @@ -85,12 +76,14 @@ static void InitScreenCoordinates(OutputVertex& vtx) | |||
| 85 | vtx.tc2 *= inv_w; | 76 | vtx.tc2 *= inv_w; |
| 86 | vtx.pos.w = inv_w; | 77 | vtx.pos.w = inv_w; |
| 87 | 78 | ||
| 88 | vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; | 79 | vtx.screenpos[0] = |
| 89 | vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; | 80 | (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; |
| 81 | vtx.screenpos[1] = | ||
| 82 | (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; | ||
| 90 | vtx.screenpos[2] = vtx.pos.z * inv_w; | 83 | vtx.screenpos[2] = vtx.pos.z * inv_w; |
| 91 | } | 84 | } |
| 92 | 85 | ||
| 93 | void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { | 86 | void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { |
| 94 | using boost::container::static_vector; | 87 | using boost::container::static_vector; |
| 95 | 88 | ||
| 96 | // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at | 89 | // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at |
| @@ -98,10 +91,10 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu | |||
| 98 | // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a | 91 | // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a |
| 99 | // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. | 92 | // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. |
| 100 | static const size_t MAX_VERTICES = 9; | 93 | static const size_t MAX_VERTICES = 9; |
| 101 | static_vector<OutputVertex, MAX_VERTICES> buffer_a = { v0, v1, v2 }; | 94 | static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; |
| 102 | static_vector<OutputVertex, MAX_VERTICES> buffer_b; | 95 | static_vector<OutputVertex, MAX_VERTICES> buffer_b; |
| 103 | auto* output_list = &buffer_a; | 96 | auto* output_list = &buffer_a; |
| 104 | auto* input_list = &buffer_b; | 97 | auto* input_list = &buffer_b; |
| 105 | 98 | ||
| 106 | // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. | 99 | // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. |
| 107 | // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest | 100 | // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest |
| @@ -110,13 +103,13 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu | |||
| 110 | static const float24 f0 = float24::FromFloat32(0.0); | 103 | static const float24 f0 = float24::FromFloat32(0.0); |
| 111 | static const float24 f1 = float24::FromFloat32(1.0); | 104 | static const float24 f1 = float24::FromFloat32(1.0); |
| 112 | static const std::array<ClippingEdge, 7> clipping_edges = {{ | 105 | static const std::array<ClippingEdge, 7> clipping_edges = {{ |
| 113 | { Math::MakeVec( f1, f0, f0, -f1) }, // x = +w | 106 | {Math::MakeVec(f1, f0, f0, -f1)}, // x = +w |
| 114 | { Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w | 107 | {Math::MakeVec(-f1, f0, f0, -f1)}, // x = -w |
| 115 | { Math::MakeVec( f0, f1, f0, -f1) }, // y = +w | 108 | {Math::MakeVec(f0, f1, f0, -f1)}, // y = +w |
| 116 | { Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w | 109 | {Math::MakeVec(f0, -f1, f0, -f1)}, // y = -w |
| 117 | { Math::MakeVec( f0, f0, f1, f0) }, // z = 0 | 110 | {Math::MakeVec(f0, f0, f1, f0)}, // z = 0 |
| 118 | { Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w | 111 | {Math::MakeVec(f0, f0, -f1, -f1)}, // z = -w |
| 119 | { Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON | 112 | {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON |
| 120 | }}; | 113 | }}; |
| 121 | 114 | ||
| 122 | // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) | 115 | // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) |
| @@ -154,10 +147,10 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu | |||
| 154 | InitScreenCoordinates((*output_list)[0]); | 147 | InitScreenCoordinates((*output_list)[0]); |
| 155 | InitScreenCoordinates((*output_list)[1]); | 148 | InitScreenCoordinates((*output_list)[1]); |
| 156 | 149 | ||
| 157 | for (size_t i = 0; i < output_list->size() - 2; i ++) { | 150 | for (size_t i = 0; i < output_list->size() - 2; i++) { |
| 158 | OutputVertex& vtx0 = (*output_list)[0]; | 151 | OutputVertex& vtx0 = (*output_list)[0]; |
| 159 | OutputVertex& vtx1 = (*output_list)[i+1]; | 152 | OutputVertex& vtx1 = (*output_list)[i + 1]; |
| 160 | OutputVertex& vtx2 = (*output_list)[i+2]; | 153 | OutputVertex& vtx2 = (*output_list)[i + 2]; |
| 161 | 154 | ||
| 162 | InitScreenCoordinates(vtx2); | 155 | InitScreenCoordinates(vtx2); |
| 163 | 156 | ||
| @@ -165,19 +158,20 @@ void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const Outpu | |||
| 165 | "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " | 158 | "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " |
| 166 | "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " | 159 | "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " |
| 167 | "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", | 160 | "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", |
| 168 | i + 1, output_list->size() - 2, | 161 | i + 1, output_list->size() - 2, vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), |
| 169 | vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), | 162 | vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), vtx1.pos.x.ToFloat32(), |
| 170 | vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), | 163 | vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), |
| 171 | vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), | 164 | vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), |
| 172 | vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(), | 165 | vtx2.pos.w.ToFloat32(), vtx0.screenpos.x.ToFloat32(), |
| 173 | vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(), | 166 | vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(), |
| 174 | vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32()); | 167 | vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), |
| 168 | vtx1.screenpos.z.ToFloat32(), vtx2.screenpos.x.ToFloat32(), | ||
| 169 | vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32()); | ||
| 175 | 170 | ||
| 176 | Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2); | 171 | Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2); |
| 177 | } | 172 | } |
| 178 | } | 173 | } |
| 179 | 174 | ||
| 180 | |||
| 181 | } // namespace | 175 | } // namespace |
| 182 | 176 | ||
| 183 | } // namespace | 177 | } // namespace |
diff --git a/src/video_core/clipper.h b/src/video_core/clipper.h index f85d8d4c9..b51af0af9 100644 --- a/src/video_core/clipper.h +++ b/src/video_core/clipper.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | namespace Pica { | 7 | namespace Pica { |
| 8 | 8 | ||
| 9 | namespace Shader { | 9 | namespace Shader { |
| 10 | struct OutputVertex; | 10 | struct OutputVertex; |
| 11 | } | 11 | } |
| 12 | 12 | ||
| 13 | namespace Clipper { | 13 | namespace Clipper { |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 689859049..bb618cacd 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -6,17 +6,14 @@ | |||
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | |||
| 10 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 11 | #include "common/logging/log.h" | 10 | #include "common/logging/log.h" |
| 12 | #include "common/microprofile.h" | 11 | #include "common/microprofile.h" |
| 13 | #include "common/vector_math.h" | 12 | #include "common/vector_math.h" |
| 14 | |||
| 15 | #include "core/hle/service/gsp_gpu.h" | 13 | #include "core/hle/service/gsp_gpu.h" |
| 16 | #include "core/hw/gpu.h" | 14 | #include "core/hw/gpu.h" |
| 17 | #include "core/memory.h" | 15 | #include "core/memory.h" |
| 18 | #include "core/tracer/recorder.h" | 16 | #include "core/tracer/recorder.h" |
| 19 | |||
| 20 | #include "video_core/command_processor.h" | 17 | #include "video_core/command_processor.h" |
| 21 | #include "video_core/debug_utils/debug_utils.h" | 18 | #include "video_core/debug_utils/debug_utils.h" |
| 22 | #include "video_core/pica.h" | 19 | #include "video_core/pica.h" |
| @@ -43,10 +40,8 @@ static u32 default_attr_write_buffer[3]; | |||
| 43 | 40 | ||
| 44 | // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF | 41 | // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF |
| 45 | static const u32 expand_bits_to_bytes[] = { | 42 | static const u32 expand_bits_to_bytes[] = { |
| 46 | 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, | 43 | 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, |
| 47 | 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, | 44 | 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, |
| 48 | 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, | ||
| 49 | 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff | ||
| 50 | }; | 45 | }; |
| 51 | 46 | ||
| 52 | MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); | 47 | MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); |
| @@ -68,383 +63,393 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 68 | 63 | ||
| 69 | regs[id] = (old_value & ~write_mask) | (value & write_mask); | 64 | regs[id] = (old_value & ~write_mask) | (value & write_mask); |
| 70 | 65 | ||
| 71 | DebugUtils::OnPicaRegWrite({ (u16)id, (u16)mask, regs[id] }); | 66 | DebugUtils::OnPicaRegWrite({(u16)id, (u16)mask, regs[id]}); |
| 72 | 67 | ||
| 73 | if (g_debug_context) | 68 | if (g_debug_context) |
| 74 | g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id)); | 69 | g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, |
| 75 | 70 | reinterpret_cast<void*>(&id)); | |
| 76 | switch(id) { | 71 | |
| 77 | // Trigger IRQ | 72 | switch (id) { |
| 78 | case PICA_REG_INDEX(trigger_irq): | 73 | // Trigger IRQ |
| 79 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); | 74 | case PICA_REG_INDEX(trigger_irq): |
| 80 | break; | 75 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); |
| 81 | 76 | break; | |
| 82 | case PICA_REG_INDEX_WORKAROUND(triangle_topology, 0x25E): | 77 | |
| 83 | g_state.primitive_assembler.Reconfigure(regs.triangle_topology); | 78 | case PICA_REG_INDEX_WORKAROUND(triangle_topology, 0x25E): |
| 84 | break; | 79 | g_state.primitive_assembler.Reconfigure(regs.triangle_topology); |
| 85 | 80 | break; | |
| 86 | case PICA_REG_INDEX_WORKAROUND(restart_primitive, 0x25F): | 81 | |
| 87 | g_state.primitive_assembler.Reset(); | 82 | case PICA_REG_INDEX_WORKAROUND(restart_primitive, 0x25F): |
| 88 | break; | 83 | g_state.primitive_assembler.Reset(); |
| 89 | 84 | break; | |
| 90 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232): | 85 | |
| 91 | g_state.immediate.current_attribute = 0; | 86 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232): |
| 87 | g_state.immediate.current_attribute = 0; | ||
| 88 | default_attr_counter = 0; | ||
| 89 | break; | ||
| 90 | |||
| 91 | // Load default vertex input attributes | ||
| 92 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): | ||
| 93 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): | ||
| 94 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): { | ||
| 95 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||
| 96 | // it directly write the values? | ||
| 97 | default_attr_write_buffer[default_attr_counter++] = value; | ||
| 98 | |||
| 99 | // Default attributes are written in a packed format such that four float24 values are | ||
| 100 | // encoded in | ||
| 101 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 102 | // written. | ||
| 103 | if (default_attr_counter >= 3) { | ||
| 92 | default_attr_counter = 0; | 104 | default_attr_counter = 0; |
| 93 | break; | ||
| 94 | |||
| 95 | // Load default vertex input attributes | ||
| 96 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): | ||
| 97 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): | ||
| 98 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): | ||
| 99 | { | ||
| 100 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||
| 101 | // it directly write the values? | ||
| 102 | default_attr_write_buffer[default_attr_counter++] = value; | ||
| 103 | |||
| 104 | // Default attributes are written in a packed format such that four float24 values are encoded in | ||
| 105 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 106 | // written. | ||
| 107 | if (default_attr_counter >= 3) { | ||
| 108 | default_attr_counter = 0; | ||
| 109 | |||
| 110 | auto& setup = regs.vs_default_attributes_setup; | ||
| 111 | |||
| 112 | if (setup.index >= 16) { | ||
| 113 | LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | 105 | ||
| 117 | Math::Vec4<float24> attribute; | 106 | auto& setup = regs.vs_default_attributes_setup; |
| 118 | 107 | ||
| 119 | // NOTE: The destination component order indeed is "backwards" | 108 | if (setup.index >= 16) { |
| 120 | attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); | 109 | LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); |
| 121 | attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); | 110 | break; |
| 122 | attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); | 111 | } |
| 123 | attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); | ||
| 124 | 112 | ||
| 125 | LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, | 113 | Math::Vec4<float24> attribute; |
| 126 | attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), | ||
| 127 | attribute.w.ToFloat32()); | ||
| 128 | 114 | ||
| 129 | // TODO: Verify that this actually modifies the register! | 115 | // NOTE: The destination component order indeed is "backwards" |
| 130 | if (setup.index < 15) { | 116 | attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); |
| 131 | g_state.vs_default_attributes[setup.index] = attribute; | 117 | attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | |
| 132 | setup.index++; | 118 | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); |
| 133 | } else { | 119 | attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | |
| 134 | // Put each attribute into an immediate input buffer. | 120 | ((default_attr_write_buffer[2] >> 24) & 0xFF)); |
| 135 | // When all specified immediate attributes are present, the Vertex Shader is invoked and everything is | 121 | attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); |
| 136 | // sent to the primitive assembler. | ||
| 137 | 122 | ||
| 138 | auto& immediate_input = g_state.immediate.input_vertex; | 123 | LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, |
| 139 | auto& immediate_attribute_id = g_state.immediate.current_attribute; | 124 | attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), |
| 125 | attribute.w.ToFloat32()); | ||
| 140 | 126 | ||
| 141 | immediate_input.attr[immediate_attribute_id++] = attribute; | 127 | // TODO: Verify that this actually modifies the register! |
| 128 | if (setup.index < 15) { | ||
| 129 | g_state.vs_default_attributes[setup.index] = attribute; | ||
| 130 | setup.index++; | ||
| 131 | } else { | ||
| 132 | // Put each attribute into an immediate input buffer. | ||
| 133 | // When all specified immediate attributes are present, the Vertex Shader is invoked | ||
| 134 | // and everything is | ||
| 135 | // sent to the primitive assembler. | ||
| 142 | 136 | ||
| 143 | if (immediate_attribute_id >= regs.vs.num_input_attributes+1) { | 137 | auto& immediate_input = g_state.immediate.input_vertex; |
| 144 | immediate_attribute_id = 0; | 138 | auto& immediate_attribute_id = g_state.immediate.current_attribute; |
| 145 | 139 | ||
| 146 | Shader::UnitState<false> shader_unit; | 140 | immediate_input.attr[immediate_attribute_id++] = attribute; |
| 147 | g_state.vs.Setup(); | ||
| 148 | 141 | ||
| 149 | // Send to vertex shader | 142 | if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { |
| 150 | if (g_debug_context) | 143 | immediate_attribute_id = 0; |
| 151 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input)); | ||
| 152 | g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); | ||
| 153 | Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs); | ||
| 154 | 144 | ||
| 155 | // Send to renderer | 145 | Shader::UnitState<false> shader_unit; |
| 156 | using Pica::Shader::OutputVertex; | 146 | g_state.vs.Setup(); |
| 157 | auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { | ||
| 158 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||
| 159 | }; | ||
| 160 | 147 | ||
| 161 | g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); | 148 | // Send to vertex shader |
| 162 | } | 149 | if (g_debug_context) |
| 150 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | ||
| 151 | static_cast<void*>(&immediate_input)); | ||
| 152 | g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes + 1); | ||
| 153 | Shader::OutputVertex output_vertex = | ||
| 154 | shader_unit.output_registers.ToVertex(regs.vs); | ||
| 155 | |||
| 156 | // Send to renderer | ||
| 157 | using Pica::Shader::OutputVertex; | ||
| 158 | auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, | ||
| 159 | const OutputVertex& v2) { | ||
| 160 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||
| 161 | }; | ||
| 162 | |||
| 163 | g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); | ||
| 163 | } | 164 | } |
| 164 | } | 165 | } |
| 165 | break; | ||
| 166 | } | 166 | } |
| 167 | break; | ||
| 168 | } | ||
| 167 | 169 | ||
| 168 | case PICA_REG_INDEX(gpu_mode): | 170 | case PICA_REG_INDEX(gpu_mode): |
| 169 | if (regs.gpu_mode == Regs::GPUMode::Configuring) { | 171 | if (regs.gpu_mode == Regs::GPUMode::Configuring) { |
| 170 | // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring | 172 | // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring |
| 171 | VideoCore::g_renderer->Rasterizer()->DrawTriangles(); | 173 | VideoCore::g_renderer->Rasterizer()->DrawTriangles(); |
| 172 | 174 | ||
| 173 | if (g_debug_context) { | 175 | if (g_debug_context) { |
| 174 | g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); | 176 | g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); |
| 175 | } | ||
| 176 | } | 177 | } |
| 177 | break; | ||
| 178 | |||
| 179 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): | ||
| 180 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): | ||
| 181 | { | ||
| 182 | unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0])); | ||
| 183 | u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); | ||
| 184 | g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; | ||
| 185 | g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); | ||
| 186 | break; | ||
| 187 | } | 178 | } |
| 179 | break; | ||
| 180 | |||
| 181 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): | ||
| 182 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): { | ||
| 183 | unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0])); | ||
| 184 | u32* head_ptr = | ||
| 185 | (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); | ||
| 186 | g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; | ||
| 187 | g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); | ||
| 188 | break; | ||
| 189 | } | ||
| 188 | 190 | ||
| 189 | // It seems like these trigger vertex rendering | 191 | // It seems like these trigger vertex rendering |
| 190 | case PICA_REG_INDEX(trigger_draw): | 192 | case PICA_REG_INDEX(trigger_draw): |
| 191 | case PICA_REG_INDEX(trigger_draw_indexed): | 193 | case PICA_REG_INDEX(trigger_draw_indexed): { |
| 192 | { | 194 | MICROPROFILE_SCOPE(GPU_Drawing); |
| 193 | MICROPROFILE_SCOPE(GPU_Drawing); | ||
| 194 | 195 | ||
| 195 | #if PICA_LOG_TEV | 196 | #if PICA_LOG_TEV |
| 196 | DebugUtils::DumpTevStageConfig(regs.GetTevStages()); | 197 | DebugUtils::DumpTevStageConfig(regs.GetTevStages()); |
| 197 | #endif | 198 | #endif |
| 198 | if (g_debug_context) | 199 | if (g_debug_context) |
| 199 | g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); | 200 | g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); |
| 200 | 201 | ||
| 201 | // Processes information about internal vertex attributes to figure out how a vertex is loaded. | 202 | // Processes information about internal vertex attributes to figure out how a vertex is |
| 202 | // Later, these can be compiled and cached. | 203 | // loaded. |
| 203 | const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); | 204 | // Later, these can be compiled and cached. |
| 204 | VertexLoader loader(regs); | 205 | const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); |
| 205 | 206 | VertexLoader loader(regs); | |
| 206 | // Load vertices | 207 | |
| 207 | bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); | 208 | // Load vertices |
| 208 | 209 | bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); | |
| 209 | const auto& index_info = regs.index_array; | 210 | |
| 210 | const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); | 211 | const auto& index_info = regs.index_array; |
| 211 | const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); | 212 | const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); |
| 212 | bool index_u16 = index_info.format != 0; | 213 | const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); |
| 213 | 214 | bool index_u16 = index_info.format != 0; | |
| 214 | PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; | 215 | |
| 215 | 216 | PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; | |
| 216 | if (g_debug_context) { | 217 | |
| 217 | for (int i = 0; i < 3; ++i) { | 218 | if (g_debug_context) { |
| 218 | const auto texture = regs.GetTextures()[i]; | 219 | for (int i = 0; i < 3; ++i) { |
| 219 | if (!texture.enabled) | 220 | const auto texture = regs.GetTextures()[i]; |
| 220 | continue; | 221 | if (!texture.enabled) |
| 221 | 222 | continue; | |
| 222 | u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); | 223 | |
| 223 | if (g_debug_context && Pica::g_debug_context->recorder) | 224 | u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); |
| 224 | g_debug_context->recorder->MemoryAccessed(texture_data, Pica::Regs::NibblesPerPixel(texture.format) * texture.config.width / 2 * texture.config.height, texture.config.GetPhysicalAddress()); | 225 | if (g_debug_context && Pica::g_debug_context->recorder) |
| 225 | } | 226 | g_debug_context->recorder->MemoryAccessed( |
| 227 | texture_data, Pica::Regs::NibblesPerPixel(texture.format) * | ||
| 228 | texture.config.width / 2 * texture.config.height, | ||
| 229 | texture.config.GetPhysicalAddress()); | ||
| 226 | } | 230 | } |
| 231 | } | ||
| 227 | 232 | ||
| 228 | DebugUtils::MemoryAccessTracker memory_accesses; | 233 | DebugUtils::MemoryAccessTracker memory_accesses; |
| 229 | |||
| 230 | // Simple circular-replacement vertex cache | ||
| 231 | // The size has been tuned for optimal balance between hit-rate and the cost of lookup | ||
| 232 | const size_t VERTEX_CACHE_SIZE = 32; | ||
| 233 | std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; | ||
| 234 | std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache; | ||
| 235 | 234 | ||
| 236 | unsigned int vertex_cache_pos = 0; | 235 | // Simple circular-replacement vertex cache |
| 237 | vertex_cache_ids.fill(-1); | 236 | // The size has been tuned for optimal balance between hit-rate and the cost of lookup |
| 237 | const size_t VERTEX_CACHE_SIZE = 32; | ||
| 238 | std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; | ||
| 239 | std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache; | ||
| 238 | 240 | ||
| 239 | Shader::UnitState<false> shader_unit; | 241 | unsigned int vertex_cache_pos = 0; |
| 240 | g_state.vs.Setup(); | 242 | vertex_cache_ids.fill(-1); |
| 241 | 243 | ||
| 242 | for (unsigned int index = 0; index < regs.num_vertices; ++index) | 244 | Shader::UnitState<false> shader_unit; |
| 243 | { | 245 | g_state.vs.Setup(); |
| 244 | // Indexed rendering doesn't use the start offset | ||
| 245 | unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : (index + regs.vertex_offset); | ||
| 246 | 246 | ||
| 247 | // -1 is a common special value used for primitive restart. Since it's unknown if | 247 | for (unsigned int index = 0; index < regs.num_vertices; ++index) { |
| 248 | // the PICA supports it, and it would mess up the caching, guard against it here. | 248 | // Indexed rendering doesn't use the start offset |
| 249 | ASSERT(vertex != -1); | 249 | unsigned int vertex = |
| 250 | is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) | ||
| 251 | : (index + regs.vertex_offset); | ||
| 250 | 252 | ||
| 251 | bool vertex_cache_hit = false; | 253 | // -1 is a common special value used for primitive restart. Since it's unknown if |
| 252 | Shader::OutputRegisters output_registers; | 254 | // the PICA supports it, and it would mess up the caching, guard against it here. |
| 255 | ASSERT(vertex != -1); | ||
| 253 | 256 | ||
| 254 | if (is_indexed) { | 257 | bool vertex_cache_hit = false; |
| 255 | if (g_debug_context && Pica::g_debug_context->recorder) { | 258 | Shader::OutputRegisters output_registers; |
| 256 | int size = index_u16 ? 2 : 1; | ||
| 257 | memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); | ||
| 258 | } | ||
| 259 | 259 | ||
| 260 | for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { | 260 | if (is_indexed) { |
| 261 | if (vertex == vertex_cache_ids[i]) { | 261 | if (g_debug_context && Pica::g_debug_context->recorder) { |
| 262 | output_registers = vertex_cache[i]; | 262 | int size = index_u16 ? 2 : 1; |
| 263 | vertex_cache_hit = true; | 263 | memory_accesses.AddAccess(base_address + index_info.offset + size * index, |
| 264 | break; | 264 | size); |
| 265 | } | ||
| 266 | } | ||
| 267 | } | 265 | } |
| 268 | 266 | ||
| 269 | if (!vertex_cache_hit) { | 267 | for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { |
| 270 | // Initialize data for the current vertex | 268 | if (vertex == vertex_cache_ids[i]) { |
| 271 | Shader::InputVertex input; | 269 | output_registers = vertex_cache[i]; |
| 272 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | 270 | vertex_cache_hit = true; |
| 273 | 271 | break; | |
| 274 | // Send to vertex shader | ||
| 275 | if (g_debug_context) | ||
| 276 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); | ||
| 277 | g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); | ||
| 278 | output_registers = shader_unit.output_registers; | ||
| 279 | |||
| 280 | if (is_indexed) { | ||
| 281 | vertex_cache[vertex_cache_pos] = output_registers; | ||
| 282 | vertex_cache_ids[vertex_cache_pos] = vertex; | ||
| 283 | vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; | ||
| 284 | } | 272 | } |
| 285 | } | 273 | } |
| 274 | } | ||
| 286 | 275 | ||
| 287 | // Retreive vertex from register data | 276 | if (!vertex_cache_hit) { |
| 288 | Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs); | 277 | // Initialize data for the current vertex |
| 278 | Shader::InputVertex input; | ||
| 279 | loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | ||
| 289 | 280 | ||
| 290 | // Send to renderer | 281 | // Send to vertex shader |
| 291 | using Pica::Shader::OutputVertex; | 282 | if (g_debug_context) |
| 292 | auto AddTriangle = []( | 283 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, |
| 293 | const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { | 284 | (void*)&input); |
| 294 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | 285 | g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); |
| 295 | }; | 286 | output_registers = shader_unit.output_registers; |
| 296 | 287 | ||
| 297 | primitive_assembler.SubmitVertex(output_vertex, AddTriangle); | 288 | if (is_indexed) { |
| 289 | vertex_cache[vertex_cache_pos] = output_registers; | ||
| 290 | vertex_cache_ids[vertex_cache_pos] = vertex; | ||
| 291 | vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; | ||
| 292 | } | ||
| 298 | } | 293 | } |
| 299 | 294 | ||
| 300 | for (auto& range : memory_accesses.ranges) { | 295 | // Retreive vertex from register data |
| 301 | g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), | 296 | Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs); |
| 302 | range.second, range.first); | 297 | |
| 303 | } | 298 | // Send to renderer |
| 299 | using Pica::Shader::OutputVertex; | ||
| 300 | auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, | ||
| 301 | const OutputVertex& v2) { | ||
| 302 | VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||
| 303 | }; | ||
| 304 | 304 | ||
| 305 | break; | 305 | primitive_assembler.SubmitVertex(output_vertex, AddTriangle); |
| 306 | } | 306 | } |
| 307 | 307 | ||
| 308 | case PICA_REG_INDEX(vs.bool_uniforms): | 308 | for (auto& range : memory_accesses.ranges) { |
| 309 | for (unsigned i = 0; i < 16; ++i) | 309 | g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), |
| 310 | g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; | 310 | range.second, range.first); |
| 311 | |||
| 312 | break; | ||
| 313 | |||
| 314 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): | ||
| 315 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): | ||
| 316 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): | ||
| 317 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): | ||
| 318 | { | ||
| 319 | int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); | ||
| 320 | auto values = regs.vs.int_uniforms[index]; | ||
| 321 | g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); | ||
| 322 | LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", | ||
| 323 | index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); | ||
| 324 | break; | ||
| 325 | } | 311 | } |
| 326 | 312 | ||
| 327 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): | 313 | break; |
| 328 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): | 314 | } |
| 329 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3): | ||
| 330 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4): | ||
| 331 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5): | ||
| 332 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): | ||
| 333 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): | ||
| 334 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): | ||
| 335 | { | ||
| 336 | auto& uniform_setup = regs.vs.uniform_setup; | ||
| 337 | |||
| 338 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||
| 339 | // it directly write the values? | ||
| 340 | uniform_write_buffer[float_regs_counter++] = value; | ||
| 341 | |||
| 342 | // Uniforms are written in a packed format such that four float24 values are encoded in | ||
| 343 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 344 | // written. | ||
| 345 | if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || | ||
| 346 | (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { | ||
| 347 | float_regs_counter = 0; | ||
| 348 | |||
| 349 | auto& uniform = g_state.vs.uniforms.f[uniform_setup.index]; | ||
| 350 | |||
| 351 | if (uniform_setup.index > 95) { | ||
| 352 | LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); | ||
| 353 | break; | ||
| 354 | } | ||
| 355 | 315 | ||
| 356 | // NOTE: The destination component order indeed is "backwards" | 316 | case PICA_REG_INDEX(vs.bool_uniforms): |
| 357 | if (uniform_setup.IsFloat32()) { | 317 | for (unsigned i = 0; i < 16; ++i) |
| 358 | for (auto i : {0,1,2,3}) | 318 | g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; |
| 359 | uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); | 319 | |
| 360 | } else { | 320 | break; |
| 361 | // TODO: Untested | 321 | |
| 362 | uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); | 322 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): |
| 363 | uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); | 323 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): |
| 364 | uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); | 324 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): |
| 365 | uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); | 325 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { |
| 366 | } | 326 | int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); |
| 327 | auto values = regs.vs.int_uniforms[index]; | ||
| 328 | g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); | ||
| 329 | LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(), | ||
| 330 | values.y.Value(), values.z.Value(), values.w.Value()); | ||
| 331 | break; | ||
| 332 | } | ||
| 367 | 333 | ||
| 368 | LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, | 334 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): |
| 369 | uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), | 335 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): |
| 370 | uniform.w.ToFloat32()); | 336 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3): |
| 337 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4): | ||
| 338 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5): | ||
| 339 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): | ||
| 340 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): | ||
| 341 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { | ||
| 342 | auto& uniform_setup = regs.vs.uniform_setup; | ||
| 343 | |||
| 344 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||
| 345 | // it directly write the values? | ||
| 346 | uniform_write_buffer[float_regs_counter++] = value; | ||
| 347 | |||
| 348 | // Uniforms are written in a packed format such that four float24 values are encoded in | ||
| 349 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 350 | // written. | ||
| 351 | if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || | ||
| 352 | (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { | ||
| 353 | float_regs_counter = 0; | ||
| 354 | |||
| 355 | auto& uniform = g_state.vs.uniforms.f[uniform_setup.index]; | ||
| 356 | |||
| 357 | if (uniform_setup.index > 95) { | ||
| 358 | LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); | ||
| 359 | break; | ||
| 360 | } | ||
| 371 | 361 | ||
| 372 | // TODO: Verify that this actually modifies the register! | 362 | // NOTE: The destination component order indeed is "backwards" |
| 373 | uniform_setup.index.Assign(uniform_setup.index + 1); | 363 | if (uniform_setup.IsFloat32()) { |
| 364 | for (auto i : {0, 1, 2, 3}) | ||
| 365 | uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); | ||
| 366 | } else { | ||
| 367 | // TODO: Untested | ||
| 368 | uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); | ||
| 369 | uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | | ||
| 370 | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); | ||
| 371 | uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | | ||
| 372 | ((uniform_write_buffer[2] >> 24) & 0xFF)); | ||
| 373 | uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); | ||
| 374 | } | 374 | } |
| 375 | break; | ||
| 376 | } | ||
| 377 | 375 | ||
| 378 | // Load shader program code | 376 | LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, |
| 379 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): | 377 | uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), |
| 380 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): | 378 | uniform.w.ToFloat32()); |
| 381 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): | ||
| 382 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf): | ||
| 383 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0): | ||
| 384 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): | ||
| 385 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): | ||
| 386 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): | ||
| 387 | { | ||
| 388 | g_state.vs.program_code[regs.vs.program.offset] = value; | ||
| 389 | regs.vs.program.offset++; | ||
| 390 | break; | ||
| 391 | } | ||
| 392 | 379 | ||
| 393 | // Load swizzle pattern data | 380 | // TODO: Verify that this actually modifies the register! |
| 394 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): | 381 | uniform_setup.index.Assign(uniform_setup.index + 1); |
| 395 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): | ||
| 396 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): | ||
| 397 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9): | ||
| 398 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da): | ||
| 399 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): | ||
| 400 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): | ||
| 401 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): | ||
| 402 | { | ||
| 403 | g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; | ||
| 404 | regs.vs.swizzle_patterns.offset++; | ||
| 405 | break; | ||
| 406 | } | 382 | } |
| 383 | break; | ||
| 384 | } | ||
| 407 | 385 | ||
| 408 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): | 386 | // Load shader program code |
| 409 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): | 387 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): |
| 410 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): | 388 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): |
| 411 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): | 389 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): |
| 412 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): | 390 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf): |
| 413 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): | 391 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0): |
| 414 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): | 392 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): |
| 415 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): | 393 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): |
| 416 | { | 394 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { |
| 417 | auto& lut_config = regs.lighting.lut_config; | 395 | g_state.vs.program_code[regs.vs.program.offset] = value; |
| 418 | 396 | regs.vs.program.offset++; | |
| 419 | ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); | 397 | break; |
| 420 | 398 | } | |
| 421 | g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; | ||
| 422 | lut_config.index.Assign(lut_config.index + 1); | ||
| 423 | break; | ||
| 424 | } | ||
| 425 | 399 | ||
| 426 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): | 400 | // Load swizzle pattern data |
| 427 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): | 401 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): |
| 428 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): | 402 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): |
| 429 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): | 403 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): |
| 430 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): | 404 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9): |
| 431 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): | 405 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da): |
| 432 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): | 406 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): |
| 433 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): | 407 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): |
| 434 | { | 408 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { |
| 435 | g_state.fog.lut[regs.fog_lut_offset % 128].raw = value; | 409 | g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; |
| 436 | regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1); | 410 | regs.vs.swizzle_patterns.offset++; |
| 437 | break; | 411 | break; |
| 438 | } | 412 | } |
| 413 | |||
| 414 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): | ||
| 415 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): | ||
| 416 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): | ||
| 417 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): | ||
| 418 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): | ||
| 419 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): | ||
| 420 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): | ||
| 421 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { | ||
| 422 | auto& lut_config = regs.lighting.lut_config; | ||
| 423 | |||
| 424 | ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); | ||
| 425 | |||
| 426 | g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; | ||
| 427 | lut_config.index.Assign(lut_config.index + 1); | ||
| 428 | break; | ||
| 429 | } | ||
| 439 | 430 | ||
| 440 | default: | 431 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): |
| 441 | break; | 432 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): |
| 433 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): | ||
| 434 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): | ||
| 435 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): | ||
| 436 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): | ||
| 437 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): | ||
| 438 | case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): { | ||
| 439 | g_state.fog.lut[regs.fog_lut_offset % 128].raw = value; | ||
| 440 | regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1); | ||
| 441 | break; | ||
| 442 | } | ||
| 443 | |||
| 444 | default: | ||
| 445 | break; | ||
| 442 | } | 446 | } |
| 443 | 447 | ||
| 444 | VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id); | 448 | VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id); |
| 445 | 449 | ||
| 446 | if (g_debug_context) | 450 | if (g_debug_context) |
| 447 | g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, reinterpret_cast<void*>(&id)); | 451 | g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, |
| 452 | reinterpret_cast<void*>(&id)); | ||
| 448 | } | 453 | } |
| 449 | 454 | ||
| 450 | void ProcessCommandList(const u32* list, u32 size) { | 455 | void ProcessCommandList(const u32* list, u32 size) { |
| @@ -458,14 +463,14 @@ void ProcessCommandList(const u32* list, u32 size) { | |||
| 458 | ++g_state.cmd_list.current_ptr; | 463 | ++g_state.cmd_list.current_ptr; |
| 459 | 464 | ||
| 460 | u32 value = *g_state.cmd_list.current_ptr++; | 465 | u32 value = *g_state.cmd_list.current_ptr++; |
| 461 | const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; | 466 | const CommandHeader header = {*g_state.cmd_list.current_ptr++}; |
| 462 | 467 | ||
| 463 | WritePicaReg(header.cmd_id, value, header.parameter_mask); | 468 | WritePicaReg(header.cmd_id, value, header.parameter_mask); |
| 464 | 469 | ||
| 465 | for (unsigned i = 0; i < header.extra_data_length; ++i) { | 470 | for (unsigned i = 0; i < header.extra_data_length; ++i) { |
| 466 | u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); | 471 | u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); |
| 467 | WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask); | 472 | WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, header.parameter_mask); |
| 468 | } | 473 | } |
| 469 | } | 474 | } |
| 470 | } | 475 | } |
| 471 | 476 | ||
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index 022a71f5e..62ad2d3f3 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <type_traits> | 7 | #include <type_traits> |
| 8 | |||
| 9 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 10 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 11 | 10 | ||
| @@ -16,7 +15,7 @@ namespace CommandProcessor { | |||
| 16 | union CommandHeader { | 15 | union CommandHeader { |
| 17 | u32 hex; | 16 | u32 hex; |
| 18 | 17 | ||
| 19 | BitField< 0, 16, u32> cmd_id; | 18 | BitField<0, 16, u32> cmd_id; |
| 20 | 19 | ||
| 21 | // parameter_mask: | 20 | // parameter_mask: |
| 22 | // Mask applied to the input value to make it possible to update | 21 | // Mask applied to the input value to make it possible to update |
| @@ -25,11 +24,11 @@ union CommandHeader { | |||
| 25 | // second bit: 0x0000FF00 | 24 | // second bit: 0x0000FF00 |
| 26 | // third bit: 0x00FF0000 | 25 | // third bit: 0x00FF0000 |
| 27 | // fourth bit: 0xFF000000 | 26 | // fourth bit: 0xFF000000 |
| 28 | BitField<16, 4, u32> parameter_mask; | 27 | BitField<16, 4, u32> parameter_mask; |
| 29 | 28 | ||
| 30 | BitField<20, 11, u32> extra_data_length; | 29 | BitField<20, 11, u32> extra_data_length; |
| 31 | 30 | ||
| 32 | BitField<31, 1, u32> group_commands; | 31 | BitField<31, 1, u32> group_commands; |
| 33 | }; | 32 | }; |
| 34 | static_assert(std::is_standard_layout<CommandHeader>::value == true, | 33 | static_assert(std::is_standard_layout<CommandHeader>::value == true, |
| 35 | "CommandHeader does not use standard layout"); | 34 | "CommandHeader does not use standard layout"); |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index bfa686380..8806464d9 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include <nihstro/bit_field.h> | 20 | #include <nihstro/bit_field.h> |
| 21 | #include <nihstro/float24.h> | 21 | #include <nihstro/float24.h> |
| 22 | #include <nihstro/shader_binary.h> | 22 | #include <nihstro/shader_binary.h> |
| 23 | |||
| 24 | #include "common/assert.h" | 23 | #include "common/assert.h" |
| 25 | #include "common/bit_field.h" | 24 | #include "common/bit_field.h" |
| 26 | #include "common/color.h" | 25 | #include "common/color.h" |
| @@ -29,7 +28,6 @@ | |||
| 29 | #include "common/logging/log.h" | 28 | #include "common/logging/log.h" |
| 30 | #include "common/math_util.h" | 29 | #include "common/math_util.h" |
| 31 | #include "common/vector_math.h" | 30 | #include "common/vector_math.h" |
| 32 | |||
| 33 | #include "video_core/debug_utils/debug_utils.h" | 31 | #include "video_core/debug_utils/debug_utils.h" |
| 34 | #include "video_core/pica.h" | 32 | #include "video_core/pica.h" |
| 35 | #include "video_core/pica_state.h" | 33 | #include "video_core/pica_state.h" |
| @@ -50,7 +48,8 @@ void DebugContext::DoOnEvent(Event event, void* data) { | |||
| 50 | { | 48 | { |
| 51 | std::unique_lock<std::mutex> lock(breakpoint_mutex); | 49 | std::unique_lock<std::mutex> lock(breakpoint_mutex); |
| 52 | 50 | ||
| 53 | // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets | 51 | // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug |
| 52 | // widgets | ||
| 54 | VideoCore::g_renderer->Rasterizer()->FlushAll(); | 53 | VideoCore::g_renderer->Rasterizer()->FlushAll(); |
| 55 | 54 | ||
| 56 | // TODO: Should stop the CPU thread here once we multithread emulation. | 55 | // TODO: Should stop the CPU thread here once we multithread emulation. |
| @@ -64,7 +63,7 @@ void DebugContext::DoOnEvent(Event event, void* data) { | |||
| 64 | } | 63 | } |
| 65 | 64 | ||
| 66 | // Wait until another thread tells us to Resume() | 65 | // Wait until another thread tells us to Resume() |
| 67 | resume_from_breakpoint.wait(lock, [&]{ return !at_breakpoint; }); | 66 | resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; }); |
| 68 | } | 67 | } |
| 69 | } | 68 | } |
| 70 | 69 | ||
| @@ -88,8 +87,9 @@ std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this global | |||
| 88 | 87 | ||
| 89 | namespace DebugUtils { | 88 | namespace DebugUtils { |
| 90 | 89 | ||
| 91 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) | 90 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, |
| 92 | { | 91 | const Shader::ShaderSetup& setup, |
| 92 | const Regs::VSOutputAttributes* output_attributes) { | ||
| 93 | struct StuffToWrite { | 93 | struct StuffToWrite { |
| 94 | const u8* pointer; | 94 | const u8* pointer; |
| 95 | u32 size; | 95 | u32 size; |
| @@ -97,7 +97,7 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c | |||
| 97 | std::vector<StuffToWrite> writing_queue; | 97 | std::vector<StuffToWrite> writing_queue; |
| 98 | u32 write_offset = 0; | 98 | u32 write_offset = 0; |
| 99 | 99 | ||
| 100 | auto QueueForWriting = [&writing_queue,&write_offset](const u8* pointer, u32 size) { | 100 | auto QueueForWriting = [&writing_queue, &write_offset](const u8* pointer, u32 size) { |
| 101 | writing_queue.push_back({pointer, size}); | 101 | writing_queue.push_back({pointer, size}); |
| 102 | u32 old_write_offset = write_offset; | 102 | u32 old_write_offset = write_offset; |
| 103 | write_offset += size; | 103 | write_offset += size; |
| @@ -108,99 +108,95 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c | |||
| 108 | // into shbin format (separate type and component mask). | 108 | // into shbin format (separate type and component mask). |
| 109 | union OutputRegisterInfo { | 109 | union OutputRegisterInfo { |
| 110 | enum Type : u64 { | 110 | enum Type : u64 { |
| 111 | POSITION = 0, | 111 | POSITION = 0, |
| 112 | QUATERNION = 1, | 112 | QUATERNION = 1, |
| 113 | COLOR = 2, | 113 | COLOR = 2, |
| 114 | TEXCOORD0 = 3, | 114 | TEXCOORD0 = 3, |
| 115 | TEXCOORD1 = 5, | 115 | TEXCOORD1 = 5, |
| 116 | TEXCOORD2 = 6, | 116 | TEXCOORD2 = 6, |
| 117 | 117 | ||
| 118 | VIEW = 8, | 118 | VIEW = 8, |
| 119 | }; | 119 | }; |
| 120 | 120 | ||
| 121 | BitField< 0, 64, u64> hex; | 121 | BitField<0, 64, u64> hex; |
| 122 | 122 | ||
| 123 | BitField< 0, 16, Type> type; | 123 | BitField<0, 16, Type> type; |
| 124 | BitField<16, 16, u64> id; | 124 | BitField<16, 16, u64> id; |
| 125 | BitField<32, 4, u64> component_mask; | 125 | BitField<32, 4, u64> component_mask; |
| 126 | }; | 126 | }; |
| 127 | 127 | ||
| 128 | // This is put into a try-catch block to make sure we notice unknown configurations. | 128 | // This is put into a try-catch block to make sure we notice unknown configurations. |
| 129 | std::vector<OutputRegisterInfo> output_info_table; | 129 | std::vector<OutputRegisterInfo> output_info_table; |
| 130 | for (unsigned i = 0; i < 7; ++i) { | 130 | for (unsigned i = 0; i < 7; ++i) { |
| 131 | using OutputAttributes = Pica::Regs::VSOutputAttributes; | 131 | using OutputAttributes = Pica::Regs::VSOutputAttributes; |
| 132 | 132 | ||
| 133 | // TODO: It's still unclear how the attribute components map to the register! | 133 | // TODO: It's still unclear how the attribute components map to the register! |
| 134 | // Once we know that, this code probably will not make much sense anymore. | 134 | // Once we know that, this code probably will not make much sense anymore. |
| 135 | std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = { | 135 | std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32>> map = { |
| 136 | { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} }, | 136 | {OutputAttributes::POSITION_X, {OutputRegisterInfo::POSITION, 1}}, |
| 137 | { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, | 137 | {OutputAttributes::POSITION_Y, {OutputRegisterInfo::POSITION, 2}}, |
| 138 | { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, | 138 | {OutputAttributes::POSITION_Z, {OutputRegisterInfo::POSITION, 4}}, |
| 139 | { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, | 139 | {OutputAttributes::POSITION_W, {OutputRegisterInfo::POSITION, 8}}, |
| 140 | { OutputAttributes::QUATERNION_X, { OutputRegisterInfo::QUATERNION, 1} }, | 140 | {OutputAttributes::QUATERNION_X, {OutputRegisterInfo::QUATERNION, 1}}, |
| 141 | { OutputAttributes::QUATERNION_Y, { OutputRegisterInfo::QUATERNION, 2} }, | 141 | {OutputAttributes::QUATERNION_Y, {OutputRegisterInfo::QUATERNION, 2}}, |
| 142 | { OutputAttributes::QUATERNION_Z, { OutputRegisterInfo::QUATERNION, 4} }, | 142 | {OutputAttributes::QUATERNION_Z, {OutputRegisterInfo::QUATERNION, 4}}, |
| 143 | { OutputAttributes::QUATERNION_W, { OutputRegisterInfo::QUATERNION, 8} }, | 143 | {OutputAttributes::QUATERNION_W, {OutputRegisterInfo::QUATERNION, 8}}, |
| 144 | { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, | 144 | {OutputAttributes::COLOR_R, {OutputRegisterInfo::COLOR, 1}}, |
| 145 | { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, | 145 | {OutputAttributes::COLOR_G, {OutputRegisterInfo::COLOR, 2}}, |
| 146 | { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, | 146 | {OutputAttributes::COLOR_B, {OutputRegisterInfo::COLOR, 4}}, |
| 147 | { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} }, | 147 | {OutputAttributes::COLOR_A, {OutputRegisterInfo::COLOR, 8}}, |
| 148 | { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} }, | 148 | {OutputAttributes::TEXCOORD0_U, {OutputRegisterInfo::TEXCOORD0, 1}}, |
| 149 | { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} }, | 149 | {OutputAttributes::TEXCOORD0_V, {OutputRegisterInfo::TEXCOORD0, 2}}, |
| 150 | { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, | 150 | {OutputAttributes::TEXCOORD1_U, {OutputRegisterInfo::TEXCOORD1, 1}}, |
| 151 | { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, | 151 | {OutputAttributes::TEXCOORD1_V, {OutputRegisterInfo::TEXCOORD1, 2}}, |
| 152 | { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, | 152 | {OutputAttributes::TEXCOORD2_U, {OutputRegisterInfo::TEXCOORD2, 1}}, |
| 153 | { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }, | 153 | {OutputAttributes::TEXCOORD2_V, {OutputRegisterInfo::TEXCOORD2, 2}}, |
| 154 | { OutputAttributes::VIEW_X, { OutputRegisterInfo::VIEW, 1} }, | 154 | {OutputAttributes::VIEW_X, {OutputRegisterInfo::VIEW, 1}}, |
| 155 | { OutputAttributes::VIEW_Y, { OutputRegisterInfo::VIEW, 2} }, | 155 | {OutputAttributes::VIEW_Y, {OutputRegisterInfo::VIEW, 2}}, |
| 156 | { OutputAttributes::VIEW_Z, { OutputRegisterInfo::VIEW, 4} } | 156 | {OutputAttributes::VIEW_Z, {OutputRegisterInfo::VIEW, 4}}, |
| 157 | }; | 157 | }; |
| 158 | 158 | ||
| 159 | for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ | 159 | for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ |
| 160 | output_attributes[i].map_x, | 160 | output_attributes[i].map_x, output_attributes[i].map_y, output_attributes[i].map_z, |
| 161 | output_attributes[i].map_y, | 161 | output_attributes[i].map_w}) { |
| 162 | output_attributes[i].map_z, | 162 | if (semantic == OutputAttributes::INVALID) |
| 163 | output_attributes[i].map_w }) { | 163 | continue; |
| 164 | if (semantic == OutputAttributes::INVALID) | 164 | |
| 165 | continue; | 165 | try { |
| 166 | 166 | OutputRegisterInfo::Type type = map.at(semantic).first; | |
| 167 | try { | 167 | u32 component_mask = map.at(semantic).second; |
| 168 | OutputRegisterInfo::Type type = map.at(semantic).first; | 168 | |
| 169 | u32 component_mask = map.at(semantic).second; | 169 | auto it = std::find_if(output_info_table.begin(), output_info_table.end(), |
| 170 | 170 | [&i, &type](const OutputRegisterInfo& info) { | |
| 171 | auto it = std::find_if(output_info_table.begin(), output_info_table.end(), | 171 | return info.id == i && info.type == type; |
| 172 | [&i, &type](const OutputRegisterInfo& info) { | 172 | }); |
| 173 | return info.id == i && info.type == type; | 173 | |
| 174 | } | 174 | if (it == output_info_table.end()) { |
| 175 | ); | 175 | output_info_table.emplace_back(); |
| 176 | 176 | output_info_table.back().type.Assign(type); | |
| 177 | if (it == output_info_table.end()) { | 177 | output_info_table.back().component_mask.Assign(component_mask); |
| 178 | output_info_table.emplace_back(); | 178 | output_info_table.back().id.Assign(i); |
| 179 | output_info_table.back().type.Assign(type); | 179 | } else { |
| 180 | output_info_table.back().component_mask.Assign(component_mask); | 180 | it->component_mask.Assign(it->component_mask | component_mask); |
| 181 | output_info_table.back().id.Assign(i); | ||
| 182 | } else { | ||
| 183 | it->component_mask.Assign(it->component_mask | component_mask); | ||
| 184 | } | ||
| 185 | } catch (const std::out_of_range& ) { | ||
| 186 | DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); | ||
| 187 | LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x", | ||
| 188 | (int)output_attributes[i].map_x.Value(), | ||
| 189 | (int)output_attributes[i].map_y.Value(), | ||
| 190 | (int)output_attributes[i].map_z.Value(), | ||
| 191 | (int)output_attributes[i].map_w.Value()); | ||
| 192 | } | 181 | } |
| 182 | } catch (const std::out_of_range&) { | ||
| 183 | DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); | ||
| 184 | LOG_ERROR(HW_GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x", | ||
| 185 | (int)output_attributes[i].map_x.Value(), | ||
| 186 | (int)output_attributes[i].map_y.Value(), | ||
| 187 | (int)output_attributes[i].map_z.Value(), | ||
| 188 | (int)output_attributes[i].map_w.Value()); | ||
| 193 | } | 189 | } |
| 194 | } | 190 | } |
| 195 | 191 | } | |
| 196 | 192 | ||
| 197 | struct { | 193 | struct { |
| 198 | DVLBHeader header; | 194 | DVLBHeader header; |
| 199 | u32 dvle_offset; | 195 | u32 dvle_offset; |
| 200 | } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE | 196 | } dvlb{{DVLBHeader::MAGIC_WORD, 1}}; // 1 DVLE |
| 201 | 197 | ||
| 202 | DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD }; | 198 | DVLPHeader dvlp{DVLPHeader::MAGIC_WORD}; |
| 203 | DVLEHeader dvle{ DVLEHeader::MAGIC_WORD }; | 199 | DVLEHeader dvle{DVLEHeader::MAGIC_WORD}; |
| 204 | 200 | ||
| 205 | QueueForWriting(reinterpret_cast<const u8*>(&dvlb), sizeof(dvlb)); | 201 | QueueForWriting(reinterpret_cast<const u8*>(&dvlb), sizeof(dvlb)); |
| 206 | u32 dvlp_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvlp), sizeof(dvlp)); | 202 | u32 dvlp_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvlp), sizeof(dvlp)); |
| @@ -216,14 +212,16 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c | |||
| 216 | dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size()); | 212 | dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size()); |
| 217 | u32 dummy = 0; | 213 | u32 dummy = 0; |
| 218 | for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { | 214 | for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { |
| 219 | QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i])); | 215 | QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), |
| 216 | sizeof(setup.swizzle_data[i])); | ||
| 220 | QueueForWriting(reinterpret_cast<const u8*>(&dummy), sizeof(dummy)); | 217 | QueueForWriting(reinterpret_cast<const u8*>(&dummy), sizeof(dummy)); |
| 221 | } | 218 | } |
| 222 | 219 | ||
| 223 | dvle.main_offset_words = config.main_offset; | 220 | dvle.main_offset_words = config.main_offset; |
| 224 | dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; | 221 | dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; |
| 225 | dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); | 222 | dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); |
| 226 | QueueForWriting(reinterpret_cast<const u8*>(output_info_table.data()), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); | 223 | QueueForWriting(reinterpret_cast<const u8*>(output_info_table.data()), |
| 224 | static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); | ||
| 227 | 225 | ||
| 228 | // TODO: Create a label table for "main" | 226 | // TODO: Create a label table for "main" |
| 229 | 227 | ||
| @@ -258,10 +256,8 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c | |||
| 258 | constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32()); | 256 | constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32()); |
| 259 | 257 | ||
| 260 | // Store constant if it's different from zero.. | 258 | // Store constant if it's different from zero.. |
| 261 | if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || | 259 | if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || setup.uniforms.f[i].y.ToFloat32() != 0.0 || |
| 262 | setup.uniforms.f[i].y.ToFloat32() != 0.0 || | 260 | setup.uniforms.f[i].z.ToFloat32() != 0.0 || setup.uniforms.f[i].w.ToFloat32() != 0.0) |
| 263 | setup.uniforms.f[i].z.ToFloat32() != 0.0 || | ||
| 264 | setup.uniforms.f[i].w.ToFloat32() != 0.0) | ||
| 265 | constant_table.emplace_back(constant); | 261 | constant_table.emplace_back(constant); |
| 266 | } | 262 | } |
| 267 | dvle.constant_table_offset = write_offset - dvlb.dvle_offset; | 263 | dvle.constant_table_offset = write_offset - dvlb.dvle_offset; |
| @@ -282,8 +278,7 @@ static std::unique_ptr<PicaTrace> pica_trace; | |||
| 282 | static std::mutex pica_trace_mutex; | 278 | static std::mutex pica_trace_mutex; |
| 283 | static int is_pica_tracing = false; | 279 | static int is_pica_tracing = false; |
| 284 | 280 | ||
| 285 | void StartPicaTracing() | 281 | void StartPicaTracing() { |
| 286 | { | ||
| 287 | if (is_pica_tracing) { | 282 | if (is_pica_tracing) { |
| 288 | LOG_WARNING(HW_GPU, "StartPicaTracing called even though tracing already running!"); | 283 | LOG_WARNING(HW_GPU, "StartPicaTracing called even though tracing already running!"); |
| 289 | return; | 284 | return; |
| @@ -295,13 +290,11 @@ void StartPicaTracing() | |||
| 295 | is_pica_tracing = true; | 290 | is_pica_tracing = true; |
| 296 | } | 291 | } |
| 297 | 292 | ||
| 298 | bool IsPicaTracing() | 293 | bool IsPicaTracing() { |
| 299 | { | ||
| 300 | return is_pica_tracing != 0; | 294 | return is_pica_tracing != 0; |
| 301 | } | 295 | } |
| 302 | 296 | ||
| 303 | void OnPicaRegWrite(PicaTrace::Write write) | 297 | void OnPicaRegWrite(PicaTrace::Write write) { |
| 304 | { | ||
| 305 | // Double check for is_pica_tracing to avoid pointless locking overhead | 298 | // Double check for is_pica_tracing to avoid pointless locking overhead |
| 306 | if (!is_pica_tracing) | 299 | if (!is_pica_tracing) |
| 307 | return; | 300 | return; |
| @@ -314,8 +307,7 @@ void OnPicaRegWrite(PicaTrace::Write write) | |||
| 314 | pica_trace->writes.push_back(write); | 307 | pica_trace->writes.push_back(write); |
| 315 | } | 308 | } |
| 316 | 309 | ||
| 317 | std::unique_ptr<PicaTrace> FinishPicaTracing() | 310 | std::unique_ptr<PicaTrace> FinishPicaTracing() { |
| 318 | { | ||
| 319 | if (!is_pica_tracing) { | 311 | if (!is_pica_tracing) { |
| 320 | LOG_WARNING(HW_GPU, "FinishPicaTracing called even though tracing isn't running!"); | 312 | LOG_WARNING(HW_GPU, "FinishPicaTracing called even though tracing isn't running!"); |
| 321 | return {}; | 313 | return {}; |
| @@ -331,12 +323,12 @@ std::unique_ptr<PicaTrace> FinishPicaTracing() | |||
| 331 | return ret; | 323 | return ret; |
| 332 | } | 324 | } |
| 333 | 325 | ||
| 334 | const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { | 326 | const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, |
| 327 | bool disable_alpha) { | ||
| 335 | const unsigned int coarse_x = x & ~7; | 328 | const unsigned int coarse_x = x & ~7; |
| 336 | const unsigned int coarse_y = y & ~7; | 329 | const unsigned int coarse_y = y & ~7; |
| 337 | 330 | ||
| 338 | if (info.format != Regs::TextureFormat::ETC1 && | 331 | if (info.format != Regs::TextureFormat::ETC1 && info.format != Regs::TextureFormat::ETC1A4) { |
| 339 | info.format != Regs::TextureFormat::ETC1A4) { | ||
| 340 | // TODO(neobrain): Fix code design to unify vertical block offsets! | 332 | // TODO(neobrain): Fix code design to unify vertical block offsets! |
| 341 | source += coarse_y * info.stride; | 333 | source += coarse_y * info.stride; |
| 342 | } | 334 | } |
| @@ -344,73 +336,63 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 344 | // TODO: Assert that width/height are multiples of block dimensions | 336 | // TODO: Assert that width/height are multiples of block dimensions |
| 345 | 337 | ||
| 346 | switch (info.format) { | 338 | switch (info.format) { |
| 347 | case Regs::TextureFormat::RGBA8: | 339 | case Regs::TextureFormat::RGBA8: { |
| 348 | { | ||
| 349 | auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4)); | 340 | auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4)); |
| 350 | return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; | 341 | return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; |
| 351 | } | 342 | } |
| 352 | 343 | ||
| 353 | case Regs::TextureFormat::RGB8: | 344 | case Regs::TextureFormat::RGB8: { |
| 354 | { | ||
| 355 | auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3)); | 345 | auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3)); |
| 356 | return { res.r(), res.g(), res.b(), 255 }; | 346 | return {res.r(), res.g(), res.b(), 255}; |
| 357 | } | 347 | } |
| 358 | 348 | ||
| 359 | case Regs::TextureFormat::RGB5A1: | 349 | case Regs::TextureFormat::RGB5A1: { |
| 360 | { | ||
| 361 | auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2)); | 350 | auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2)); |
| 362 | return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; | 351 | return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; |
| 363 | } | 352 | } |
| 364 | 353 | ||
| 365 | case Regs::TextureFormat::RGB565: | 354 | case Regs::TextureFormat::RGB565: { |
| 366 | { | ||
| 367 | auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2)); | 355 | auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2)); |
| 368 | return { res.r(), res.g(), res.b(), 255 }; | 356 | return {res.r(), res.g(), res.b(), 255}; |
| 369 | } | 357 | } |
| 370 | 358 | ||
| 371 | case Regs::TextureFormat::RGBA4: | 359 | case Regs::TextureFormat::RGBA4: { |
| 372 | { | ||
| 373 | auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2)); | 360 | auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2)); |
| 374 | return { res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a()) }; | 361 | return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; |
| 375 | } | 362 | } |
| 376 | 363 | ||
| 377 | case Regs::TextureFormat::IA8: | 364 | case Regs::TextureFormat::IA8: { |
| 378 | { | ||
| 379 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2); | 365 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2); |
| 380 | 366 | ||
| 381 | if (disable_alpha) { | 367 | if (disable_alpha) { |
| 382 | // Show intensity as red, alpha as green | 368 | // Show intensity as red, alpha as green |
| 383 | return { source_ptr[1], source_ptr[0], 0, 255 }; | 369 | return {source_ptr[1], source_ptr[0], 0, 255}; |
| 384 | } else { | 370 | } else { |
| 385 | return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0] }; | 371 | return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; |
| 386 | } | 372 | } |
| 387 | } | 373 | } |
| 388 | 374 | ||
| 389 | case Regs::TextureFormat::RG8: | 375 | case Regs::TextureFormat::RG8: { |
| 390 | { | ||
| 391 | auto res = Color::DecodeRG8(source + VideoCore::GetMortonOffset(x, y, 2)); | 376 | auto res = Color::DecodeRG8(source + VideoCore::GetMortonOffset(x, y, 2)); |
| 392 | return { res.r(), res.g(), 0, 255 }; | 377 | return {res.r(), res.g(), 0, 255}; |
| 393 | } | 378 | } |
| 394 | 379 | ||
| 395 | case Regs::TextureFormat::I8: | 380 | case Regs::TextureFormat::I8: { |
| 396 | { | ||
| 397 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); | 381 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); |
| 398 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; | 382 | return {*source_ptr, *source_ptr, *source_ptr, 255}; |
| 399 | } | 383 | } |
| 400 | 384 | ||
| 401 | case Regs::TextureFormat::A8: | 385 | case Regs::TextureFormat::A8: { |
| 402 | { | ||
| 403 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); | 386 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); |
| 404 | 387 | ||
| 405 | if (disable_alpha) { | 388 | if (disable_alpha) { |
| 406 | return { *source_ptr, *source_ptr, *source_ptr, 255 }; | 389 | return {*source_ptr, *source_ptr, *source_ptr, 255}; |
| 407 | } else { | 390 | } else { |
| 408 | return { 0, 0, 0, *source_ptr }; | 391 | return {0, 0, 0, *source_ptr}; |
| 409 | } | 392 | } |
| 410 | } | 393 | } |
| 411 | 394 | ||
| 412 | case Regs::TextureFormat::IA4: | 395 | case Regs::TextureFormat::IA4: { |
| 413 | { | ||
| 414 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); | 396 | const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); |
| 415 | 397 | ||
| 416 | u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); | 398 | u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); |
| @@ -418,25 +400,23 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 418 | 400 | ||
| 419 | if (disable_alpha) { | 401 | if (disable_alpha) { |
| 420 | // Show intensity as red, alpha as green | 402 | // Show intensity as red, alpha as green |
| 421 | return { i, a, 0, 255 }; | 403 | return {i, a, 0, 255}; |
| 422 | } else { | 404 | } else { |
| 423 | return { i, i, i, a }; | 405 | return {i, i, i, a}; |
| 424 | } | 406 | } |
| 425 | } | 407 | } |
| 426 | 408 | ||
| 427 | case Regs::TextureFormat::I4: | 409 | case Regs::TextureFormat::I4: { |
| 428 | { | ||
| 429 | u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); | 410 | u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); |
| 430 | const u8* source_ptr = source + morton_offset / 2; | 411 | const u8* source_ptr = source + morton_offset / 2; |
| 431 | 412 | ||
| 432 | u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); | 413 | u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); |
| 433 | i = Color::Convert4To8(i); | 414 | i = Color::Convert4To8(i); |
| 434 | 415 | ||
| 435 | return { i, i, i, 255 }; | 416 | return {i, i, i, 255}; |
| 436 | } | 417 | } |
| 437 | 418 | ||
| 438 | case Regs::TextureFormat::A4: | 419 | case Regs::TextureFormat::A4: { |
| 439 | { | ||
| 440 | u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); | 420 | u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); |
| 441 | const u8* source_ptr = source + morton_offset / 2; | 421 | const u8* source_ptr = source + morton_offset / 2; |
| 442 | 422 | ||
| @@ -444,15 +424,14 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 444 | a = Color::Convert4To8(a); | 424 | a = Color::Convert4To8(a); |
| 445 | 425 | ||
| 446 | if (disable_alpha) { | 426 | if (disable_alpha) { |
| 447 | return { a, a, a, 255 }; | 427 | return {a, a, a, 255}; |
| 448 | } else { | 428 | } else { |
| 449 | return { 0, 0, 0, a }; | 429 | return {0, 0, 0, a}; |
| 450 | } | 430 | } |
| 451 | } | 431 | } |
| 452 | 432 | ||
| 453 | case Regs::TextureFormat::ETC1: | 433 | case Regs::TextureFormat::ETC1: |
| 454 | case Regs::TextureFormat::ETC1A4: | 434 | case Regs::TextureFormat::ETC1A4: { |
| 455 | { | ||
| 456 | bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4); | 435 | bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4); |
| 457 | 436 | ||
| 458 | // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles | 437 | // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles |
| @@ -462,10 +441,9 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 462 | int subtile_index = ((x / subtile_width) & 1) + 2 * ((y / subtile_height) & 1); | 441 | int subtile_index = ((x / subtile_width) & 1) + 2 * ((y / subtile_height) & 1); |
| 463 | unsigned subtile_bytes = has_alpha ? 2 : 1; // TODO: Name... | 442 | unsigned subtile_bytes = has_alpha ? 2 : 1; // TODO: Name... |
| 464 | 443 | ||
| 465 | const u64* source_ptr = (const u64*)(source | 444 | const u64* source_ptr = (const u64*)(source + coarse_x * subtile_bytes * 4 + |
| 466 | + coarse_x * subtile_bytes * 4 | 445 | coarse_y * subtile_bytes * 4 * (info.width / 8) + |
| 467 | + coarse_y * subtile_bytes * 4 * (info.width / 8) | 446 | subtile_index * subtile_bytes * 8); |
| 468 | + subtile_index * subtile_bytes * 8); | ||
| 469 | u64 alpha = 0xFFFFFFFFFFFFFFFF; | 447 | u64 alpha = 0xFFFFFFFFFFFFFFFF; |
| 470 | if (has_alpha) { | 448 | if (has_alpha) { |
| 471 | alpha = *source_ptr; | 449 | alpha = *source_ptr; |
| @@ -474,7 +452,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 474 | 452 | ||
| 475 | union ETC1Tile { | 453 | union ETC1Tile { |
| 476 | // Each of these two is a collection of 16 bits (one per lookup value) | 454 | // Each of these two is a collection of 16 bits (one per lookup value) |
| 477 | BitField< 0, 16, u64> table_subindexes; | 455 | BitField<0, 16, u64> table_subindexes; |
| 478 | BitField<16, 16, u64> negation_flags; | 456 | BitField<16, 16, u64> negation_flags; |
| 479 | 457 | ||
| 480 | unsigned GetTableSubIndex(unsigned index) const { | 458 | unsigned GetTableSubIndex(unsigned index) const { |
| @@ -547,11 +525,18 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 547 | } | 525 | } |
| 548 | 526 | ||
| 549 | // Add modifier | 527 | // Add modifier |
| 550 | unsigned table_index = static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value()); | 528 | unsigned table_index = |
| 529 | static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value()); | ||
| 551 | 530 | ||
| 552 | static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{ | 531 | static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{ |
| 553 | {{ 2, 8 }}, {{ 5, 17 }}, {{ 9, 29 }}, {{ 13, 42 }}, | 532 | {{2, 8}}, |
| 554 | {{ 18, 60 }}, {{ 24, 80 }}, {{ 33, 106 }}, {{ 47, 183 }} | 533 | {{5, 17}}, |
| 534 | {{9, 29}}, | ||
| 535 | {{13, 42}}, | ||
| 536 | {{18, 60}}, | ||
| 537 | {{24, 80}}, | ||
| 538 | {{33, 106}}, | ||
| 539 | {{47, 183}}, | ||
| 555 | }}; | 540 | }}; |
| 556 | 541 | ||
| 557 | int modifier = etc1_modifier_table.at(table_index).at(GetTableSubIndex(texel)); | 542 | int modifier = etc1_modifier_table.at(table_index).at(GetTableSubIndex(texel)); |
| @@ -564,7 +549,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 564 | 549 | ||
| 565 | return ret.Cast<u8>(); | 550 | return ret.Cast<u8>(); |
| 566 | } | 551 | } |
| 567 | } const *etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr); | 552 | } const* etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr); |
| 568 | 553 | ||
| 569 | alpha >>= 4 * ((x & 3) * 4 + (y & 3)); | 554 | alpha >>= 4 * ((x & 3) * 4 + (y & 3)); |
| 570 | return Math::MakeVec(etc1_tile->GetRGB(x & 3, y & 3), | 555 | return Math::MakeVec(etc1_tile->GetRGB(x & 3, y & 3), |
| @@ -579,8 +564,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 579 | } | 564 | } |
| 580 | 565 | ||
| 581 | TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, | 566 | TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, |
| 582 | const Regs::TextureFormat& format) | 567 | const Regs::TextureFormat& format) { |
| 583 | { | ||
| 584 | TextureInfo info; | 568 | TextureInfo info; |
| 585 | info.physical_address = config.GetPhysicalAddress(); | 569 | info.physical_address = config.GetPhysicalAddress(); |
| 586 | info.width = config.width; | 570 | info.width = config.width; |
| @@ -595,13 +579,13 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, | |||
| 595 | static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) { | 579 | static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) { |
| 596 | auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); | 580 | auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); |
| 597 | if (!fp->WriteBytes(data, length)) | 581 | if (!fp->WriteBytes(data, length)) |
| 598 | png_error(png_ptr, "Failed to write to output PNG file."); | 582 | png_error(png_ptr, "Failed to write to output PNG file."); |
| 599 | } | 583 | } |
| 600 | 584 | ||
| 601 | static void FlushIOFile(png_structp png_ptr) { | 585 | static void FlushIOFile(png_structp png_ptr) { |
| 602 | auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); | 586 | auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); |
| 603 | if (!fp->Flush()) | 587 | if (!fp->Flush()) |
| 604 | png_error(png_ptr, "Failed to flush to output PNG file."); | 588 | png_error(png_ptr, "Failed to flush to output PNG file."); |
| 605 | } | 589 | } |
| 606 | #endif | 590 | #endif |
| 607 | 591 | ||
| @@ -614,7 +598,8 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { | |||
| 614 | 598 | ||
| 615 | // Write data to file | 599 | // Write data to file |
| 616 | static int dump_index = 0; | 600 | static int dump_index = 0; |
| 617 | std::string filename = std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png"); | 601 | std::string filename = |
| 602 | std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png"); | ||
| 618 | u32 row_stride = texture_config.width * 3; | 603 | u32 row_stride = texture_config.width * 3; |
| 619 | 604 | ||
| 620 | u8* buf; | 605 | u8* buf; |
| @@ -632,7 +617,6 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { | |||
| 632 | if (png_ptr == nullptr) { | 617 | if (png_ptr == nullptr) { |
| 633 | LOG_ERROR(Debug_GPU, "Could not allocate write struct"); | 618 | LOG_ERROR(Debug_GPU, "Could not allocate write struct"); |
| 634 | goto finalise; | 619 | goto finalise; |
| 635 | |||
| 636 | } | 620 | } |
| 637 | 621 | ||
| 638 | // Initialize info structure | 622 | // Initialize info structure |
| @@ -651,9 +635,9 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { | |||
| 651 | png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile); | 635 | png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile); |
| 652 | 636 | ||
| 653 | // Write header (8 bit color depth) | 637 | // Write header (8 bit color depth) |
| 654 | png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, | 638 | png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, 8, |
| 655 | 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, | 639 | PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, |
| 656 | PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); | 640 | PNG_FILTER_TYPE_BASE); |
| 657 | 641 | ||
| 658 | png_text title_text; | 642 | png_text title_text; |
| 659 | title_text.compression = PNG_TEXT_COMPRESSION_NONE; | 643 | title_text.compression = PNG_TEXT_COMPRESSION_NONE; |
| @@ -672,15 +656,14 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { | |||
| 672 | info.stride = row_stride; | 656 | info.stride = row_stride; |
| 673 | info.format = g_state.regs.texture0_format; | 657 | info.format = g_state.regs.texture0_format; |
| 674 | Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info); | 658 | Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info); |
| 675 | buf[3 * x + y * row_stride ] = texture_color.r(); | 659 | buf[3 * x + y * row_stride] = texture_color.r(); |
| 676 | buf[3 * x + y * row_stride + 1] = texture_color.g(); | 660 | buf[3 * x + y * row_stride + 1] = texture_color.g(); |
| 677 | buf[3 * x + y * row_stride + 2] = texture_color.b(); | 661 | buf[3 * x + y * row_stride + 2] = texture_color.b(); |
| 678 | } | 662 | } |
| 679 | } | 663 | } |
| 680 | 664 | ||
| 681 | // Write image data | 665 | // Write image data |
| 682 | for (unsigned y = 0; y < texture_config.height; ++y) | 666 | for (unsigned y = 0; y < texture_config.height; ++y) { |
| 683 | { | ||
| 684 | u8* row_ptr = (u8*)buf + y * row_stride; | 667 | u8* row_ptr = (u8*)buf + y * row_stride; |
| 685 | png_write_row(png_ptr, row_ptr); | 668 | png_write_row(png_ptr, row_ptr); |
| 686 | } | 669 | } |
| @@ -691,12 +674,15 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { | |||
| 691 | png_write_end(png_ptr, nullptr); | 674 | png_write_end(png_ptr, nullptr); |
| 692 | 675 | ||
| 693 | finalise: | 676 | finalise: |
| 694 | if (info_ptr != nullptr) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); | 677 | if (info_ptr != nullptr) |
| 695 | if (png_ptr != nullptr) png_destroy_write_struct(&png_ptr, (png_infopp)nullptr); | 678 | png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); |
| 679 | if (png_ptr != nullptr) | ||
| 680 | png_destroy_write_struct(&png_ptr, (png_infopp) nullptr); | ||
| 696 | #endif | 681 | #endif |
| 697 | } | 682 | } |
| 698 | 683 | ||
| 699 | static std::string ReplacePattern(const std::string& input, const std::string& pattern, const std::string& replacement) { | 684 | static std::string ReplacePattern(const std::string& input, const std::string& pattern, |
| 685 | const std::string& replacement) { | ||
| 700 | size_t start = input.find(pattern); | 686 | size_t start = input.find(pattern); |
| 701 | if (start == std::string::npos) | 687 | if (start == std::string::npos) |
| 702 | return input; | 688 | return input; |
| @@ -709,16 +695,16 @@ static std::string ReplacePattern(const std::string& input, const std::string& p | |||
| 709 | static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) { | 695 | static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) { |
| 710 | using Source = Pica::Regs::TevStageConfig::Source; | 696 | using Source = Pica::Regs::TevStageConfig::Source; |
| 711 | static const std::map<Source, std::string> source_map = { | 697 | static const std::map<Source, std::string> source_map = { |
| 712 | { Source::PrimaryColor, "PrimaryColor" }, | 698 | {Source::PrimaryColor, "PrimaryColor"}, |
| 713 | { Source::PrimaryFragmentColor, "PrimaryFragmentColor" }, | 699 | {Source::PrimaryFragmentColor, "PrimaryFragmentColor"}, |
| 714 | { Source::SecondaryFragmentColor, "SecondaryFragmentColor" }, | 700 | {Source::SecondaryFragmentColor, "SecondaryFragmentColor"}, |
| 715 | { Source::Texture0, "Texture0" }, | 701 | {Source::Texture0, "Texture0"}, |
| 716 | { Source::Texture1, "Texture1" }, | 702 | {Source::Texture1, "Texture1"}, |
| 717 | { Source::Texture2, "Texture2" }, | 703 | {Source::Texture2, "Texture2"}, |
| 718 | { Source::Texture3, "Texture3" }, | 704 | {Source::Texture3, "Texture3"}, |
| 719 | { Source::PreviousBuffer, "PreviousBuffer" }, | 705 | {Source::PreviousBuffer, "PreviousBuffer"}, |
| 720 | { Source::Constant, "Constant" }, | 706 | {Source::Constant, "Constant"}, |
| 721 | { Source::Previous, "Previous" }, | 707 | {Source::Previous, "Previous"}, |
| 722 | }; | 708 | }; |
| 723 | 709 | ||
| 724 | const auto src_it = source_map.find(source); | 710 | const auto src_it = source_map.find(source); |
| @@ -728,19 +714,21 @@ static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfi | |||
| 728 | return src_it->second; | 714 | return src_it->second; |
| 729 | } | 715 | } |
| 730 | 716 | ||
| 731 | static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::ColorModifier modifier) { | 717 | static std::string GetTevStageConfigColorSourceString( |
| 718 | const Pica::Regs::TevStageConfig::Source& source, | ||
| 719 | const Pica::Regs::TevStageConfig::ColorModifier modifier) { | ||
| 732 | using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; | 720 | using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; |
| 733 | static const std::map<ColorModifier, std::string> color_modifier_map = { | 721 | static const std::map<ColorModifier, std::string> color_modifier_map = { |
| 734 | { ColorModifier::SourceColor, "%source.rgb" }, | 722 | {ColorModifier::SourceColor, "%source.rgb"}, |
| 735 | { ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)" }, | 723 | {ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)"}, |
| 736 | { ColorModifier::SourceAlpha, "%source.aaa" }, | 724 | {ColorModifier::SourceAlpha, "%source.aaa"}, |
| 737 | { ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)" }, | 725 | {ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)"}, |
| 738 | { ColorModifier::SourceRed, "%source.rrr" }, | 726 | {ColorModifier::SourceRed, "%source.rrr"}, |
| 739 | { ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)" }, | 727 | {ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)"}, |
| 740 | { ColorModifier::SourceGreen, "%source.ggg" }, | 728 | {ColorModifier::SourceGreen, "%source.ggg"}, |
| 741 | { ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)" }, | 729 | {ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)"}, |
| 742 | { ColorModifier::SourceBlue, "%source.bbb" }, | 730 | {ColorModifier::SourceBlue, "%source.bbb"}, |
| 743 | { ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)" }, | 731 | {ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)"}, |
| 744 | }; | 732 | }; |
| 745 | 733 | ||
| 746 | auto src_str = GetTevStageConfigSourceString(source); | 734 | auto src_str = GetTevStageConfigSourceString(source); |
| @@ -752,17 +740,19 @@ static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStage | |||
| 752 | return ReplacePattern(modifier_str, "%source", src_str); | 740 | return ReplacePattern(modifier_str, "%source", src_str); |
| 753 | } | 741 | } |
| 754 | 742 | ||
| 755 | static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::AlphaModifier modifier) { | 743 | static std::string GetTevStageConfigAlphaSourceString( |
| 744 | const Pica::Regs::TevStageConfig::Source& source, | ||
| 745 | const Pica::Regs::TevStageConfig::AlphaModifier modifier) { | ||
| 756 | using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; | 746 | using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; |
| 757 | static const std::map<AlphaModifier, std::string> alpha_modifier_map = { | 747 | static const std::map<AlphaModifier, std::string> alpha_modifier_map = { |
| 758 | { AlphaModifier::SourceAlpha, "%source.a" }, | 748 | {AlphaModifier::SourceAlpha, "%source.a"}, |
| 759 | { AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)" }, | 749 | {AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)"}, |
| 760 | { AlphaModifier::SourceRed, "%source.r" }, | 750 | {AlphaModifier::SourceRed, "%source.r"}, |
| 761 | { AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)" }, | 751 | {AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)"}, |
| 762 | { AlphaModifier::SourceGreen, "%source.g" }, | 752 | {AlphaModifier::SourceGreen, "%source.g"}, |
| 763 | { AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)" }, | 753 | {AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)"}, |
| 764 | { AlphaModifier::SourceBlue, "%source.b" }, | 754 | {AlphaModifier::SourceBlue, "%source.b"}, |
| 765 | { AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)" }, | 755 | {AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)"}, |
| 766 | }; | 756 | }; |
| 767 | 757 | ||
| 768 | auto src_str = GetTevStageConfigSourceString(source); | 758 | auto src_str = GetTevStageConfigSourceString(source); |
| @@ -774,18 +764,19 @@ static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStage | |||
| 774 | return ReplacePattern(modifier_str, "%source", src_str); | 764 | return ReplacePattern(modifier_str, "%source", src_str); |
| 775 | } | 765 | } |
| 776 | 766 | ||
| 777 | static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageConfig::Operation& operation) { | 767 | static std::string GetTevStageConfigOperationString( |
| 768 | const Pica::Regs::TevStageConfig::Operation& operation) { | ||
| 778 | using Operation = Pica::Regs::TevStageConfig::Operation; | 769 | using Operation = Pica::Regs::TevStageConfig::Operation; |
| 779 | static const std::map<Operation, std::string> combiner_map = { | 770 | static const std::map<Operation, std::string> combiner_map = { |
| 780 | { Operation::Replace, "%source1" }, | 771 | {Operation::Replace, "%source1"}, |
| 781 | { Operation::Modulate, "(%source1 * %source2)" }, | 772 | {Operation::Modulate, "(%source1 * %source2)"}, |
| 782 | { Operation::Add, "(%source1 + %source2)" }, | 773 | {Operation::Add, "(%source1 + %source2)"}, |
| 783 | { Operation::AddSigned, "(%source1 + %source2) - 0.5" }, | 774 | {Operation::AddSigned, "(%source1 + %source2) - 0.5"}, |
| 784 | { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, | 775 | {Operation::Lerp, "lerp(%source1, %source2, %source3)"}, |
| 785 | { Operation::Subtract, "(%source1 - %source2)" }, | 776 | {Operation::Subtract, "(%source1 - %source2)"}, |
| 786 | { Operation::Dot3_RGB, "dot(%source1, %source2)" }, | 777 | {Operation::Dot3_RGB, "dot(%source1, %source2)"}, |
| 787 | { Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)" }, | 778 | {Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)"}, |
| 788 | { Operation::AddThenMultiply, "((%source1 + %source2) * %source3)" }, | 779 | {Operation::AddThenMultiply, "((%source1 + %source2) * %source3)"}, |
| 789 | }; | 780 | }; |
| 790 | 781 | ||
| 791 | const auto op_it = combiner_map.find(operation); | 782 | const auto op_it = combiner_map.find(operation); |
| @@ -797,23 +788,37 @@ static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageCo | |||
| 797 | 788 | ||
| 798 | std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { | 789 | std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { |
| 799 | auto op_str = GetTevStageConfigOperationString(tev_stage.color_op); | 790 | auto op_str = GetTevStageConfigOperationString(tev_stage.color_op); |
| 800 | op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1)); | 791 | op_str = ReplacePattern( |
| 801 | op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2)); | 792 | op_str, "%source1", |
| 802 | return ReplacePattern(op_str, "%source3", GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3)); | 793 | GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1)); |
| 794 | op_str = ReplacePattern( | ||
| 795 | op_str, "%source2", | ||
| 796 | GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2)); | ||
| 797 | return ReplacePattern( | ||
| 798 | op_str, "%source3", | ||
| 799 | GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3)); | ||
| 803 | } | 800 | } |
| 804 | 801 | ||
| 805 | std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { | 802 | std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { |
| 806 | auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op); | 803 | auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op); |
| 807 | op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); | 804 | op_str = ReplacePattern( |
| 808 | op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); | 805 | op_str, "%source1", |
| 809 | return ReplacePattern(op_str, "%source3", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); | 806 | GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); |
| 807 | op_str = ReplacePattern( | ||
| 808 | op_str, "%source2", | ||
| 809 | GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); | ||
| 810 | return ReplacePattern( | ||
| 811 | op_str, "%source3", | ||
| 812 | GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); | ||
| 810 | } | 813 | } |
| 811 | 814 | ||
| 812 | void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) { | 815 | void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) { |
| 813 | std::string stage_info = "Tev setup:\n"; | 816 | std::string stage_info = "Tev setup:\n"; |
| 814 | for (size_t index = 0; index < stages.size(); ++index) { | 817 | for (size_t index = 0; index < stages.size(); ++index) { |
| 815 | const auto& tev_stage = stages[index]; | 818 | const auto& tev_stage = stages[index]; |
| 816 | stage_info += "Stage " + std::to_string(index) + ": " + GetTevStageConfigColorCombinerString(tev_stage) + " " + GetTevStageConfigAlphaCombinerString(tev_stage) + "\n"; | 819 | stage_info += "Stage " + std::to_string(index) + ": " + |
| 820 | GetTevStageConfigColorCombinerString(tev_stage) + " " + | ||
| 821 | GetTevStageConfigAlphaCombinerString(tev_stage) + "\n"; | ||
| 817 | } | 822 | } |
| 818 | LOG_TRACE(HW_GPU, "%s", stage_info.c_str()); | 823 | LOG_TRACE(HW_GPU, "%s", stage_info.c_str()); |
| 819 | } | 824 | } |
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 92e9734ae..189c93abb 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h | |||
| @@ -15,10 +15,8 @@ | |||
| 15 | #include <string> | 15 | #include <string> |
| 16 | #include <utility> | 16 | #include <utility> |
| 17 | #include <vector> | 17 | #include <vector> |
| 18 | |||
| 19 | #include "common/common_types.h" | 18 | #include "common/common_types.h" |
| 20 | #include "common/vector_math.h" | 19 | #include "common/vector_math.h" |
| 21 | |||
| 22 | #include "video_core/pica.h" | 20 | #include "video_core/pica.h" |
| 23 | 21 | ||
| 24 | namespace CiTrace { | 22 | namespace CiTrace { |
| @@ -53,13 +51,16 @@ public: | |||
| 53 | * Most importantly this is used for our debugger GUI. | 51 | * Most importantly this is used for our debugger GUI. |
| 54 | * | 52 | * |
| 55 | * To implement event handling, override the OnPicaBreakPointHit and OnPicaResume methods. | 53 | * To implement event handling, override the OnPicaBreakPointHit and OnPicaResume methods. |
| 56 | * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state access | 54 | * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state |
| 57 | * @todo Evaluate an alternative interface, in which there is only one managing observer and multiple child observers running (by design) on the same thread. | 55 | * access |
| 56 | * @todo Evaluate an alternative interface, in which there is only one managing observer and | ||
| 57 | * multiple child observers running (by design) on the same thread. | ||
| 58 | */ | 58 | */ |
| 59 | class BreakPointObserver { | 59 | class BreakPointObserver { |
| 60 | public: | 60 | public: |
| 61 | /// Constructs the object such that it observes events of the given DebugContext. | 61 | /// Constructs the object such that it observes events of the given DebugContext. |
| 62 | BreakPointObserver(std::shared_ptr<DebugContext> debug_context) : context_weak(debug_context) { | 62 | BreakPointObserver(std::shared_ptr<DebugContext> debug_context) |
| 63 | : context_weak(debug_context) { | ||
| 63 | std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex); | 64 | std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex); |
| 64 | debug_context->breakpoint_observers.push_back(this); | 65 | debug_context->breakpoint_observers.push_back(this); |
| 65 | } | 66 | } |
| @@ -84,15 +85,13 @@ public: | |||
| 84 | * @param data Optional data pointer (if unused, this is a nullptr) | 85 | * @param data Optional data pointer (if unused, this is a nullptr) |
| 85 | * @note This function will perform nothing unless it is overridden in the child class. | 86 | * @note This function will perform nothing unless it is overridden in the child class. |
| 86 | */ | 87 | */ |
| 87 | virtual void OnPicaBreakPointHit(Event, void*) { | 88 | virtual void OnPicaBreakPointHit(Event, void*) {} |
| 88 | } | ||
| 89 | 89 | ||
| 90 | /** | 90 | /** |
| 91 | * Action to perform when emulation is resumed from a breakpoint. | 91 | * Action to perform when emulation is resumed from a breakpoint. |
| 92 | * @note This function will perform nothing unless it is overridden in the child class. | 92 | * @note This function will perform nothing unless it is overridden in the child class. |
| 93 | */ | 93 | */ |
| 94 | virtual void OnPicaResume() { | 94 | virtual void OnPicaResume() {} |
| 95 | } | ||
| 96 | 95 | ||
| 97 | protected: | 96 | protected: |
| 98 | /** | 97 | /** |
| @@ -122,7 +121,8 @@ public: | |||
| 122 | * The current thread then is halted until Resume() is called from another thread (or until | 121 | * The current thread then is halted until Resume() is called from another thread (or until |
| 123 | * emulation is stopped). | 122 | * emulation is stopped). |
| 124 | * @param event Event which has happened | 123 | * @param event Event which has happened |
| 125 | * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called. | 124 | * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until |
| 125 | * Resume() is called. | ||
| 126 | */ | 126 | */ |
| 127 | void OnEvent(Event event, void* data) { | 127 | void OnEvent(Event event, void* data) { |
| 128 | // This check is left in the header to allow the compiler to inline it. | 128 | // This check is left in the header to allow the compiler to inline it. |
| @@ -132,11 +132,12 @@ public: | |||
| 132 | DoOnEvent(event, data); | 132 | DoOnEvent(event, data); |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | void DoOnEvent(Event event, void *data); | 135 | void DoOnEvent(Event event, void* data); |
| 136 | 136 | ||
| 137 | /** | 137 | /** |
| 138 | * Resume from the current breakpoint. | 138 | * Resume from the current breakpoint. |
| 139 | * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock. Calling from any other thread is safe. | 139 | * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock. |
| 140 | * Calling from any other thread is safe. | ||
| 140 | */ | 141 | */ |
| 141 | void Resume(); | 142 | void Resume(); |
| 142 | 143 | ||
| @@ -144,7 +145,7 @@ public: | |||
| 144 | * Delete all set breakpoints and resume emulation. | 145 | * Delete all set breakpoints and resume emulation. |
| 145 | */ | 146 | */ |
| 146 | void ClearBreakpoints() { | 147 | void ClearBreakpoints() { |
| 147 | for (auto &bp : breakpoints) { | 148 | for (auto& bp : breakpoints) { |
| 148 | bp.enabled = false; | 149 | bp.enabled = false; |
| 149 | } | 150 | } |
| 150 | Resume(); | 151 | Resume(); |
| @@ -182,8 +183,8 @@ namespace DebugUtils { | |||
| 182 | #define PICA_LOG_TEV 0 | 183 | #define PICA_LOG_TEV 0 |
| 183 | 184 | ||
| 184 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, | 185 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, |
| 185 | const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); | 186 | const Shader::ShaderSetup& setup, |
| 186 | 187 | const Regs::VSOutputAttributes* output_attributes); | |
| 187 | 188 | ||
| 188 | // Utility class to log Pica commands. | 189 | // Utility class to log Pica commands. |
| 189 | struct PicaTrace { | 190 | struct PicaTrace { |
| @@ -216,7 +217,10 @@ struct TextureInfo { | |||
| 216 | * @param source Source pointer to read data from | 217 | * @param source Source pointer to read data from |
| 217 | * @param s,t Texture coordinates to read from | 218 | * @param s,t Texture coordinates to read from |
| 218 | * @param info TextureInfo object describing the texture setup | 219 | * @param info TextureInfo object describing the texture setup |
| 219 | * @param disable_alpha This is used for debug widgets which use this method to display textures without providing a good way to visualize alpha by themselves. If true, this will return 255 for the alpha component, and either drop the information entirely or store it in an "unused" color channel. | 220 | * @param disable_alpha This is used for debug widgets which use this method to display textures |
| 221 | * without providing a good way to visualize alpha by themselves. If true, this will return 255 for | ||
| 222 | * the alpha component, and either drop the information entirely or store it in an "unused" color | ||
| 223 | * channel. | ||
| 220 | * @todo Eventually we should get rid of the disable_alpha parameter. | 224 | * @todo Eventually we should get rid of the disable_alpha parameter. |
| 221 | */ | 225 | */ |
| 222 | const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info, | 226 | const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info, |
| @@ -237,7 +241,8 @@ class MemoryAccessTracker { | |||
| 237 | /// Combine overlapping and close ranges | 241 | /// Combine overlapping and close ranges |
| 238 | void SimplifyRanges() { | 242 | void SimplifyRanges() { |
| 239 | for (auto it = ranges.begin(); it != ranges.end(); ++it) { | 243 | for (auto it = ranges.begin(); it != ranges.end(); ++it) { |
| 240 | // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too | 244 | // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, |
| 245 | // too | ||
| 241 | auto it2 = std::next(it); | 246 | auto it2 = std::next(it); |
| 242 | while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { | 247 | while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { |
| 243 | it->second = std::max(it->second, it2->first + it2->second - it->first); | 248 | it->second = std::max(it->second, it2->first + it2->second - it->first); |
diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h index a3aab216c..3c6636d66 100644 --- a/src/video_core/gpu_debugger.h +++ b/src/video_core/gpu_debugger.h | |||
| @@ -7,20 +7,16 @@ | |||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | #include <functional> | 8 | #include <functional> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | |||
| 11 | #include "core/hle/service/gsp_gpu.h" | 10 | #include "core/hle/service/gsp_gpu.h" |
| 12 | 11 | ||
| 13 | class GraphicsDebugger | 12 | class GraphicsDebugger { |
| 14 | { | ||
| 15 | public: | 13 | public: |
| 16 | // Base class for all objects which need to be notified about GPU events | 14 | // Base class for all objects which need to be notified about GPU events |
| 17 | class DebuggerObserver | 15 | class DebuggerObserver { |
| 18 | { | ||
| 19 | public: | 16 | public: |
| 20 | DebuggerObserver() : observed(nullptr) { } | 17 | DebuggerObserver() : observed(nullptr) {} |
| 21 | 18 | ||
| 22 | virtual ~DebuggerObserver() | 19 | virtual ~DebuggerObserver() { |
| 23 | { | ||
| 24 | if (observed) | 20 | if (observed) |
| 25 | observed->UnregisterObserver(this); | 21 | observed->UnregisterObserver(this); |
| 26 | } | 22 | } |
| @@ -31,15 +27,13 @@ public: | |||
| 31 | * @param total_command_count Total number of commands in the GX history | 27 | * @param total_command_count Total number of commands in the GX history |
| 32 | * @note All methods in this class are called from the GSP thread | 28 | * @note All methods in this class are called from the GSP thread |
| 33 | */ | 29 | */ |
| 34 | virtual void GXCommandProcessed(int total_command_count) | 30 | virtual void GXCommandProcessed(int total_command_count) { |
| 35 | { | 31 | const GSP_GPU::Command& cmd = observed->ReadGXCommandHistory(total_command_count - 1); |
| 36 | const GSP_GPU::Command& cmd = observed->ReadGXCommandHistory(total_command_count-1); | ||
| 37 | LOG_TRACE(Debug_GPU, "Received command: id=%x", (int)cmd.id.Value()); | 32 | LOG_TRACE(Debug_GPU, "Received command: id=%x", (int)cmd.id.Value()); |
| 38 | } | 33 | } |
| 39 | 34 | ||
| 40 | protected: | 35 | protected: |
| 41 | const GraphicsDebugger* GetDebugger() const | 36 | const GraphicsDebugger* GetDebugger() const { |
| 42 | { | ||
| 43 | return observed; | 37 | return observed; |
| 44 | } | 38 | } |
| 45 | 39 | ||
| @@ -49,8 +43,7 @@ public: | |||
| 49 | friend class GraphicsDebugger; | 43 | friend class GraphicsDebugger; |
| 50 | }; | 44 | }; |
| 51 | 45 | ||
| 52 | void GXCommandProcessed(u8* command_data) | 46 | void GXCommandProcessed(u8* command_data) { |
| 53 | { | ||
| 54 | if (observers.empty()) | 47 | if (observers.empty()) |
| 55 | return; | 48 | return; |
| 56 | 49 | ||
| @@ -60,33 +53,29 @@ public: | |||
| 60 | memcpy(&cmd, command_data, sizeof(GSP_GPU::Command)); | 53 | memcpy(&cmd, command_data, sizeof(GSP_GPU::Command)); |
| 61 | 54 | ||
| 62 | ForEachObserver([this](DebuggerObserver* observer) { | 55 | ForEachObserver([this](DebuggerObserver* observer) { |
| 63 | observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size())); | 56 | observer->GXCommandProcessed(static_cast<int>(this->gx_command_history.size())); |
| 64 | } ); | 57 | }); |
| 65 | } | 58 | } |
| 66 | 59 | ||
| 67 | const GSP_GPU::Command& ReadGXCommandHistory(int index) const | 60 | const GSP_GPU::Command& ReadGXCommandHistory(int index) const { |
| 68 | { | ||
| 69 | // TODO: Is this thread-safe? | 61 | // TODO: Is this thread-safe? |
| 70 | return gx_command_history[index]; | 62 | return gx_command_history[index]; |
| 71 | } | 63 | } |
| 72 | 64 | ||
| 73 | void RegisterObserver(DebuggerObserver* observer) | 65 | void RegisterObserver(DebuggerObserver* observer) { |
| 74 | { | ||
| 75 | // TODO: Check for duplicates | 66 | // TODO: Check for duplicates |
| 76 | observers.push_back(observer); | 67 | observers.push_back(observer); |
| 77 | observer->observed = this; | 68 | observer->observed = this; |
| 78 | } | 69 | } |
| 79 | 70 | ||
| 80 | void UnregisterObserver(DebuggerObserver* observer) | 71 | void UnregisterObserver(DebuggerObserver* observer) { |
| 81 | { | ||
| 82 | observers.erase(std::remove(observers.begin(), observers.end(), observer), observers.end()); | 72 | observers.erase(std::remove(observers.begin(), observers.end(), observer), observers.end()); |
| 83 | observer->observed = nullptr; | 73 | observer->observed = nullptr; |
| 84 | } | 74 | } |
| 85 | 75 | ||
| 86 | private: | 76 | private: |
| 87 | void ForEachObserver(std::function<void (DebuggerObserver*)> func) | 77 | void ForEachObserver(std::function<void(DebuggerObserver*)> func) { |
| 88 | { | 78 | std::for_each(observers.begin(), observers.end(), func); |
| 89 | std::for_each(observers.begin(),observers.end(), func); | ||
| 90 | } | 79 | } |
| 91 | 80 | ||
| 92 | std::vector<DebuggerObserver*> observers; | 81 | std::vector<DebuggerObserver*> observers; |
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index ec78f9593..ce2bd455e 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | #include <iterator> | 6 | #include <iterator> |
| 7 | #include <unordered_map> | 7 | #include <unordered_map> |
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | |||
| 10 | #include "video_core/pica.h" | 9 | #include "video_core/pica.h" |
| 11 | #include "video_core/pica_state.h" | 10 | #include "video_core/pica_state.h" |
| 12 | #include "video_core/primitive_assembly.h" | 11 | #include "video_core/primitive_assembly.h" |
| @@ -17,466 +16,466 @@ namespace Pica { | |||
| 17 | State g_state; | 16 | State g_state; |
| 18 | 17 | ||
| 19 | static const std::pair<u16, const char*> register_names[] = { | 18 | static const std::pair<u16, const char*> register_names[] = { |
| 20 | { 0x010, "GPUREG_FINALIZE" }, | 19 | {0x010, "GPUREG_FINALIZE"}, |
| 21 | 20 | ||
| 22 | { 0x040, "GPUREG_FACECULLING_CONFIG" }, | 21 | {0x040, "GPUREG_FACECULLING_CONFIG"}, |
| 23 | { 0x041, "GPUREG_VIEWPORT_WIDTH" }, | 22 | {0x041, "GPUREG_VIEWPORT_WIDTH"}, |
| 24 | { 0x042, "GPUREG_VIEWPORT_INVW" }, | 23 | {0x042, "GPUREG_VIEWPORT_INVW"}, |
| 25 | { 0x043, "GPUREG_VIEWPORT_HEIGHT" }, | 24 | {0x043, "GPUREG_VIEWPORT_HEIGHT"}, |
| 26 | { 0x044, "GPUREG_VIEWPORT_INVH" }, | 25 | {0x044, "GPUREG_VIEWPORT_INVH"}, |
| 27 | 26 | ||
| 28 | { 0x047, "GPUREG_FRAGOP_CLIP" }, | 27 | {0x047, "GPUREG_FRAGOP_CLIP"}, |
| 29 | { 0x048, "GPUREG_FRAGOP_CLIP_DATA0" }, | 28 | {0x048, "GPUREG_FRAGOP_CLIP_DATA0"}, |
| 30 | { 0x049, "GPUREG_FRAGOP_CLIP_DATA1" }, | 29 | {0x049, "GPUREG_FRAGOP_CLIP_DATA1"}, |
| 31 | { 0x04A, "GPUREG_FRAGOP_CLIP_DATA2" }, | 30 | {0x04A, "GPUREG_FRAGOP_CLIP_DATA2"}, |
| 32 | { 0x04B, "GPUREG_FRAGOP_CLIP_DATA3" }, | 31 | {0x04B, "GPUREG_FRAGOP_CLIP_DATA3"}, |
| 33 | 32 | ||
| 34 | { 0x04D, "GPUREG_DEPTHMAP_SCALE" }, | 33 | {0x04D, "GPUREG_DEPTHMAP_SCALE"}, |
| 35 | { 0x04E, "GPUREG_DEPTHMAP_OFFSET" }, | 34 | {0x04E, "GPUREG_DEPTHMAP_OFFSET"}, |
| 36 | { 0x04F, "GPUREG_SH_OUTMAP_TOTAL" }, | 35 | {0x04F, "GPUREG_SH_OUTMAP_TOTAL"}, |
| 37 | { 0x050, "GPUREG_SH_OUTMAP_O0" }, | 36 | {0x050, "GPUREG_SH_OUTMAP_O0"}, |
| 38 | { 0x051, "GPUREG_SH_OUTMAP_O1" }, | 37 | {0x051, "GPUREG_SH_OUTMAP_O1"}, |
| 39 | { 0x052, "GPUREG_SH_OUTMAP_O2" }, | 38 | {0x052, "GPUREG_SH_OUTMAP_O2"}, |
| 40 | { 0x053, "GPUREG_SH_OUTMAP_O3" }, | 39 | {0x053, "GPUREG_SH_OUTMAP_O3"}, |
| 41 | { 0x054, "GPUREG_SH_OUTMAP_O4" }, | 40 | {0x054, "GPUREG_SH_OUTMAP_O4"}, |
| 42 | { 0x055, "GPUREG_SH_OUTMAP_O5" }, | 41 | {0x055, "GPUREG_SH_OUTMAP_O5"}, |
| 43 | { 0x056, "GPUREG_SH_OUTMAP_O6" }, | 42 | {0x056, "GPUREG_SH_OUTMAP_O6"}, |
| 44 | 43 | ||
| 45 | { 0x061, "GPUREG_EARLYDEPTH_FUNC" }, | 44 | {0x061, "GPUREG_EARLYDEPTH_FUNC"}, |
| 46 | { 0x062, "GPUREG_EARLYDEPTH_TEST1" }, | 45 | {0x062, "GPUREG_EARLYDEPTH_TEST1"}, |
| 47 | { 0x063, "GPUREG_EARLYDEPTH_CLEAR" }, | 46 | {0x063, "GPUREG_EARLYDEPTH_CLEAR"}, |
| 48 | { 0x064, "GPUREG_SH_OUTATTR_MODE" }, | 47 | {0x064, "GPUREG_SH_OUTATTR_MODE"}, |
| 49 | { 0x065, "GPUREG_SCISSORTEST_MODE" }, | 48 | {0x065, "GPUREG_SCISSORTEST_MODE"}, |
| 50 | { 0x066, "GPUREG_SCISSORTEST_POS" }, | 49 | {0x066, "GPUREG_SCISSORTEST_POS"}, |
| 51 | { 0x067, "GPUREG_SCISSORTEST_DIM" }, | 50 | {0x067, "GPUREG_SCISSORTEST_DIM"}, |
| 52 | { 0x068, "GPUREG_VIEWPORT_XY" }, | 51 | {0x068, "GPUREG_VIEWPORT_XY"}, |
| 53 | 52 | ||
| 54 | { 0x06A, "GPUREG_EARLYDEPTH_DATA" }, | 53 | {0x06A, "GPUREG_EARLYDEPTH_DATA"}, |
| 55 | 54 | ||
| 56 | { 0x06D, "GPUREG_DEPTHMAP_ENABLE" }, | 55 | {0x06D, "GPUREG_DEPTHMAP_ENABLE"}, |
| 57 | { 0x06E, "GPUREG_RENDERBUF_DIM" }, | 56 | {0x06E, "GPUREG_RENDERBUF_DIM"}, |
| 58 | { 0x06F, "GPUREG_SH_OUTATTR_CLOCK" }, | 57 | {0x06F, "GPUREG_SH_OUTATTR_CLOCK"}, |
| 59 | 58 | ||
| 60 | { 0x080, "GPUREG_TEXUNIT_CONFIG" }, | 59 | {0x080, "GPUREG_TEXUNIT_CONFIG"}, |
| 61 | { 0x081, "GPUREG_TEXUNIT0_BORDER_COLOR" }, | 60 | {0x081, "GPUREG_TEXUNIT0_BORDER_COLOR"}, |
| 62 | { 0x082, "GPUREG_TEXUNIT0_DIM" }, | 61 | {0x082, "GPUREG_TEXUNIT0_DIM"}, |
| 63 | { 0x083, "GPUREG_TEXUNIT0_PARAM" }, | 62 | {0x083, "GPUREG_TEXUNIT0_PARAM"}, |
| 64 | { 0x084, "GPUREG_TEXUNIT0_LOD" }, | 63 | {0x084, "GPUREG_TEXUNIT0_LOD"}, |
| 65 | { 0x085, "GPUREG_TEXUNIT0_ADDR1" }, | 64 | {0x085, "GPUREG_TEXUNIT0_ADDR1"}, |
| 66 | { 0x086, "GPUREG_TEXUNIT0_ADDR2" }, | 65 | {0x086, "GPUREG_TEXUNIT0_ADDR2"}, |
| 67 | { 0x087, "GPUREG_TEXUNIT0_ADDR3" }, | 66 | {0x087, "GPUREG_TEXUNIT0_ADDR3"}, |
| 68 | { 0x088, "GPUREG_TEXUNIT0_ADDR4" }, | 67 | {0x088, "GPUREG_TEXUNIT0_ADDR4"}, |
| 69 | { 0x089, "GPUREG_TEXUNIT0_ADDR5" }, | 68 | {0x089, "GPUREG_TEXUNIT0_ADDR5"}, |
| 70 | { 0x08A, "GPUREG_TEXUNIT0_ADDR6" }, | 69 | {0x08A, "GPUREG_TEXUNIT0_ADDR6"}, |
| 71 | { 0x08B, "GPUREG_TEXUNIT0_SHADOW" }, | 70 | {0x08B, "GPUREG_TEXUNIT0_SHADOW"}, |
| 72 | 71 | ||
| 73 | { 0x08E, "GPUREG_TEXUNIT0_TYPE" }, | 72 | {0x08E, "GPUREG_TEXUNIT0_TYPE"}, |
| 74 | { 0x08F, "GPUREG_LIGHTING_ENABLE0" }, | 73 | {0x08F, "GPUREG_LIGHTING_ENABLE0"}, |
| 75 | 74 | ||
| 76 | { 0x091, "GPUREG_TEXUNIT1_BORDER_COLOR" }, | 75 | {0x091, "GPUREG_TEXUNIT1_BORDER_COLOR"}, |
| 77 | { 0x092, "GPUREG_TEXUNIT1_DIM" }, | 76 | {0x092, "GPUREG_TEXUNIT1_DIM"}, |
| 78 | { 0x093, "GPUREG_TEXUNIT1_PARAM" }, | 77 | {0x093, "GPUREG_TEXUNIT1_PARAM"}, |
| 79 | { 0x094, "GPUREG_TEXUNIT1_LOD" }, | 78 | {0x094, "GPUREG_TEXUNIT1_LOD"}, |
| 80 | { 0x095, "GPUREG_TEXUNIT1_ADDR" }, | 79 | {0x095, "GPUREG_TEXUNIT1_ADDR"}, |
| 81 | { 0x096, "GPUREG_TEXUNIT1_TYPE" }, | 80 | {0x096, "GPUREG_TEXUNIT1_TYPE"}, |
| 82 | 81 | ||
| 83 | { 0x099, "GPUREG_TEXUNIT2_BORDER_COLOR" }, | 82 | {0x099, "GPUREG_TEXUNIT2_BORDER_COLOR"}, |
| 84 | { 0x09A, "GPUREG_TEXUNIT2_DIM" }, | 83 | {0x09A, "GPUREG_TEXUNIT2_DIM"}, |
| 85 | { 0x09B, "GPUREG_TEXUNIT2_PARAM" }, | 84 | {0x09B, "GPUREG_TEXUNIT2_PARAM"}, |
| 86 | { 0x09C, "GPUREG_TEXUNIT2_LOD" }, | 85 | {0x09C, "GPUREG_TEXUNIT2_LOD"}, |
| 87 | { 0x09D, "GPUREG_TEXUNIT2_ADDR" }, | 86 | {0x09D, "GPUREG_TEXUNIT2_ADDR"}, |
| 88 | { 0x09E, "GPUREG_TEXUNIT2_TYPE" }, | 87 | {0x09E, "GPUREG_TEXUNIT2_TYPE"}, |
| 89 | 88 | ||
| 90 | { 0x0A8, "GPUREG_TEXUNIT3_PROCTEX0" }, | 89 | {0x0A8, "GPUREG_TEXUNIT3_PROCTEX0"}, |
| 91 | { 0x0A9, "GPUREG_TEXUNIT3_PROCTEX1" }, | 90 | {0x0A9, "GPUREG_TEXUNIT3_PROCTEX1"}, |
| 92 | { 0x0AA, "GPUREG_TEXUNIT3_PROCTEX2" }, | 91 | {0x0AA, "GPUREG_TEXUNIT3_PROCTEX2"}, |
| 93 | { 0x0AB, "GPUREG_TEXUNIT3_PROCTEX3" }, | 92 | {0x0AB, "GPUREG_TEXUNIT3_PROCTEX3"}, |
| 94 | { 0x0AC, "GPUREG_TEXUNIT3_PROCTEX4" }, | 93 | {0x0AC, "GPUREG_TEXUNIT3_PROCTEX4"}, |
| 95 | { 0x0AD, "GPUREG_TEXUNIT3_PROCTEX5" }, | 94 | {0x0AD, "GPUREG_TEXUNIT3_PROCTEX5"}, |
| 96 | 95 | ||
| 97 | { 0x0AF, "GPUREG_PROCTEX_LUT" }, | 96 | {0x0AF, "GPUREG_PROCTEX_LUT"}, |
| 98 | { 0x0B0, "GPUREG_PROCTEX_LUT_DATA0" }, | 97 | {0x0B0, "GPUREG_PROCTEX_LUT_DATA0"}, |
| 99 | { 0x0B1, "GPUREG_PROCTEX_LUT_DATA1" }, | 98 | {0x0B1, "GPUREG_PROCTEX_LUT_DATA1"}, |
| 100 | { 0x0B2, "GPUREG_PROCTEX_LUT_DATA2" }, | 99 | {0x0B2, "GPUREG_PROCTEX_LUT_DATA2"}, |
| 101 | { 0x0B3, "GPUREG_PROCTEX_LUT_DATA3" }, | 100 | {0x0B3, "GPUREG_PROCTEX_LUT_DATA3"}, |
| 102 | { 0x0B4, "GPUREG_PROCTEX_LUT_DATA4" }, | 101 | {0x0B4, "GPUREG_PROCTEX_LUT_DATA4"}, |
| 103 | { 0x0B5, "GPUREG_PROCTEX_LUT_DATA5" }, | 102 | {0x0B5, "GPUREG_PROCTEX_LUT_DATA5"}, |
| 104 | { 0x0B6, "GPUREG_PROCTEX_LUT_DATA6" }, | 103 | {0x0B6, "GPUREG_PROCTEX_LUT_DATA6"}, |
| 105 | { 0x0B7, "GPUREG_PROCTEX_LUT_DATA7" }, | 104 | {0x0B7, "GPUREG_PROCTEX_LUT_DATA7"}, |
| 106 | 105 | ||
| 107 | { 0x0C0, "GPUREG_TEXENV0_SOURCE" }, | 106 | {0x0C0, "GPUREG_TEXENV0_SOURCE"}, |
| 108 | { 0x0C1, "GPUREG_TEXENV0_OPERAND" }, | 107 | {0x0C1, "GPUREG_TEXENV0_OPERAND"}, |
| 109 | { 0x0C2, "GPUREG_TEXENV0_COMBINER" }, | 108 | {0x0C2, "GPUREG_TEXENV0_COMBINER"}, |
| 110 | { 0x0C3, "GPUREG_TEXENV0_COLOR" }, | 109 | {0x0C3, "GPUREG_TEXENV0_COLOR"}, |
| 111 | { 0x0C4, "GPUREG_TEXENV0_SCALE" }, | 110 | {0x0C4, "GPUREG_TEXENV0_SCALE"}, |
| 112 | 111 | ||
| 113 | { 0x0C8, "GPUREG_TEXENV1_SOURCE" }, | 112 | {0x0C8, "GPUREG_TEXENV1_SOURCE"}, |
| 114 | { 0x0C9, "GPUREG_TEXENV1_OPERAND" }, | 113 | {0x0C9, "GPUREG_TEXENV1_OPERAND"}, |
| 115 | { 0x0CA, "GPUREG_TEXENV1_COMBINER" }, | 114 | {0x0CA, "GPUREG_TEXENV1_COMBINER"}, |
| 116 | { 0x0CB, "GPUREG_TEXENV1_COLOR" }, | 115 | {0x0CB, "GPUREG_TEXENV1_COLOR"}, |
| 117 | { 0x0CC, "GPUREG_TEXENV1_SCALE" }, | 116 | {0x0CC, "GPUREG_TEXENV1_SCALE"}, |
| 118 | 117 | ||
| 119 | { 0x0D0, "GPUREG_TEXENV2_SOURCE" }, | 118 | {0x0D0, "GPUREG_TEXENV2_SOURCE"}, |
| 120 | { 0x0D1, "GPUREG_TEXENV2_OPERAND" }, | 119 | {0x0D1, "GPUREG_TEXENV2_OPERAND"}, |
| 121 | { 0x0D2, "GPUREG_TEXENV2_COMBINER" }, | 120 | {0x0D2, "GPUREG_TEXENV2_COMBINER"}, |
| 122 | { 0x0D3, "GPUREG_TEXENV2_COLOR" }, | 121 | {0x0D3, "GPUREG_TEXENV2_COLOR"}, |
| 123 | { 0x0D4, "GPUREG_TEXENV2_SCALE" }, | 122 | {0x0D4, "GPUREG_TEXENV2_SCALE"}, |
| 124 | 123 | ||
| 125 | { 0x0D8, "GPUREG_TEXENV3_SOURCE" }, | 124 | {0x0D8, "GPUREG_TEXENV3_SOURCE"}, |
| 126 | { 0x0D9, "GPUREG_TEXENV3_OPERAND" }, | 125 | {0x0D9, "GPUREG_TEXENV3_OPERAND"}, |
| 127 | { 0x0DA, "GPUREG_TEXENV3_COMBINER" }, | 126 | {0x0DA, "GPUREG_TEXENV3_COMBINER"}, |
| 128 | { 0x0DB, "GPUREG_TEXENV3_COLOR" }, | 127 | {0x0DB, "GPUREG_TEXENV3_COLOR"}, |
| 129 | { 0x0DC, "GPUREG_TEXENV3_SCALE" }, | 128 | {0x0DC, "GPUREG_TEXENV3_SCALE"}, |
| 130 | 129 | ||
| 131 | { 0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER" }, | 130 | {0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER"}, |
| 132 | { 0x0E1, "GPUREG_FOG_COLOR" }, | 131 | {0x0E1, "GPUREG_FOG_COLOR"}, |
| 133 | 132 | ||
| 134 | { 0x0E4, "GPUREG_GAS_ATTENUATION" }, | 133 | {0x0E4, "GPUREG_GAS_ATTENUATION"}, |
| 135 | { 0x0E5, "GPUREG_GAS_ACCMAX" }, | 134 | {0x0E5, "GPUREG_GAS_ACCMAX"}, |
| 136 | { 0x0E6, "GPUREG_FOG_LUT_INDEX" }, | 135 | {0x0E6, "GPUREG_FOG_LUT_INDEX"}, |
| 137 | 136 | ||
| 138 | { 0x0E8, "GPUREG_FOG_LUT_DATA0" }, | 137 | {0x0E8, "GPUREG_FOG_LUT_DATA0"}, |
| 139 | { 0x0E9, "GPUREG_FOG_LUT_DATA1" }, | 138 | {0x0E9, "GPUREG_FOG_LUT_DATA1"}, |
| 140 | { 0x0EA, "GPUREG_FOG_LUT_DATA2" }, | 139 | {0x0EA, "GPUREG_FOG_LUT_DATA2"}, |
| 141 | { 0x0EB, "GPUREG_FOG_LUT_DATA3" }, | 140 | {0x0EB, "GPUREG_FOG_LUT_DATA3"}, |
| 142 | { 0x0EC, "GPUREG_FOG_LUT_DATA4" }, | 141 | {0x0EC, "GPUREG_FOG_LUT_DATA4"}, |
| 143 | { 0x0ED, "GPUREG_FOG_LUT_DATA5" }, | 142 | {0x0ED, "GPUREG_FOG_LUT_DATA5"}, |
| 144 | { 0x0EE, "GPUREG_FOG_LUT_DATA6" }, | 143 | {0x0EE, "GPUREG_FOG_LUT_DATA6"}, |
| 145 | { 0x0EF, "GPUREG_FOG_LUT_DATA7" }, | 144 | {0x0EF, "GPUREG_FOG_LUT_DATA7"}, |
| 146 | { 0x0F0, "GPUREG_TEXENV4_SOURCE" }, | 145 | {0x0F0, "GPUREG_TEXENV4_SOURCE"}, |
| 147 | { 0x0F1, "GPUREG_TEXENV4_OPERAND" }, | 146 | {0x0F1, "GPUREG_TEXENV4_OPERAND"}, |
| 148 | { 0x0F2, "GPUREG_TEXENV4_COMBINER" }, | 147 | {0x0F2, "GPUREG_TEXENV4_COMBINER"}, |
| 149 | { 0x0F3, "GPUREG_TEXENV4_COLOR" }, | 148 | {0x0F3, "GPUREG_TEXENV4_COLOR"}, |
| 150 | { 0x0F4, "GPUREG_TEXENV4_SCALE" }, | 149 | {0x0F4, "GPUREG_TEXENV4_SCALE"}, |
| 151 | 150 | ||
| 152 | { 0x0F8, "GPUREG_TEXENV5_SOURCE" }, | 151 | {0x0F8, "GPUREG_TEXENV5_SOURCE"}, |
| 153 | { 0x0F9, "GPUREG_TEXENV5_OPERAND" }, | 152 | {0x0F9, "GPUREG_TEXENV5_OPERAND"}, |
| 154 | { 0x0FA, "GPUREG_TEXENV5_COMBINER" }, | 153 | {0x0FA, "GPUREG_TEXENV5_COMBINER"}, |
| 155 | { 0x0FB, "GPUREG_TEXENV5_COLOR" }, | 154 | {0x0FB, "GPUREG_TEXENV5_COLOR"}, |
| 156 | { 0x0FC, "GPUREG_TEXENV5_SCALE" }, | 155 | {0x0FC, "GPUREG_TEXENV5_SCALE"}, |
| 157 | { 0x0FD, "GPUREG_TEXENV_BUFFER_COLOR" }, | 156 | {0x0FD, "GPUREG_TEXENV_BUFFER_COLOR"}, |
| 158 | 157 | ||
| 159 | { 0x100, "GPUREG_COLOR_OPERATION" }, | 158 | {0x100, "GPUREG_COLOR_OPERATION"}, |
| 160 | { 0x101, "GPUREG_BLEND_FUNC" }, | 159 | {0x101, "GPUREG_BLEND_FUNC"}, |
| 161 | { 0x102, "GPUREG_LOGIC_OP" }, | 160 | {0x102, "GPUREG_LOGIC_OP"}, |
| 162 | { 0x103, "GPUREG_BLEND_COLOR" }, | 161 | {0x103, "GPUREG_BLEND_COLOR"}, |
| 163 | { 0x104, "GPUREG_FRAGOP_ALPHA_TEST" }, | 162 | {0x104, "GPUREG_FRAGOP_ALPHA_TEST"}, |
| 164 | { 0x105, "GPUREG_STENCIL_TEST" }, | 163 | {0x105, "GPUREG_STENCIL_TEST"}, |
| 165 | { 0x106, "GPUREG_STENCIL_OP" }, | 164 | {0x106, "GPUREG_STENCIL_OP"}, |
| 166 | { 0x107, "GPUREG_DEPTH_COLOR_MASK" }, | 165 | {0x107, "GPUREG_DEPTH_COLOR_MASK"}, |
| 167 | 166 | ||
| 168 | { 0x110, "GPUREG_FRAMEBUFFER_INVALIDATE" }, | 167 | {0x110, "GPUREG_FRAMEBUFFER_INVALIDATE"}, |
| 169 | { 0x111, "GPUREG_FRAMEBUFFER_FLUSH" }, | 168 | {0x111, "GPUREG_FRAMEBUFFER_FLUSH"}, |
| 170 | { 0x112, "GPUREG_COLORBUFFER_READ" }, | 169 | {0x112, "GPUREG_COLORBUFFER_READ"}, |
| 171 | { 0x113, "GPUREG_COLORBUFFER_WRITE" }, | 170 | {0x113, "GPUREG_COLORBUFFER_WRITE"}, |
| 172 | { 0x114, "GPUREG_DEPTHBUFFER_READ" }, | 171 | {0x114, "GPUREG_DEPTHBUFFER_READ"}, |
| 173 | { 0x115, "GPUREG_DEPTHBUFFER_WRITE" }, | 172 | {0x115, "GPUREG_DEPTHBUFFER_WRITE"}, |
| 174 | { 0x116, "GPUREG_DEPTHBUFFER_FORMAT" }, | 173 | {0x116, "GPUREG_DEPTHBUFFER_FORMAT"}, |
| 175 | { 0x117, "GPUREG_COLORBUFFER_FORMAT" }, | 174 | {0x117, "GPUREG_COLORBUFFER_FORMAT"}, |
| 176 | { 0x118, "GPUREG_EARLYDEPTH_TEST2" }, | 175 | {0x118, "GPUREG_EARLYDEPTH_TEST2"}, |
| 177 | 176 | ||
| 178 | { 0x11B, "GPUREG_FRAMEBUFFER_BLOCK32" }, | 177 | {0x11B, "GPUREG_FRAMEBUFFER_BLOCK32"}, |
| 179 | { 0x11C, "GPUREG_DEPTHBUFFER_LOC" }, | 178 | {0x11C, "GPUREG_DEPTHBUFFER_LOC"}, |
| 180 | { 0x11D, "GPUREG_COLORBUFFER_LOC" }, | 179 | {0x11D, "GPUREG_COLORBUFFER_LOC"}, |
| 181 | { 0x11E, "GPUREG_FRAMEBUFFER_DIM" }, | 180 | {0x11E, "GPUREG_FRAMEBUFFER_DIM"}, |
| 182 | 181 | ||
| 183 | { 0x120, "GPUREG_GAS_LIGHT_XY" }, | 182 | {0x120, "GPUREG_GAS_LIGHT_XY"}, |
| 184 | { 0x121, "GPUREG_GAS_LIGHT_Z" }, | 183 | {0x121, "GPUREG_GAS_LIGHT_Z"}, |
| 185 | { 0x122, "GPUREG_GAS_LIGHT_Z_COLOR" }, | 184 | {0x122, "GPUREG_GAS_LIGHT_Z_COLOR"}, |
| 186 | { 0x123, "GPUREG_GAS_LUT_INDEX" }, | 185 | {0x123, "GPUREG_GAS_LUT_INDEX"}, |
| 187 | { 0x124, "GPUREG_GAS_LUT_DATA" }, | 186 | {0x124, "GPUREG_GAS_LUT_DATA"}, |
| 188 | 187 | ||
| 189 | { 0x126, "GPUREG_GAS_DELTAZ_DEPTH" }, | 188 | {0x126, "GPUREG_GAS_DELTAZ_DEPTH"}, |
| 190 | 189 | ||
| 191 | { 0x130, "GPUREG_FRAGOP_SHADOW" }, | 190 | {0x130, "GPUREG_FRAGOP_SHADOW"}, |
| 192 | 191 | ||
| 193 | { 0x140, "GPUREG_LIGHT0_SPECULAR0" }, | 192 | {0x140, "GPUREG_LIGHT0_SPECULAR0"}, |
| 194 | { 0x141, "GPUREG_LIGHT0_SPECULAR1" }, | 193 | {0x141, "GPUREG_LIGHT0_SPECULAR1"}, |
| 195 | { 0x142, "GPUREG_LIGHT0_DIFFUSE" }, | 194 | {0x142, "GPUREG_LIGHT0_DIFFUSE"}, |
| 196 | { 0x143, "GPUREG_LIGHT0_AMBIENT" }, | 195 | {0x143, "GPUREG_LIGHT0_AMBIENT"}, |
| 197 | { 0x144, "GPUREG_LIGHT0_XY" }, | 196 | {0x144, "GPUREG_LIGHT0_XY"}, |
| 198 | { 0x145, "GPUREG_LIGHT0_Z" }, | 197 | {0x145, "GPUREG_LIGHT0_Z"}, |
| 199 | { 0x146, "GPUREG_LIGHT0_SPOTDIR_XY" }, | 198 | {0x146, "GPUREG_LIGHT0_SPOTDIR_XY"}, |
| 200 | { 0x147, "GPUREG_LIGHT0_SPOTDIR_Z" }, | 199 | {0x147, "GPUREG_LIGHT0_SPOTDIR_Z"}, |
| 201 | 200 | ||
| 202 | { 0x149, "GPUREG_LIGHT0_CONFIG" }, | 201 | {0x149, "GPUREG_LIGHT0_CONFIG"}, |
| 203 | { 0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS" }, | 202 | {0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS"}, |
| 204 | { 0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE" }, | 203 | {0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE"}, |
| 205 | 204 | ||
| 206 | { 0x150, "GPUREG_LIGHT1_SPECULAR0" }, | 205 | {0x150, "GPUREG_LIGHT1_SPECULAR0"}, |
| 207 | { 0x151, "GPUREG_LIGHT1_SPECULAR1" }, | 206 | {0x151, "GPUREG_LIGHT1_SPECULAR1"}, |
| 208 | { 0x152, "GPUREG_LIGHT1_DIFFUSE" }, | 207 | {0x152, "GPUREG_LIGHT1_DIFFUSE"}, |
| 209 | { 0x153, "GPUREG_LIGHT1_AMBIENT" }, | 208 | {0x153, "GPUREG_LIGHT1_AMBIENT"}, |
| 210 | { 0x154, "GPUREG_LIGHT1_XY" }, | 209 | {0x154, "GPUREG_LIGHT1_XY"}, |
| 211 | { 0x155, "GPUREG_LIGHT1_Z" }, | 210 | {0x155, "GPUREG_LIGHT1_Z"}, |
| 212 | { 0x156, "GPUREG_LIGHT1_SPOTDIR_XY" }, | 211 | {0x156, "GPUREG_LIGHT1_SPOTDIR_XY"}, |
| 213 | { 0x157, "GPUREG_LIGHT1_SPOTDIR_Z" }, | 212 | {0x157, "GPUREG_LIGHT1_SPOTDIR_Z"}, |
| 214 | 213 | ||
| 215 | { 0x159, "GPUREG_LIGHT1_CONFIG" }, | 214 | {0x159, "GPUREG_LIGHT1_CONFIG"}, |
| 216 | { 0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS" }, | 215 | {0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS"}, |
| 217 | { 0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE" }, | 216 | {0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE"}, |
| 218 | 217 | ||
| 219 | { 0x160, "GPUREG_LIGHT2_SPECULAR0" }, | 218 | {0x160, "GPUREG_LIGHT2_SPECULAR0"}, |
| 220 | { 0x161, "GPUREG_LIGHT2_SPECULAR1" }, | 219 | {0x161, "GPUREG_LIGHT2_SPECULAR1"}, |
| 221 | { 0x162, "GPUREG_LIGHT2_DIFFUSE" }, | 220 | {0x162, "GPUREG_LIGHT2_DIFFUSE"}, |
| 222 | { 0x163, "GPUREG_LIGHT2_AMBIENT" }, | 221 | {0x163, "GPUREG_LIGHT2_AMBIENT"}, |
| 223 | { 0x164, "GPUREG_LIGHT2_XY" }, | 222 | {0x164, "GPUREG_LIGHT2_XY"}, |
| 224 | { 0x165, "GPUREG_LIGHT2_Z" }, | 223 | {0x165, "GPUREG_LIGHT2_Z"}, |
| 225 | { 0x166, "GPUREG_LIGHT2_SPOTDIR_XY" }, | 224 | {0x166, "GPUREG_LIGHT2_SPOTDIR_XY"}, |
| 226 | { 0x167, "GPUREG_LIGHT2_SPOTDIR_Z" }, | 225 | {0x167, "GPUREG_LIGHT2_SPOTDIR_Z"}, |
| 227 | 226 | ||
| 228 | { 0x169, "GPUREG_LIGHT2_CONFIG" }, | 227 | {0x169, "GPUREG_LIGHT2_CONFIG"}, |
| 229 | { 0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS" }, | 228 | {0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS"}, |
| 230 | { 0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE" }, | 229 | {0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE"}, |
| 231 | 230 | ||
| 232 | { 0x170, "GPUREG_LIGHT3_SPECULAR0" }, | 231 | {0x170, "GPUREG_LIGHT3_SPECULAR0"}, |
| 233 | { 0x171, "GPUREG_LIGHT3_SPECULAR1" }, | 232 | {0x171, "GPUREG_LIGHT3_SPECULAR1"}, |
| 234 | { 0x172, "GPUREG_LIGHT3_DIFFUSE" }, | 233 | {0x172, "GPUREG_LIGHT3_DIFFUSE"}, |
| 235 | { 0x173, "GPUREG_LIGHT3_AMBIENT" }, | 234 | {0x173, "GPUREG_LIGHT3_AMBIENT"}, |
| 236 | { 0x174, "GPUREG_LIGHT3_XY" }, | 235 | {0x174, "GPUREG_LIGHT3_XY"}, |
| 237 | { 0x175, "GPUREG_LIGHT3_Z" }, | 236 | {0x175, "GPUREG_LIGHT3_Z"}, |
| 238 | { 0x176, "GPUREG_LIGHT3_SPOTDIR_XY" }, | 237 | {0x176, "GPUREG_LIGHT3_SPOTDIR_XY"}, |
| 239 | { 0x177, "GPUREG_LIGHT3_SPOTDIR_Z" }, | 238 | {0x177, "GPUREG_LIGHT3_SPOTDIR_Z"}, |
| 240 | 239 | ||
| 241 | { 0x179, "GPUREG_LIGHT3_CONFIG" }, | 240 | {0x179, "GPUREG_LIGHT3_CONFIG"}, |
| 242 | { 0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS" }, | 241 | {0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS"}, |
| 243 | { 0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE" }, | 242 | {0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE"}, |
| 244 | 243 | ||
| 245 | { 0x180, "GPUREG_LIGHT4_SPECULAR0" }, | 244 | {0x180, "GPUREG_LIGHT4_SPECULAR0"}, |
| 246 | { 0x181, "GPUREG_LIGHT4_SPECULAR1" }, | 245 | {0x181, "GPUREG_LIGHT4_SPECULAR1"}, |
| 247 | { 0x182, "GPUREG_LIGHT4_DIFFUSE" }, | 246 | {0x182, "GPUREG_LIGHT4_DIFFUSE"}, |
| 248 | { 0x183, "GPUREG_LIGHT4_AMBIENT" }, | 247 | {0x183, "GPUREG_LIGHT4_AMBIENT"}, |
| 249 | { 0x184, "GPUREG_LIGHT4_XY" }, | 248 | {0x184, "GPUREG_LIGHT4_XY"}, |
| 250 | { 0x185, "GPUREG_LIGHT4_Z" }, | 249 | {0x185, "GPUREG_LIGHT4_Z"}, |
| 251 | { 0x186, "GPUREG_LIGHT4_SPOTDIR_XY" }, | 250 | {0x186, "GPUREG_LIGHT4_SPOTDIR_XY"}, |
| 252 | { 0x187, "GPUREG_LIGHT4_SPOTDIR_Z" }, | 251 | {0x187, "GPUREG_LIGHT4_SPOTDIR_Z"}, |
| 253 | 252 | ||
| 254 | { 0x189, "GPUREG_LIGHT4_CONFIG" }, | 253 | {0x189, "GPUREG_LIGHT4_CONFIG"}, |
| 255 | { 0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS" }, | 254 | {0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS"}, |
| 256 | { 0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE" }, | 255 | {0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE"}, |
| 257 | 256 | ||
| 258 | { 0x190, "GPUREG_LIGHT5_SPECULAR0" }, | 257 | {0x190, "GPUREG_LIGHT5_SPECULAR0"}, |
| 259 | { 0x191, "GPUREG_LIGHT5_SPECULAR1" }, | 258 | {0x191, "GPUREG_LIGHT5_SPECULAR1"}, |
| 260 | { 0x192, "GPUREG_LIGHT5_DIFFUSE" }, | 259 | {0x192, "GPUREG_LIGHT5_DIFFUSE"}, |
| 261 | { 0x193, "GPUREG_LIGHT5_AMBIENT" }, | 260 | {0x193, "GPUREG_LIGHT5_AMBIENT"}, |
| 262 | { 0x194, "GPUREG_LIGHT5_XY" }, | 261 | {0x194, "GPUREG_LIGHT5_XY"}, |
| 263 | { 0x195, "GPUREG_LIGHT5_Z" }, | 262 | {0x195, "GPUREG_LIGHT5_Z"}, |
| 264 | { 0x196, "GPUREG_LIGHT5_SPOTDIR_XY" }, | 263 | {0x196, "GPUREG_LIGHT5_SPOTDIR_XY"}, |
| 265 | { 0x197, "GPUREG_LIGHT5_SPOTDIR_Z" }, | 264 | {0x197, "GPUREG_LIGHT5_SPOTDIR_Z"}, |
| 266 | 265 | ||
| 267 | { 0x199, "GPUREG_LIGHT5_CONFIG" }, | 266 | {0x199, "GPUREG_LIGHT5_CONFIG"}, |
| 268 | { 0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS" }, | 267 | {0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS"}, |
| 269 | { 0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE" }, | 268 | {0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE"}, |
| 270 | 269 | ||
| 271 | { 0x1A0, "GPUREG_LIGHT6_SPECULAR0" }, | 270 | {0x1A0, "GPUREG_LIGHT6_SPECULAR0"}, |
| 272 | { 0x1A1, "GPUREG_LIGHT6_SPECULAR1" }, | 271 | {0x1A1, "GPUREG_LIGHT6_SPECULAR1"}, |
| 273 | { 0x1A2, "GPUREG_LIGHT6_DIFFUSE" }, | 272 | {0x1A2, "GPUREG_LIGHT6_DIFFUSE"}, |
| 274 | { 0x1A3, "GPUREG_LIGHT6_AMBIENT" }, | 273 | {0x1A3, "GPUREG_LIGHT6_AMBIENT"}, |
| 275 | { 0x1A4, "GPUREG_LIGHT6_XY" }, | 274 | {0x1A4, "GPUREG_LIGHT6_XY"}, |
| 276 | { 0x1A5, "GPUREG_LIGHT6_Z" }, | 275 | {0x1A5, "GPUREG_LIGHT6_Z"}, |
| 277 | { 0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY" }, | 276 | {0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY"}, |
| 278 | { 0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z" }, | 277 | {0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z"}, |
| 279 | 278 | ||
| 280 | { 0x1A9, "GPUREG_LIGHT6_CONFIG" }, | 279 | {0x1A9, "GPUREG_LIGHT6_CONFIG"}, |
| 281 | { 0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS" }, | 280 | {0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS"}, |
| 282 | { 0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE" }, | 281 | {0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE"}, |
| 283 | 282 | ||
| 284 | { 0x1B0, "GPUREG_LIGHT7_SPECULAR0" }, | 283 | {0x1B0, "GPUREG_LIGHT7_SPECULAR0"}, |
| 285 | { 0x1B1, "GPUREG_LIGHT7_SPECULAR1" }, | 284 | {0x1B1, "GPUREG_LIGHT7_SPECULAR1"}, |
| 286 | { 0x1B2, "GPUREG_LIGHT7_DIFFUSE" }, | 285 | {0x1B2, "GPUREG_LIGHT7_DIFFUSE"}, |
| 287 | { 0x1B3, "GPUREG_LIGHT7_AMBIENT" }, | 286 | {0x1B3, "GPUREG_LIGHT7_AMBIENT"}, |
| 288 | { 0x1B4, "GPUREG_LIGHT7_XY" }, | 287 | {0x1B4, "GPUREG_LIGHT7_XY"}, |
| 289 | { 0x1B5, "GPUREG_LIGHT7_Z" }, | 288 | {0x1B5, "GPUREG_LIGHT7_Z"}, |
| 290 | { 0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY" }, | 289 | {0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY"}, |
| 291 | { 0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z" }, | 290 | {0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z"}, |
| 292 | 291 | ||
| 293 | { 0x1B9, "GPUREG_LIGHT7_CONFIG" }, | 292 | {0x1B9, "GPUREG_LIGHT7_CONFIG"}, |
| 294 | { 0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS" }, | 293 | {0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS"}, |
| 295 | { 0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE" }, | 294 | {0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE"}, |
| 296 | 295 | ||
| 297 | { 0x1C0, "GPUREG_LIGHTING_AMBIENT" }, | 296 | {0x1C0, "GPUREG_LIGHTING_AMBIENT"}, |
| 298 | 297 | ||
| 299 | { 0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS" }, | 298 | {0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS"}, |
| 300 | { 0x1C3, "GPUREG_LIGHTING_CONFIG0" }, | 299 | {0x1C3, "GPUREG_LIGHTING_CONFIG0"}, |
| 301 | { 0x1C4, "GPUREG_LIGHTING_CONFIG1" }, | 300 | {0x1C4, "GPUREG_LIGHTING_CONFIG1"}, |
| 302 | { 0x1C5, "GPUREG_LIGHTING_LUT_INDEX" }, | 301 | {0x1C5, "GPUREG_LIGHTING_LUT_INDEX"}, |
| 303 | { 0x1C6, "GPUREG_LIGHTING_ENABLE1" }, | 302 | {0x1C6, "GPUREG_LIGHTING_ENABLE1"}, |
| 304 | 303 | ||
| 305 | { 0x1C8, "GPUREG_LIGHTING_LUT_DATA0" }, | 304 | {0x1C8, "GPUREG_LIGHTING_LUT_DATA0"}, |
| 306 | { 0x1C9, "GPUREG_LIGHTING_LUT_DATA1" }, | 305 | {0x1C9, "GPUREG_LIGHTING_LUT_DATA1"}, |
| 307 | { 0x1CA, "GPUREG_LIGHTING_LUT_DATA2" }, | 306 | {0x1CA, "GPUREG_LIGHTING_LUT_DATA2"}, |
| 308 | { 0x1CB, "GPUREG_LIGHTING_LUT_DATA3" }, | 307 | {0x1CB, "GPUREG_LIGHTING_LUT_DATA3"}, |
| 309 | { 0x1CC, "GPUREG_LIGHTING_LUT_DATA4" }, | 308 | {0x1CC, "GPUREG_LIGHTING_LUT_DATA4"}, |
| 310 | { 0x1CD, "GPUREG_LIGHTING_LUT_DATA5" }, | 309 | {0x1CD, "GPUREG_LIGHTING_LUT_DATA5"}, |
| 311 | { 0x1CE, "GPUREG_LIGHTING_LUT_DATA6" }, | 310 | {0x1CE, "GPUREG_LIGHTING_LUT_DATA6"}, |
| 312 | { 0x1CF, "GPUREG_LIGHTING_LUT_DATA7" }, | 311 | {0x1CF, "GPUREG_LIGHTING_LUT_DATA7"}, |
| 313 | { 0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS" }, | 312 | {0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS"}, |
| 314 | { 0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT" }, | 313 | {0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT"}, |
| 315 | { 0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE" }, | 314 | {0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE"}, |
| 316 | 315 | ||
| 317 | { 0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION" }, | 316 | {0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION"}, |
| 318 | 317 | ||
| 319 | { 0x200, "GPUREG_ATTRIBBUFFERS_LOC" }, | 318 | {0x200, "GPUREG_ATTRIBBUFFERS_LOC"}, |
| 320 | { 0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW" }, | 319 | {0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW"}, |
| 321 | { 0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH" }, | 320 | {0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH"}, |
| 322 | { 0x203, "GPUREG_ATTRIBBUFFER0_OFFSET" }, | 321 | {0x203, "GPUREG_ATTRIBBUFFER0_OFFSET"}, |
| 323 | { 0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1" }, | 322 | {0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1"}, |
| 324 | { 0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2" }, | 323 | {0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2"}, |
| 325 | { 0x206, "GPUREG_ATTRIBBUFFER1_OFFSET" }, | 324 | {0x206, "GPUREG_ATTRIBBUFFER1_OFFSET"}, |
| 326 | { 0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1" }, | 325 | {0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1"}, |
| 327 | { 0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2" }, | 326 | {0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2"}, |
| 328 | { 0x209, "GPUREG_ATTRIBBUFFER2_OFFSET" }, | 327 | {0x209, "GPUREG_ATTRIBBUFFER2_OFFSET"}, |
| 329 | { 0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1" }, | 328 | {0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1"}, |
| 330 | { 0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2" }, | 329 | {0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2"}, |
| 331 | { 0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET" }, | 330 | {0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET"}, |
| 332 | { 0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1" }, | 331 | {0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1"}, |
| 333 | { 0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2" }, | 332 | {0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2"}, |
| 334 | { 0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET" }, | 333 | {0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET"}, |
| 335 | { 0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1" }, | 334 | {0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1"}, |
| 336 | { 0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2" }, | 335 | {0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2"}, |
| 337 | { 0x212, "GPUREG_ATTRIBBUFFER5_OFFSET" }, | 336 | {0x212, "GPUREG_ATTRIBBUFFER5_OFFSET"}, |
| 338 | { 0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1" }, | 337 | {0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1"}, |
| 339 | { 0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2" }, | 338 | {0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2"}, |
| 340 | { 0x215, "GPUREG_ATTRIBBUFFER6_OFFSET" }, | 339 | {0x215, "GPUREG_ATTRIBBUFFER6_OFFSET"}, |
| 341 | { 0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1" }, | 340 | {0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1"}, |
| 342 | { 0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2" }, | 341 | {0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2"}, |
| 343 | { 0x218, "GPUREG_ATTRIBBUFFER7_OFFSET" }, | 342 | {0x218, "GPUREG_ATTRIBBUFFER7_OFFSET"}, |
| 344 | { 0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1" }, | 343 | {0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1"}, |
| 345 | { 0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2" }, | 344 | {0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2"}, |
| 346 | { 0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET" }, | 345 | {0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET"}, |
| 347 | { 0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1" }, | 346 | {0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1"}, |
| 348 | { 0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2" }, | 347 | {0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2"}, |
| 349 | { 0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET" }, | 348 | {0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET"}, |
| 350 | { 0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1" }, | 349 | {0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1"}, |
| 351 | { 0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2" }, | 350 | {0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2"}, |
| 352 | { 0x221, "GPUREG_ATTRIBBUFFER10_OFFSET" }, | 351 | {0x221, "GPUREG_ATTRIBBUFFER10_OFFSET"}, |
| 353 | { 0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1" }, | 352 | {0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1"}, |
| 354 | { 0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2" }, | 353 | {0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2"}, |
| 355 | { 0x224, "GPUREG_ATTRIBBUFFER11_OFFSET" }, | 354 | {0x224, "GPUREG_ATTRIBBUFFER11_OFFSET"}, |
| 356 | { 0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1" }, | 355 | {0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1"}, |
| 357 | { 0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2" }, | 356 | {0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2"}, |
| 358 | { 0x227, "GPUREG_INDEXBUFFER_CONFIG" }, | 357 | {0x227, "GPUREG_INDEXBUFFER_CONFIG"}, |
| 359 | { 0x228, "GPUREG_NUMVERTICES" }, | 358 | {0x228, "GPUREG_NUMVERTICES"}, |
| 360 | { 0x229, "GPUREG_GEOSTAGE_CONFIG" }, | 359 | {0x229, "GPUREG_GEOSTAGE_CONFIG"}, |
| 361 | { 0x22A, "GPUREG_VERTEX_OFFSET" }, | 360 | {0x22A, "GPUREG_VERTEX_OFFSET"}, |
| 362 | 361 | ||
| 363 | { 0x22D, "GPUREG_POST_VERTEX_CACHE_NUM" }, | 362 | {0x22D, "GPUREG_POST_VERTEX_CACHE_NUM"}, |
| 364 | { 0x22E, "GPUREG_DRAWARRAYS" }, | 363 | {0x22E, "GPUREG_DRAWARRAYS"}, |
| 365 | { 0x22F, "GPUREG_DRAWELEMENTS" }, | 364 | {0x22F, "GPUREG_DRAWELEMENTS"}, |
| 366 | 365 | ||
| 367 | { 0x231, "GPUREG_VTX_FUNC" }, | 366 | {0x231, "GPUREG_VTX_FUNC"}, |
| 368 | { 0x232, "GPUREG_FIXEDATTRIB_INDEX" }, | 367 | {0x232, "GPUREG_FIXEDATTRIB_INDEX"}, |
| 369 | { 0x233, "GPUREG_FIXEDATTRIB_DATA0" }, | 368 | {0x233, "GPUREG_FIXEDATTRIB_DATA0"}, |
| 370 | { 0x234, "GPUREG_FIXEDATTRIB_DATA1" }, | 369 | {0x234, "GPUREG_FIXEDATTRIB_DATA1"}, |
| 371 | { 0x235, "GPUREG_FIXEDATTRIB_DATA2" }, | 370 | {0x235, "GPUREG_FIXEDATTRIB_DATA2"}, |
| 372 | 371 | ||
| 373 | { 0x238, "GPUREG_CMDBUF_SIZE0" }, | 372 | {0x238, "GPUREG_CMDBUF_SIZE0"}, |
| 374 | { 0x239, "GPUREG_CMDBUF_SIZE1" }, | 373 | {0x239, "GPUREG_CMDBUF_SIZE1"}, |
| 375 | { 0x23A, "GPUREG_CMDBUF_ADDR0" }, | 374 | {0x23A, "GPUREG_CMDBUF_ADDR0"}, |
| 376 | { 0x23B, "GPUREG_CMDBUF_ADDR1" }, | 375 | {0x23B, "GPUREG_CMDBUF_ADDR1"}, |
| 377 | { 0x23C, "GPUREG_CMDBUF_JUMP0" }, | 376 | {0x23C, "GPUREG_CMDBUF_JUMP0"}, |
| 378 | { 0x23D, "GPUREG_CMDBUF_JUMP1" }, | 377 | {0x23D, "GPUREG_CMDBUF_JUMP1"}, |
| 379 | 378 | ||
| 380 | { 0x242, "GPUREG_VSH_NUM_ATTR" }, | 379 | {0x242, "GPUREG_VSH_NUM_ATTR"}, |
| 381 | 380 | ||
| 382 | { 0x244, "GPUREG_VSH_COM_MODE" }, | 381 | {0x244, "GPUREG_VSH_COM_MODE"}, |
| 383 | { 0x245, "GPUREG_START_DRAW_FUNC0" }, | 382 | {0x245, "GPUREG_START_DRAW_FUNC0"}, |
| 384 | 383 | ||
| 385 | { 0x24A, "GPUREG_VSH_OUTMAP_TOTAL1" }, | 384 | {0x24A, "GPUREG_VSH_OUTMAP_TOTAL1"}, |
| 386 | 385 | ||
| 387 | { 0x251, "GPUREG_VSH_OUTMAP_TOTAL2" }, | 386 | {0x251, "GPUREG_VSH_OUTMAP_TOTAL2"}, |
| 388 | { 0x252, "GPUREG_GSH_MISC0" }, | 387 | {0x252, "GPUREG_GSH_MISC0"}, |
| 389 | { 0x253, "GPUREG_GEOSTAGE_CONFIG2" }, | 388 | {0x253, "GPUREG_GEOSTAGE_CONFIG2"}, |
| 390 | { 0x254, "GPUREG_GSH_MISC1" }, | 389 | {0x254, "GPUREG_GSH_MISC1"}, |
| 391 | 390 | ||
| 392 | { 0x25E, "GPUREG_PRIMITIVE_CONFIG" }, | 391 | {0x25E, "GPUREG_PRIMITIVE_CONFIG"}, |
| 393 | { 0x25F, "GPUREG_RESTART_PRIMITIVE" }, | 392 | {0x25F, "GPUREG_RESTART_PRIMITIVE"}, |
| 394 | 393 | ||
| 395 | { 0x280, "GPUREG_GSH_BOOLUNIFORM" }, | 394 | {0x280, "GPUREG_GSH_BOOLUNIFORM"}, |
| 396 | { 0x281, "GPUREG_GSH_INTUNIFORM_I0" }, | 395 | {0x281, "GPUREG_GSH_INTUNIFORM_I0"}, |
| 397 | { 0x282, "GPUREG_GSH_INTUNIFORM_I1" }, | 396 | {0x282, "GPUREG_GSH_INTUNIFORM_I1"}, |
| 398 | { 0x283, "GPUREG_GSH_INTUNIFORM_I2" }, | 397 | {0x283, "GPUREG_GSH_INTUNIFORM_I2"}, |
| 399 | { 0x284, "GPUREG_GSH_INTUNIFORM_I3" }, | 398 | {0x284, "GPUREG_GSH_INTUNIFORM_I3"}, |
| 400 | 399 | ||
| 401 | { 0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG" }, | 400 | {0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG"}, |
| 402 | { 0x28A, "GPUREG_GSH_ENTRYPOINT" }, | 401 | {0x28A, "GPUREG_GSH_ENTRYPOINT"}, |
| 403 | { 0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW" }, | 402 | {0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW"}, |
| 404 | { 0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH" }, | 403 | {0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH"}, |
| 405 | { 0x28D, "GPUREG_GSH_OUTMAP_MASK" }, | 404 | {0x28D, "GPUREG_GSH_OUTMAP_MASK"}, |
| 406 | 405 | ||
| 407 | { 0x28F, "GPUREG_GSH_CODETRANSFER_END" }, | 406 | {0x28F, "GPUREG_GSH_CODETRANSFER_END"}, |
| 408 | { 0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX" }, | 407 | {0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX"}, |
| 409 | { 0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0" }, | 408 | {0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0"}, |
| 410 | { 0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1" }, | 409 | {0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1"}, |
| 411 | { 0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2" }, | 410 | {0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2"}, |
| 412 | { 0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3" }, | 411 | {0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3"}, |
| 413 | { 0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4" }, | 412 | {0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4"}, |
| 414 | { 0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5" }, | 413 | {0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5"}, |
| 415 | { 0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6" }, | 414 | {0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6"}, |
| 416 | { 0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7" }, | 415 | {0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7"}, |
| 417 | 416 | ||
| 418 | { 0x29B, "GPUREG_GSH_CODETRANSFER_INDEX" }, | 417 | {0x29B, "GPUREG_GSH_CODETRANSFER_INDEX"}, |
| 419 | { 0x29C, "GPUREG_GSH_CODETRANSFER_DATA0" }, | 418 | {0x29C, "GPUREG_GSH_CODETRANSFER_DATA0"}, |
| 420 | { 0x29D, "GPUREG_GSH_CODETRANSFER_DATA1" }, | 419 | {0x29D, "GPUREG_GSH_CODETRANSFER_DATA1"}, |
| 421 | { 0x29E, "GPUREG_GSH_CODETRANSFER_DATA2" }, | 420 | {0x29E, "GPUREG_GSH_CODETRANSFER_DATA2"}, |
| 422 | { 0x29F, "GPUREG_GSH_CODETRANSFER_DATA3" }, | 421 | {0x29F, "GPUREG_GSH_CODETRANSFER_DATA3"}, |
| 423 | { 0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4" }, | 422 | {0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4"}, |
| 424 | { 0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5" }, | 423 | {0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5"}, |
| 425 | { 0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6" }, | 424 | {0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6"}, |
| 426 | { 0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7" }, | 425 | {0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7"}, |
| 427 | 426 | ||
| 428 | { 0x2A5, "GPUREG_GSH_OPDESCS_INDEX" }, | 427 | {0x2A5, "GPUREG_GSH_OPDESCS_INDEX"}, |
| 429 | { 0x2A6, "GPUREG_GSH_OPDESCS_DATA0" }, | 428 | {0x2A6, "GPUREG_GSH_OPDESCS_DATA0"}, |
| 430 | { 0x2A7, "GPUREG_GSH_OPDESCS_DATA1" }, | 429 | {0x2A7, "GPUREG_GSH_OPDESCS_DATA1"}, |
| 431 | { 0x2A8, "GPUREG_GSH_OPDESCS_DATA2" }, | 430 | {0x2A8, "GPUREG_GSH_OPDESCS_DATA2"}, |
| 432 | { 0x2A9, "GPUREG_GSH_OPDESCS_DATA3" }, | 431 | {0x2A9, "GPUREG_GSH_OPDESCS_DATA3"}, |
| 433 | { 0x2AA, "GPUREG_GSH_OPDESCS_DATA4" }, | 432 | {0x2AA, "GPUREG_GSH_OPDESCS_DATA4"}, |
| 434 | { 0x2AB, "GPUREG_GSH_OPDESCS_DATA5" }, | 433 | {0x2AB, "GPUREG_GSH_OPDESCS_DATA5"}, |
| 435 | { 0x2AC, "GPUREG_GSH_OPDESCS_DATA6" }, | 434 | {0x2AC, "GPUREG_GSH_OPDESCS_DATA6"}, |
| 436 | { 0x2AD, "GPUREG_GSH_OPDESCS_DATA7" }, | 435 | {0x2AD, "GPUREG_GSH_OPDESCS_DATA7"}, |
| 437 | 436 | ||
| 438 | { 0x2B0, "GPUREG_VSH_BOOLUNIFORM" }, | 437 | {0x2B0, "GPUREG_VSH_BOOLUNIFORM"}, |
| 439 | { 0x2B1, "GPUREG_VSH_INTUNIFORM_I0" }, | 438 | {0x2B1, "GPUREG_VSH_INTUNIFORM_I0"}, |
| 440 | { 0x2B2, "GPUREG_VSH_INTUNIFORM_I1" }, | 439 | {0x2B2, "GPUREG_VSH_INTUNIFORM_I1"}, |
| 441 | { 0x2B3, "GPUREG_VSH_INTUNIFORM_I2" }, | 440 | {0x2B3, "GPUREG_VSH_INTUNIFORM_I2"}, |
| 442 | { 0x2B4, "GPUREG_VSH_INTUNIFORM_I3" }, | 441 | {0x2B4, "GPUREG_VSH_INTUNIFORM_I3"}, |
| 443 | 442 | ||
| 444 | { 0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG" }, | 443 | {0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG"}, |
| 445 | { 0x2BA, "GPUREG_VSH_ENTRYPOINT" }, | 444 | {0x2BA, "GPUREG_VSH_ENTRYPOINT"}, |
| 446 | { 0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW" }, | 445 | {0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW"}, |
| 447 | { 0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH" }, | 446 | {0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH"}, |
| 448 | { 0x2BD, "GPUREG_VSH_OUTMAP_MASK" }, | 447 | {0x2BD, "GPUREG_VSH_OUTMAP_MASK"}, |
| 449 | 448 | ||
| 450 | { 0x2BF, "GPUREG_VSH_CODETRANSFER_END" }, | 449 | {0x2BF, "GPUREG_VSH_CODETRANSFER_END"}, |
| 451 | { 0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX" }, | 450 | {0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX"}, |
| 452 | { 0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0" }, | 451 | {0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0"}, |
| 453 | { 0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1" }, | 452 | {0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1"}, |
| 454 | { 0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2" }, | 453 | {0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2"}, |
| 455 | { 0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3" }, | 454 | {0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3"}, |
| 456 | { 0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4" }, | 455 | {0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4"}, |
| 457 | { 0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5" }, | 456 | {0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5"}, |
| 458 | { 0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6" }, | 457 | {0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6"}, |
| 459 | { 0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7" }, | 458 | {0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7"}, |
| 460 | 459 | ||
| 461 | { 0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX" }, | 460 | {0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX"}, |
| 462 | { 0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0" }, | 461 | {0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0"}, |
| 463 | { 0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1" }, | 462 | {0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1"}, |
| 464 | { 0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2" }, | 463 | {0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2"}, |
| 465 | { 0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3" }, | 464 | {0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3"}, |
| 466 | { 0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4" }, | 465 | {0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4"}, |
| 467 | { 0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5" }, | 466 | {0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5"}, |
| 468 | { 0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6" }, | 467 | {0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6"}, |
| 469 | { 0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7" }, | 468 | {0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7"}, |
| 470 | 469 | ||
| 471 | { 0x2D5, "GPUREG_VSH_OPDESCS_INDEX" }, | 470 | {0x2D5, "GPUREG_VSH_OPDESCS_INDEX"}, |
| 472 | { 0x2D6, "GPUREG_VSH_OPDESCS_DATA0" }, | 471 | {0x2D6, "GPUREG_VSH_OPDESCS_DATA0"}, |
| 473 | { 0x2D7, "GPUREG_VSH_OPDESCS_DATA1" }, | 472 | {0x2D7, "GPUREG_VSH_OPDESCS_DATA1"}, |
| 474 | { 0x2D8, "GPUREG_VSH_OPDESCS_DATA2" }, | 473 | {0x2D8, "GPUREG_VSH_OPDESCS_DATA2"}, |
| 475 | { 0x2D9, "GPUREG_VSH_OPDESCS_DATA3" }, | 474 | {0x2D9, "GPUREG_VSH_OPDESCS_DATA3"}, |
| 476 | { 0x2DA, "GPUREG_VSH_OPDESCS_DATA4" }, | 475 | {0x2DA, "GPUREG_VSH_OPDESCS_DATA4"}, |
| 477 | { 0x2DB, "GPUREG_VSH_OPDESCS_DATA5" }, | 476 | {0x2DB, "GPUREG_VSH_OPDESCS_DATA5"}, |
| 478 | { 0x2DC, "GPUREG_VSH_OPDESCS_DATA6" }, | 477 | {0x2DC, "GPUREG_VSH_OPDESCS_DATA6"}, |
| 479 | { 0x2DD, "GPUREG_VSH_OPDESCS_DATA7" }, | 478 | {0x2DD, "GPUREG_VSH_OPDESCS_DATA7"}, |
| 480 | }; | 479 | }; |
| 481 | 480 | ||
| 482 | std::string Regs::GetCommandName(int index) { | 481 | std::string Regs::GetCommandName(int index) { |
| @@ -516,5 +515,4 @@ void State::Reset() { | |||
| 516 | Zero(immediate); | 515 | Zero(immediate); |
| 517 | primitive_assembler.Reconfigure(Regs::TriangleTopology::List); | 516 | primitive_assembler.Reconfigure(Regs::TriangleTopology::List); |
| 518 | } | 517 | } |
| 519 | |||
| 520 | } | 518 | } |
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 7099c31a0..b2db609ec 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -16,15 +16,16 @@ | |||
| 16 | #include "common/bit_field.h" | 16 | #include "common/bit_field.h" |
| 17 | #include "common/common_funcs.h" | 17 | #include "common/common_funcs.h" |
| 18 | #include "common/common_types.h" | 18 | #include "common/common_types.h" |
| 19 | #include "common/vector_math.h" | ||
| 20 | #include "common/logging/log.h" | 19 | #include "common/logging/log.h" |
| 20 | #include "common/vector_math.h" | ||
| 21 | 21 | ||
| 22 | namespace Pica { | 22 | namespace Pica { |
| 23 | 23 | ||
| 24 | // Returns index corresponding to the Regs member labeled by field_name | 24 | // Returns index corresponding to the Regs member labeled by field_name |
| 25 | // TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions | 25 | // TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions |
| 26 | // when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])). | 26 | // when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])). |
| 27 | // For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members | 27 | // For details cf. |
| 28 | // https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members | ||
| 28 | // Hopefully, this will be fixed sometime in the future. | 29 | // Hopefully, this will be fixed sometime in the future. |
| 29 | // For lack of better alternatives, we currently hardcode the offsets when constant | 30 | // For lack of better alternatives, we currently hardcode the offsets when constant |
| 30 | // expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts | 31 | // expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts |
| @@ -37,8 +38,9 @@ namespace Pica { | |||
| 37 | // really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX | 38 | // really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX |
| 38 | // and then performs a (no-op) cast to size_t iff the second argument matches the expected | 39 | // and then performs a (no-op) cast to size_t iff the second argument matches the expected |
| 39 | // field offset. Otherwise, the compiler will fail to compile this code. | 40 | // field offset. Otherwise, the compiler will fail to compile this code. |
| 40 | #define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ | 41 | #define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ |
| 41 | ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), size_t>::type)PICA_REG_INDEX(field_name)) | 42 | ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), \ |
| 43 | size_t>::type)PICA_REG_INDEX(field_name)) | ||
| 42 | #endif // _MSC_VER | 44 | #endif // _MSC_VER |
| 43 | 45 | ||
| 44 | struct Regs { | 46 | struct Regs { |
| @@ -51,8 +53,8 @@ struct Regs { | |||
| 51 | 53 | ||
| 52 | enum class CullMode : u32 { | 54 | enum class CullMode : u32 { |
| 53 | // Select which polygons are considered to be "frontfacing". | 55 | // Select which polygons are considered to be "frontfacing". |
| 54 | KeepAll = 0, | 56 | KeepAll = 0, |
| 55 | KeepClockWise = 1, | 57 | KeepClockWise = 1, |
| 56 | KeepCounterClockWise = 2, | 58 | KeepCounterClockWise = 2, |
| 57 | // TODO: What does the third value imply? | 59 | // TODO: What does the third value imply? |
| 58 | }; | 60 | }; |
| @@ -69,48 +71,47 @@ struct Regs { | |||
| 69 | 71 | ||
| 70 | INSERT_PADDING_WORDS(0x9); | 72 | INSERT_PADDING_WORDS(0x9); |
| 71 | 73 | ||
| 72 | BitField<0, 24, u32> viewport_depth_range; // float24 | 74 | BitField<0, 24, u32> viewport_depth_range; // float24 |
| 73 | BitField<0, 24, u32> viewport_depth_near_plane; // float24 | 75 | BitField<0, 24, u32> viewport_depth_near_plane; // float24 |
| 74 | 76 | ||
| 75 | BitField<0, 3, u32> vs_output_total; | 77 | BitField<0, 3, u32> vs_output_total; |
| 76 | 78 | ||
| 77 | union VSOutputAttributes { | 79 | union VSOutputAttributes { |
| 78 | // Maps components of output vertex attributes to semantics | 80 | // Maps components of output vertex attributes to semantics |
| 79 | enum Semantic : u32 | 81 | enum Semantic : u32 { |
| 80 | { | 82 | POSITION_X = 0, |
| 81 | POSITION_X = 0, | 83 | POSITION_Y = 1, |
| 82 | POSITION_Y = 1, | 84 | POSITION_Z = 2, |
| 83 | POSITION_Z = 2, | 85 | POSITION_W = 3, |
| 84 | POSITION_W = 3, | 86 | |
| 85 | 87 | QUATERNION_X = 4, | |
| 86 | QUATERNION_X = 4, | 88 | QUATERNION_Y = 5, |
| 87 | QUATERNION_Y = 5, | 89 | QUATERNION_Z = 6, |
| 88 | QUATERNION_Z = 6, | 90 | QUATERNION_W = 7, |
| 89 | QUATERNION_W = 7, | 91 | |
| 90 | 92 | COLOR_R = 8, | |
| 91 | COLOR_R = 8, | 93 | COLOR_G = 9, |
| 92 | COLOR_G = 9, | 94 | COLOR_B = 10, |
| 93 | COLOR_B = 10, | 95 | COLOR_A = 11, |
| 94 | COLOR_A = 11, | 96 | |
| 95 | 97 | TEXCOORD0_U = 12, | |
| 96 | TEXCOORD0_U = 12, | 98 | TEXCOORD0_V = 13, |
| 97 | TEXCOORD0_V = 13, | 99 | TEXCOORD1_U = 14, |
| 98 | TEXCOORD1_U = 14, | 100 | TEXCOORD1_V = 15, |
| 99 | TEXCOORD1_V = 15, | ||
| 100 | 101 | ||
| 101 | // TODO: Not verified | 102 | // TODO: Not verified |
| 102 | VIEW_X = 18, | 103 | VIEW_X = 18, |
| 103 | VIEW_Y = 19, | 104 | VIEW_Y = 19, |
| 104 | VIEW_Z = 20, | 105 | VIEW_Z = 20, |
| 105 | 106 | ||
| 106 | TEXCOORD2_U = 22, | 107 | TEXCOORD2_U = 22, |
| 107 | TEXCOORD2_V = 23, | 108 | TEXCOORD2_V = 23, |
| 108 | 109 | ||
| 109 | INVALID = 31, | 110 | INVALID = 31, |
| 110 | }; | 111 | }; |
| 111 | 112 | ||
| 112 | BitField< 0, 5, Semantic> map_x; | 113 | BitField<0, 5, Semantic> map_x; |
| 113 | BitField< 8, 5, Semantic> map_y; | 114 | BitField<8, 5, Semantic> map_y; |
| 114 | BitField<16, 5, Semantic> map_z; | 115 | BitField<16, 5, Semantic> map_z; |
| 115 | BitField<24, 5, Semantic> map_w; | 116 | BitField<24, 5, Semantic> map_w; |
| 116 | } vs_output_attributes[7]; | 117 | } vs_output_attributes[7]; |
| @@ -128,77 +129,78 @@ struct Regs { | |||
| 128 | BitField<0, 2, ScissorMode> mode; | 129 | BitField<0, 2, ScissorMode> mode; |
| 129 | 130 | ||
| 130 | union { | 131 | union { |
| 131 | BitField< 0, 16, u32> x1; | 132 | BitField<0, 16, u32> x1; |
| 132 | BitField<16, 16, u32> y1; | 133 | BitField<16, 16, u32> y1; |
| 133 | }; | 134 | }; |
| 134 | 135 | ||
| 135 | union { | 136 | union { |
| 136 | BitField< 0, 16, u32> x2; | 137 | BitField<0, 16, u32> x2; |
| 137 | BitField<16, 16, u32> y2; | 138 | BitField<16, 16, u32> y2; |
| 138 | }; | 139 | }; |
| 139 | } scissor_test; | 140 | } scissor_test; |
| 140 | 141 | ||
| 141 | union { | 142 | union { |
| 142 | BitField< 0, 10, s32> x; | 143 | BitField<0, 10, s32> x; |
| 143 | BitField<16, 10, s32> y; | 144 | BitField<16, 10, s32> y; |
| 144 | } viewport_corner; | 145 | } viewport_corner; |
| 145 | 146 | ||
| 146 | INSERT_PADDING_WORDS(0x1); | 147 | INSERT_PADDING_WORDS(0x1); |
| 147 | 148 | ||
| 148 | //TODO: early depth | 149 | // TODO: early depth |
| 149 | INSERT_PADDING_WORDS(0x1); | 150 | INSERT_PADDING_WORDS(0x1); |
| 150 | 151 | ||
| 151 | INSERT_PADDING_WORDS(0x2); | 152 | INSERT_PADDING_WORDS(0x2); |
| 152 | 153 | ||
| 153 | enum DepthBuffering : u32 { | 154 | enum DepthBuffering : u32 { |
| 154 | WBuffering = 0, | 155 | WBuffering = 0, |
| 155 | ZBuffering = 1, | 156 | ZBuffering = 1, |
| 156 | }; | 157 | }; |
| 157 | BitField< 0, 1, DepthBuffering> depthmap_enable; | 158 | BitField<0, 1, DepthBuffering> depthmap_enable; |
| 158 | 159 | ||
| 159 | INSERT_PADDING_WORDS(0x12); | 160 | INSERT_PADDING_WORDS(0x12); |
| 160 | 161 | ||
| 161 | struct TextureConfig { | 162 | struct TextureConfig { |
| 162 | enum TextureType : u32 { | 163 | enum TextureType : u32 { |
| 163 | Texture2D = 0, | 164 | Texture2D = 0, |
| 164 | TextureCube = 1, | 165 | TextureCube = 1, |
| 165 | Shadow2D = 2, | 166 | Shadow2D = 2, |
| 166 | Projection2D = 3, | 167 | Projection2D = 3, |
| 167 | ShadowCube = 4, | 168 | ShadowCube = 4, |
| 168 | Disabled = 5, | 169 | Disabled = 5, |
| 169 | }; | 170 | }; |
| 170 | 171 | ||
| 171 | enum WrapMode : u32 { | 172 | enum WrapMode : u32 { |
| 172 | ClampToEdge = 0, | 173 | ClampToEdge = 0, |
| 173 | ClampToBorder = 1, | 174 | ClampToBorder = 1, |
| 174 | Repeat = 2, | 175 | Repeat = 2, |
| 175 | MirroredRepeat = 3, | 176 | MirroredRepeat = 3, |
| 176 | }; | 177 | }; |
| 177 | 178 | ||
| 178 | enum TextureFilter : u32 { | 179 | enum TextureFilter : u32 { |
| 179 | Nearest = 0, | 180 | Nearest = 0, |
| 180 | Linear = 1 | 181 | Linear = 1, |
| 181 | }; | 182 | }; |
| 182 | 183 | ||
| 183 | union { | 184 | union { |
| 184 | u32 raw; | 185 | u32 raw; |
| 185 | BitField< 0, 8, u32> r; | 186 | BitField<0, 8, u32> r; |
| 186 | BitField< 8, 8, u32> g; | 187 | BitField<8, 8, u32> g; |
| 187 | BitField<16, 8, u32> b; | 188 | BitField<16, 8, u32> b; |
| 188 | BitField<24, 8, u32> a; | 189 | BitField<24, 8, u32> a; |
| 189 | } border_color; | 190 | } border_color; |
| 190 | 191 | ||
| 191 | union { | 192 | union { |
| 192 | BitField< 0, 16, u32> height; | 193 | BitField<0, 16, u32> height; |
| 193 | BitField<16, 16, u32> width; | 194 | BitField<16, 16, u32> width; |
| 194 | }; | 195 | }; |
| 195 | 196 | ||
| 196 | union { | 197 | union { |
| 197 | BitField< 1, 1, TextureFilter> mag_filter; | 198 | BitField<1, 1, TextureFilter> mag_filter; |
| 198 | BitField< 2, 1, TextureFilter> min_filter; | 199 | BitField<2, 1, TextureFilter> min_filter; |
| 199 | BitField< 8, 2, WrapMode> wrap_t; | 200 | BitField<8, 2, WrapMode> wrap_t; |
| 200 | BitField<12, 2, WrapMode> wrap_s; | 201 | BitField<12, 2, WrapMode> wrap_s; |
| 201 | BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew. | 202 | BitField<28, 2, TextureType> |
| 203 | type; ///< @note Only valid for texture 0 according to 3DBrew. | ||
| 202 | }; | 204 | }; |
| 203 | 205 | ||
| 204 | INSERT_PADDING_WORDS(0x1); | 206 | INSERT_PADDING_WORDS(0x1); |
| @@ -216,39 +218,39 @@ struct Regs { | |||
| 216 | }; | 218 | }; |
| 217 | 219 | ||
| 218 | enum class TextureFormat : u32 { | 220 | enum class TextureFormat : u32 { |
| 219 | RGBA8 = 0, | 221 | RGBA8 = 0, |
| 220 | RGB8 = 1, | 222 | RGB8 = 1, |
| 221 | RGB5A1 = 2, | 223 | RGB5A1 = 2, |
| 222 | RGB565 = 3, | 224 | RGB565 = 3, |
| 223 | RGBA4 = 4, | 225 | RGBA4 = 4, |
| 224 | IA8 = 5, | 226 | IA8 = 5, |
| 225 | RG8 = 6, ///< @note Also called HILO8 in 3DBrew. | 227 | RG8 = 6, ///< @note Also called HILO8 in 3DBrew. |
| 226 | I8 = 7, | 228 | I8 = 7, |
| 227 | A8 = 8, | 229 | A8 = 8, |
| 228 | IA4 = 9, | 230 | IA4 = 9, |
| 229 | I4 = 10, | 231 | I4 = 10, |
| 230 | A4 = 11, | 232 | A4 = 11, |
| 231 | ETC1 = 12, // compressed | 233 | ETC1 = 12, // compressed |
| 232 | ETC1A4 = 13, // compressed | 234 | ETC1A4 = 13, // compressed |
| 233 | }; | 235 | }; |
| 234 | 236 | ||
| 235 | enum class LogicOp : u32 { | 237 | enum class LogicOp : u32 { |
| 236 | Clear = 0, | 238 | Clear = 0, |
| 237 | And = 1, | 239 | And = 1, |
| 238 | AndReverse = 2, | 240 | AndReverse = 2, |
| 239 | Copy = 3, | 241 | Copy = 3, |
| 240 | Set = 4, | 242 | Set = 4, |
| 241 | CopyInverted = 5, | 243 | CopyInverted = 5, |
| 242 | NoOp = 6, | 244 | NoOp = 6, |
| 243 | Invert = 7, | 245 | Invert = 7, |
| 244 | Nand = 8, | 246 | Nand = 8, |
| 245 | Or = 9, | 247 | Or = 9, |
| 246 | Nor = 10, | 248 | Nor = 10, |
| 247 | Xor = 11, | 249 | Xor = 11, |
| 248 | Equiv = 12, | 250 | Equiv = 12, |
| 249 | AndInverted = 13, | 251 | AndInverted = 13, |
| 250 | OrReverse = 14, | 252 | OrReverse = 14, |
| 251 | OrInverted = 15, | 253 | OrInverted = 15, |
| 252 | }; | 254 | }; |
| 253 | 255 | ||
| 254 | static unsigned NibblesPerPixel(TextureFormat format) { | 256 | static unsigned NibblesPerPixel(TextureFormat format) { |
| @@ -273,15 +275,15 @@ struct Regs { | |||
| 273 | case TextureFormat::I8: | 275 | case TextureFormat::I8: |
| 274 | case TextureFormat::A8: | 276 | case TextureFormat::A8: |
| 275 | case TextureFormat::IA4: | 277 | case TextureFormat::IA4: |
| 276 | default: // placeholder for yet unknown formats | 278 | default: // placeholder for yet unknown formats |
| 277 | return 2; | 279 | return 2; |
| 278 | } | 280 | } |
| 279 | } | 281 | } |
| 280 | 282 | ||
| 281 | union { | 283 | union { |
| 282 | BitField< 0, 1, u32> texture0_enable; | 284 | BitField<0, 1, u32> texture0_enable; |
| 283 | BitField< 1, 1, u32> texture1_enable; | 285 | BitField<1, 1, u32> texture1_enable; |
| 284 | BitField< 2, 1, u32> texture2_enable; | 286 | BitField<2, 1, u32> texture2_enable; |
| 285 | }; | 287 | }; |
| 286 | TextureConfig texture0; | 288 | TextureConfig texture0; |
| 287 | INSERT_PADDING_WORDS(0x8); | 289 | INSERT_PADDING_WORDS(0x8); |
| @@ -302,63 +304,63 @@ struct Regs { | |||
| 302 | }; | 304 | }; |
| 303 | const std::array<FullTextureConfig, 3> GetTextures() const { | 305 | const std::array<FullTextureConfig, 3> GetTextures() const { |
| 304 | return {{ | 306 | return {{ |
| 305 | { texture0_enable.ToBool(), texture0, texture0_format }, | 307 | {texture0_enable.ToBool(), texture0, texture0_format}, |
| 306 | { texture1_enable.ToBool(), texture1, texture1_format }, | 308 | {texture1_enable.ToBool(), texture1, texture1_format}, |
| 307 | { texture2_enable.ToBool(), texture2, texture2_format } | 309 | {texture2_enable.ToBool(), texture2, texture2_format}, |
| 308 | }}; | 310 | }}; |
| 309 | } | 311 | } |
| 310 | 312 | ||
| 311 | // 0xc0-0xff: Texture Combiner (akin to glTexEnv) | 313 | // 0xc0-0xff: Texture Combiner (akin to glTexEnv) |
| 312 | struct TevStageConfig { | 314 | struct TevStageConfig { |
| 313 | enum class Source : u32 { | 315 | enum class Source : u32 { |
| 314 | PrimaryColor = 0x0, | 316 | PrimaryColor = 0x0, |
| 315 | PrimaryFragmentColor = 0x1, | 317 | PrimaryFragmentColor = 0x1, |
| 316 | SecondaryFragmentColor = 0x2, | 318 | SecondaryFragmentColor = 0x2, |
| 317 | 319 | ||
| 318 | Texture0 = 0x3, | 320 | Texture0 = 0x3, |
| 319 | Texture1 = 0x4, | 321 | Texture1 = 0x4, |
| 320 | Texture2 = 0x5, | 322 | Texture2 = 0x5, |
| 321 | Texture3 = 0x6, | 323 | Texture3 = 0x6, |
| 322 | 324 | ||
| 323 | PreviousBuffer = 0xd, | 325 | PreviousBuffer = 0xd, |
| 324 | Constant = 0xe, | 326 | Constant = 0xe, |
| 325 | Previous = 0xf, | 327 | Previous = 0xf, |
| 326 | }; | 328 | }; |
| 327 | 329 | ||
| 328 | enum class ColorModifier : u32 { | 330 | enum class ColorModifier : u32 { |
| 329 | SourceColor = 0x0, | 331 | SourceColor = 0x0, |
| 330 | OneMinusSourceColor = 0x1, | 332 | OneMinusSourceColor = 0x1, |
| 331 | SourceAlpha = 0x2, | 333 | SourceAlpha = 0x2, |
| 332 | OneMinusSourceAlpha = 0x3, | 334 | OneMinusSourceAlpha = 0x3, |
| 333 | SourceRed = 0x4, | 335 | SourceRed = 0x4, |
| 334 | OneMinusSourceRed = 0x5, | 336 | OneMinusSourceRed = 0x5, |
| 335 | 337 | ||
| 336 | SourceGreen = 0x8, | 338 | SourceGreen = 0x8, |
| 337 | OneMinusSourceGreen = 0x9, | 339 | OneMinusSourceGreen = 0x9, |
| 338 | 340 | ||
| 339 | SourceBlue = 0xc, | 341 | SourceBlue = 0xc, |
| 340 | OneMinusSourceBlue = 0xd, | 342 | OneMinusSourceBlue = 0xd, |
| 341 | }; | 343 | }; |
| 342 | 344 | ||
| 343 | enum class AlphaModifier : u32 { | 345 | enum class AlphaModifier : u32 { |
| 344 | SourceAlpha = 0x0, | 346 | SourceAlpha = 0x0, |
| 345 | OneMinusSourceAlpha = 0x1, | 347 | OneMinusSourceAlpha = 0x1, |
| 346 | SourceRed = 0x2, | 348 | SourceRed = 0x2, |
| 347 | OneMinusSourceRed = 0x3, | 349 | OneMinusSourceRed = 0x3, |
| 348 | SourceGreen = 0x4, | 350 | SourceGreen = 0x4, |
| 349 | OneMinusSourceGreen = 0x5, | 351 | OneMinusSourceGreen = 0x5, |
| 350 | SourceBlue = 0x6, | 352 | SourceBlue = 0x6, |
| 351 | OneMinusSourceBlue = 0x7, | 353 | OneMinusSourceBlue = 0x7, |
| 352 | }; | 354 | }; |
| 353 | 355 | ||
| 354 | enum class Operation : u32 { | 356 | enum class Operation : u32 { |
| 355 | Replace = 0, | 357 | Replace = 0, |
| 356 | Modulate = 1, | 358 | Modulate = 1, |
| 357 | Add = 2, | 359 | Add = 2, |
| 358 | AddSigned = 3, | 360 | AddSigned = 3, |
| 359 | Lerp = 4, | 361 | Lerp = 4, |
| 360 | Subtract = 5, | 362 | Subtract = 5, |
| 361 | Dot3_RGB = 6, | 363 | Dot3_RGB = 6, |
| 362 | 364 | ||
| 363 | MultiplyThenAdd = 8, | 365 | MultiplyThenAdd = 8, |
| 364 | AddThenMultiply = 9, | 366 | AddThenMultiply = 9, |
| @@ -366,9 +368,9 @@ struct Regs { | |||
| 366 | 368 | ||
| 367 | union { | 369 | union { |
| 368 | u32 sources_raw; | 370 | u32 sources_raw; |
| 369 | BitField< 0, 4, Source> color_source1; | 371 | BitField<0, 4, Source> color_source1; |
| 370 | BitField< 4, 4, Source> color_source2; | 372 | BitField<4, 4, Source> color_source2; |
| 371 | BitField< 8, 4, Source> color_source3; | 373 | BitField<8, 4, Source> color_source3; |
| 372 | BitField<16, 4, Source> alpha_source1; | 374 | BitField<16, 4, Source> alpha_source1; |
| 373 | BitField<20, 4, Source> alpha_source2; | 375 | BitField<20, 4, Source> alpha_source2; |
| 374 | BitField<24, 4, Source> alpha_source3; | 376 | BitField<24, 4, Source> alpha_source3; |
| @@ -376,9 +378,9 @@ struct Regs { | |||
| 376 | 378 | ||
| 377 | union { | 379 | union { |
| 378 | u32 modifiers_raw; | 380 | u32 modifiers_raw; |
| 379 | BitField< 0, 4, ColorModifier> color_modifier1; | 381 | BitField<0, 4, ColorModifier> color_modifier1; |
| 380 | BitField< 4, 4, ColorModifier> color_modifier2; | 382 | BitField<4, 4, ColorModifier> color_modifier2; |
| 381 | BitField< 8, 4, ColorModifier> color_modifier3; | 383 | BitField<8, 4, ColorModifier> color_modifier3; |
| 382 | BitField<12, 3, AlphaModifier> alpha_modifier1; | 384 | BitField<12, 3, AlphaModifier> alpha_modifier1; |
| 383 | BitField<16, 3, AlphaModifier> alpha_modifier2; | 385 | BitField<16, 3, AlphaModifier> alpha_modifier2; |
| 384 | BitField<20, 3, AlphaModifier> alpha_modifier3; | 386 | BitField<20, 3, AlphaModifier> alpha_modifier3; |
| @@ -386,21 +388,21 @@ struct Regs { | |||
| 386 | 388 | ||
| 387 | union { | 389 | union { |
| 388 | u32 ops_raw; | 390 | u32 ops_raw; |
| 389 | BitField< 0, 4, Operation> color_op; | 391 | BitField<0, 4, Operation> color_op; |
| 390 | BitField<16, 4, Operation> alpha_op; | 392 | BitField<16, 4, Operation> alpha_op; |
| 391 | }; | 393 | }; |
| 392 | 394 | ||
| 393 | union { | 395 | union { |
| 394 | u32 const_color; | 396 | u32 const_color; |
| 395 | BitField< 0, 8, u32> const_r; | 397 | BitField<0, 8, u32> const_r; |
| 396 | BitField< 8, 8, u32> const_g; | 398 | BitField<8, 8, u32> const_g; |
| 397 | BitField<16, 8, u32> const_b; | 399 | BitField<16, 8, u32> const_b; |
| 398 | BitField<24, 8, u32> const_a; | 400 | BitField<24, 8, u32> const_a; |
| 399 | }; | 401 | }; |
| 400 | 402 | ||
| 401 | union { | 403 | union { |
| 402 | u32 scales_raw; | 404 | u32 scales_raw; |
| 403 | BitField< 0, 2, u32> color_scale; | 405 | BitField<0, 2, u32> color_scale; |
| 404 | BitField<16, 2, u32> alpha_scale; | 406 | BitField<16, 2, u32> alpha_scale; |
| 405 | }; | 407 | }; |
| 406 | 408 | ||
| @@ -424,8 +426,8 @@ struct Regs { | |||
| 424 | 426 | ||
| 425 | enum class FogMode : u32 { | 427 | enum class FogMode : u32 { |
| 426 | None = 0, | 428 | None = 0, |
| 427 | Fog = 5, | 429 | Fog = 5, |
| 428 | Gas = 7, | 430 | Gas = 7, |
| 429 | }; | 431 | }; |
| 430 | 432 | ||
| 431 | union { | 433 | union { |
| @@ -435,7 +437,7 @@ struct Regs { | |||
| 435 | union { | 437 | union { |
| 436 | // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in | 438 | // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in |
| 437 | // these masks are set | 439 | // these masks are set |
| 438 | BitField< 8, 4, u32> update_mask_rgb; | 440 | BitField<8, 4, u32> update_mask_rgb; |
| 439 | BitField<12, 4, u32> update_mask_a; | 441 | BitField<12, 4, u32> update_mask_a; |
| 440 | 442 | ||
| 441 | bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { | 443 | bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { |
| @@ -450,8 +452,8 @@ struct Regs { | |||
| 450 | 452 | ||
| 451 | union { | 453 | union { |
| 452 | u32 raw; | 454 | u32 raw; |
| 453 | BitField< 0, 8, u32> r; | 455 | BitField<0, 8, u32> r; |
| 454 | BitField< 8, 8, u32> g; | 456 | BitField<8, 8, u32> g; |
| 455 | BitField<16, 8, u32> b; | 457 | BitField<16, 8, u32> b; |
| 456 | } fog_color; | 458 | } fog_color; |
| 457 | 459 | ||
| @@ -469,66 +471,64 @@ struct Regs { | |||
| 469 | 471 | ||
| 470 | union { | 472 | union { |
| 471 | u32 raw; | 473 | u32 raw; |
| 472 | BitField< 0, 8, u32> r; | 474 | BitField<0, 8, u32> r; |
| 473 | BitField< 8, 8, u32> g; | 475 | BitField<8, 8, u32> g; |
| 474 | BitField<16, 8, u32> b; | 476 | BitField<16, 8, u32> b; |
| 475 | BitField<24, 8, u32> a; | 477 | BitField<24, 8, u32> a; |
| 476 | } tev_combiner_buffer_color; | 478 | } tev_combiner_buffer_color; |
| 477 | 479 | ||
| 478 | INSERT_PADDING_WORDS(0x2); | 480 | INSERT_PADDING_WORDS(0x2); |
| 479 | 481 | ||
| 480 | const std::array<Regs::TevStageConfig,6> GetTevStages() const { | 482 | const std::array<Regs::TevStageConfig, 6> GetTevStages() const { |
| 481 | return {{ tev_stage0, tev_stage1, | 483 | return {{tev_stage0, tev_stage1, tev_stage2, tev_stage3, tev_stage4, tev_stage5}}; |
| 482 | tev_stage2, tev_stage3, | ||
| 483 | tev_stage4, tev_stage5 }}; | ||
| 484 | }; | 484 | }; |
| 485 | 485 | ||
| 486 | enum class BlendEquation : u32 { | 486 | enum class BlendEquation : u32 { |
| 487 | Add = 0, | 487 | Add = 0, |
| 488 | Subtract = 1, | 488 | Subtract = 1, |
| 489 | ReverseSubtract = 2, | 489 | ReverseSubtract = 2, |
| 490 | Min = 3, | 490 | Min = 3, |
| 491 | Max = 4, | 491 | Max = 4, |
| 492 | }; | 492 | }; |
| 493 | 493 | ||
| 494 | enum class BlendFactor : u32 { | 494 | enum class BlendFactor : u32 { |
| 495 | Zero = 0, | 495 | Zero = 0, |
| 496 | One = 1, | 496 | One = 1, |
| 497 | SourceColor = 2, | 497 | SourceColor = 2, |
| 498 | OneMinusSourceColor = 3, | 498 | OneMinusSourceColor = 3, |
| 499 | DestColor = 4, | 499 | DestColor = 4, |
| 500 | OneMinusDestColor = 5, | 500 | OneMinusDestColor = 5, |
| 501 | SourceAlpha = 6, | 501 | SourceAlpha = 6, |
| 502 | OneMinusSourceAlpha = 7, | 502 | OneMinusSourceAlpha = 7, |
| 503 | DestAlpha = 8, | 503 | DestAlpha = 8, |
| 504 | OneMinusDestAlpha = 9, | 504 | OneMinusDestAlpha = 9, |
| 505 | ConstantColor = 10, | 505 | ConstantColor = 10, |
| 506 | OneMinusConstantColor = 11, | 506 | OneMinusConstantColor = 11, |
| 507 | ConstantAlpha = 12, | 507 | ConstantAlpha = 12, |
| 508 | OneMinusConstantAlpha = 13, | 508 | OneMinusConstantAlpha = 13, |
| 509 | SourceAlphaSaturate = 14, | 509 | SourceAlphaSaturate = 14, |
| 510 | }; | 510 | }; |
| 511 | 511 | ||
| 512 | enum class CompareFunc : u32 { | 512 | enum class CompareFunc : u32 { |
| 513 | Never = 0, | 513 | Never = 0, |
| 514 | Always = 1, | 514 | Always = 1, |
| 515 | Equal = 2, | 515 | Equal = 2, |
| 516 | NotEqual = 3, | 516 | NotEqual = 3, |
| 517 | LessThan = 4, | 517 | LessThan = 4, |
| 518 | LessThanOrEqual = 5, | 518 | LessThanOrEqual = 5, |
| 519 | GreaterThan = 6, | 519 | GreaterThan = 6, |
| 520 | GreaterThanOrEqual = 7, | 520 | GreaterThanOrEqual = 7, |
| 521 | }; | 521 | }; |
| 522 | 522 | ||
| 523 | enum class StencilAction : u32 { | 523 | enum class StencilAction : u32 { |
| 524 | Keep = 0, | 524 | Keep = 0, |
| 525 | Zero = 1, | 525 | Zero = 1, |
| 526 | Replace = 2, | 526 | Replace = 2, |
| 527 | Increment = 3, | 527 | Increment = 3, |
| 528 | Decrement = 4, | 528 | Decrement = 4, |
| 529 | Invert = 5, | 529 | Invert = 5, |
| 530 | IncrementWrap = 6, | 530 | IncrementWrap = 6, |
| 531 | DecrementWrap = 7 | 531 | DecrementWrap = 7, |
| 532 | }; | 532 | }; |
| 533 | 533 | ||
| 534 | struct { | 534 | struct { |
| @@ -538,8 +538,8 @@ struct Regs { | |||
| 538 | }; | 538 | }; |
| 539 | 539 | ||
| 540 | union { | 540 | union { |
| 541 | BitField< 0, 8, BlendEquation> blend_equation_rgb; | 541 | BitField<0, 8, BlendEquation> blend_equation_rgb; |
| 542 | BitField< 8, 8, BlendEquation> blend_equation_a; | 542 | BitField<8, 8, BlendEquation> blend_equation_a; |
| 543 | 543 | ||
| 544 | BitField<16, 4, BlendFactor> factor_source_rgb; | 544 | BitField<16, 4, BlendFactor> factor_source_rgb; |
| 545 | BitField<20, 4, BlendFactor> factor_dest_rgb; | 545 | BitField<20, 4, BlendFactor> factor_dest_rgb; |
| @@ -554,16 +554,16 @@ struct Regs { | |||
| 554 | 554 | ||
| 555 | union { | 555 | union { |
| 556 | u32 raw; | 556 | u32 raw; |
| 557 | BitField< 0, 8, u32> r; | 557 | BitField<0, 8, u32> r; |
| 558 | BitField< 8, 8, u32> g; | 558 | BitField<8, 8, u32> g; |
| 559 | BitField<16, 8, u32> b; | 559 | BitField<16, 8, u32> b; |
| 560 | BitField<24, 8, u32> a; | 560 | BitField<24, 8, u32> a; |
| 561 | } blend_const; | 561 | } blend_const; |
| 562 | 562 | ||
| 563 | union { | 563 | union { |
| 564 | BitField< 0, 1, u32> enable; | 564 | BitField<0, 1, u32> enable; |
| 565 | BitField< 4, 3, CompareFunc> func; | 565 | BitField<4, 3, CompareFunc> func; |
| 566 | BitField< 8, 8, u32> ref; | 566 | BitField<8, 8, u32> ref; |
| 567 | } alpha_test; | 567 | } alpha_test; |
| 568 | 568 | ||
| 569 | struct { | 569 | struct { |
| @@ -572,13 +572,13 @@ struct Regs { | |||
| 572 | u32 raw_func; | 572 | u32 raw_func; |
| 573 | 573 | ||
| 574 | // If true, enable stencil testing | 574 | // If true, enable stencil testing |
| 575 | BitField< 0, 1, u32> enable; | 575 | BitField<0, 1, u32> enable; |
| 576 | 576 | ||
| 577 | // Comparison operation for stencil testing | 577 | // Comparison operation for stencil testing |
| 578 | BitField< 4, 3, CompareFunc> func; | 578 | BitField<4, 3, CompareFunc> func; |
| 579 | 579 | ||
| 580 | // Mask used to control writing to the stencil buffer | 580 | // Mask used to control writing to the stencil buffer |
| 581 | BitField< 8, 8, u32> write_mask; | 581 | BitField<8, 8, u32> write_mask; |
| 582 | 582 | ||
| 583 | // Value to compare against for stencil testing | 583 | // Value to compare against for stencil testing |
| 584 | BitField<16, 8, u32> reference_value; | 584 | BitField<16, 8, u32> reference_value; |
| @@ -592,21 +592,21 @@ struct Regs { | |||
| 592 | u32 raw_op; | 592 | u32 raw_op; |
| 593 | 593 | ||
| 594 | // Action to perform when the stencil test fails | 594 | // Action to perform when the stencil test fails |
| 595 | BitField< 0, 3, StencilAction> action_stencil_fail; | 595 | BitField<0, 3, StencilAction> action_stencil_fail; |
| 596 | 596 | ||
| 597 | // Action to perform when stencil testing passed but depth testing fails | 597 | // Action to perform when stencil testing passed but depth testing fails |
| 598 | BitField< 4, 3, StencilAction> action_depth_fail; | 598 | BitField<4, 3, StencilAction> action_depth_fail; |
| 599 | 599 | ||
| 600 | // Action to perform when both stencil and depth testing pass | 600 | // Action to perform when both stencil and depth testing pass |
| 601 | BitField< 8, 3, StencilAction> action_depth_pass; | 601 | BitField<8, 3, StencilAction> action_depth_pass; |
| 602 | }; | 602 | }; |
| 603 | } stencil_test; | 603 | } stencil_test; |
| 604 | 604 | ||
| 605 | union { | 605 | union { |
| 606 | BitField< 0, 1, u32> depth_test_enable; | 606 | BitField<0, 1, u32> depth_test_enable; |
| 607 | BitField< 4, 3, CompareFunc> depth_test_func; | 607 | BitField<4, 3, CompareFunc> depth_test_func; |
| 608 | BitField< 8, 1, u32> red_enable; | 608 | BitField<8, 1, u32> red_enable; |
| 609 | BitField< 9, 1, u32> green_enable; | 609 | BitField<9, 1, u32> green_enable; |
| 610 | BitField<10, 1, u32> blue_enable; | 610 | BitField<10, 1, u32> blue_enable; |
| 611 | BitField<11, 1, u32> alpha_enable; | 611 | BitField<11, 1, u32> alpha_enable; |
| 612 | BitField<12, 1, u32> depth_write_enable; | 612 | BitField<12, 1, u32> depth_write_enable; |
| @@ -617,16 +617,16 @@ struct Regs { | |||
| 617 | 617 | ||
| 618 | // Components are laid out in reverse byte order, most significant bits first. | 618 | // Components are laid out in reverse byte order, most significant bits first. |
| 619 | enum class ColorFormat : u32 { | 619 | enum class ColorFormat : u32 { |
| 620 | RGBA8 = 0, | 620 | RGBA8 = 0, |
| 621 | RGB8 = 1, | 621 | RGB8 = 1, |
| 622 | RGB5A1 = 2, | 622 | RGB5A1 = 2, |
| 623 | RGB565 = 3, | 623 | RGB565 = 3, |
| 624 | RGBA4 = 4, | 624 | RGBA4 = 4, |
| 625 | }; | 625 | }; |
| 626 | 626 | ||
| 627 | enum class DepthFormat : u32 { | 627 | enum class DepthFormat : u32 { |
| 628 | D16 = 0, | 628 | D16 = 0, |
| 629 | D24 = 2, | 629 | D24 = 2, |
| 630 | D24S8 = 3, | 630 | D24S8 = 3, |
| 631 | }; | 631 | }; |
| 632 | 632 | ||
| @@ -673,7 +673,7 @@ struct Regs { | |||
| 673 | // while the height is stored as the actual height minus one. | 673 | // while the height is stored as the actual height minus one. |
| 674 | // Hence, don't access these fields directly but use the accessors | 674 | // Hence, don't access these fields directly but use the accessors |
| 675 | // GetWidth() and GetHeight() instead. | 675 | // GetWidth() and GetHeight() instead. |
| 676 | BitField< 0, 11, u32> width; | 676 | BitField<0, 11, u32> width; |
| 677 | BitField<12, 10, u32> height; | 677 | BitField<12, 10, u32> height; |
| 678 | }; | 678 | }; |
| 679 | 679 | ||
| @@ -759,10 +759,12 @@ struct Regs { | |||
| 759 | 759 | ||
| 760 | /// Selects which lighting components are affected by fresnel | 760 | /// Selects which lighting components are affected by fresnel |
| 761 | enum class LightingFresnelSelector { | 761 | enum class LightingFresnelSelector { |
| 762 | None = 0, ///< Fresnel is disabled | 762 | None = 0, ///< Fresnel is disabled |
| 763 | PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel | 763 | PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel |
| 764 | SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel | 764 | SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel |
| 765 | Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel | 765 | Both = |
| 766 | PrimaryAlpha | | ||
| 767 | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel | ||
| 766 | }; | 768 | }; |
| 767 | 769 | ||
| 768 | /// Factor used to scale the output of a lighting LUT | 770 | /// Factor used to scale the output of a lighting LUT |
| @@ -789,57 +791,63 @@ struct Regs { | |||
| 789 | }; | 791 | }; |
| 790 | 792 | ||
| 791 | union LightColor { | 793 | union LightColor { |
| 792 | BitField< 0, 10, u32> b; | 794 | BitField<0, 10, u32> b; |
| 793 | BitField<10, 10, u32> g; | 795 | BitField<10, 10, u32> g; |
| 794 | BitField<20, 10, u32> r; | 796 | BitField<20, 10, u32> r; |
| 795 | 797 | ||
| 796 | Math::Vec3f ToVec3f() const { | 798 | Math::Vec3f ToVec3f() const { |
| 797 | // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component | 799 | // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color |
| 800 | // component | ||
| 798 | return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); | 801 | return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); |
| 799 | } | 802 | } |
| 800 | }; | 803 | }; |
| 801 | 804 | ||
| 802 | /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration | 805 | /// Returns true if the specified lighting sampler is supported by the current Pica lighting |
| 806 | /// configuration | ||
| 803 | static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { | 807 | static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { |
| 804 | switch (sampler) { | 808 | switch (sampler) { |
| 805 | case LightingSampler::Distribution0: | 809 | case LightingSampler::Distribution0: |
| 806 | return (config != LightingConfig::Config1); | 810 | return (config != LightingConfig::Config1); |
| 807 | 811 | ||
| 808 | case LightingSampler::Distribution1: | 812 | case LightingSampler::Distribution1: |
| 809 | return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); | 813 | return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && |
| 814 | (config != LightingConfig::Config5); | ||
| 810 | 815 | ||
| 811 | case LightingSampler::Fresnel: | 816 | case LightingSampler::Fresnel: |
| 812 | return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); | 817 | return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && |
| 818 | (config != LightingConfig::Config4); | ||
| 813 | 819 | ||
| 814 | case LightingSampler::ReflectRed: | 820 | case LightingSampler::ReflectRed: |
| 815 | return (config != LightingConfig::Config3); | 821 | return (config != LightingConfig::Config3); |
| 816 | 822 | ||
| 817 | case LightingSampler::ReflectGreen: | 823 | case LightingSampler::ReflectGreen: |
| 818 | case LightingSampler::ReflectBlue: | 824 | case LightingSampler::ReflectBlue: |
| 819 | return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); | 825 | return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || |
| 826 | (config == LightingConfig::Config7); | ||
| 820 | default: | 827 | default: |
| 821 | UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached " | 828 | UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached " |
| 822 | "unreachable section, sampler should be one " | 829 | "unreachable section, sampler should be one " |
| 823 | "of Distribution0, Distribution1, Fresnel, " | 830 | "of Distribution0, Distribution1, Fresnel, " |
| 824 | "ReflectRed, ReflectGreen or ReflectBlue, instead " | 831 | "ReflectRed, ReflectGreen or ReflectBlue, instead " |
| 825 | "got %i", static_cast<int>(config)); | 832 | "got %i", |
| 833 | static_cast<int>(config)); | ||
| 826 | } | 834 | } |
| 827 | } | 835 | } |
| 828 | 836 | ||
| 829 | struct { | 837 | struct { |
| 830 | struct LightSrc { | 838 | struct LightSrc { |
| 831 | LightColor specular_0; // material.specular_0 * light.specular_0 | 839 | LightColor specular_0; // material.specular_0 * light.specular_0 |
| 832 | LightColor specular_1; // material.specular_1 * light.specular_1 | 840 | LightColor specular_1; // material.specular_1 * light.specular_1 |
| 833 | LightColor diffuse; // material.diffuse * light.diffuse | 841 | LightColor diffuse; // material.diffuse * light.diffuse |
| 834 | LightColor ambient; // material.ambient * light.ambient | 842 | LightColor ambient; // material.ambient * light.ambient |
| 835 | 843 | ||
| 836 | // Encoded as 16-bit floating point | 844 | // Encoded as 16-bit floating point |
| 837 | union { | 845 | union { |
| 838 | BitField< 0, 16, u32> x; | 846 | BitField<0, 16, u32> x; |
| 839 | BitField<16, 16, u32> y; | 847 | BitField<16, 16, u32> y; |
| 840 | }; | 848 | }; |
| 841 | union { | 849 | union { |
| 842 | BitField< 0, 16, u32> z; | 850 | BitField<0, 16, u32> z; |
| 843 | }; | 851 | }; |
| 844 | 852 | ||
| 845 | INSERT_PADDING_WORDS(0x3); | 853 | INSERT_PADDING_WORDS(0x3); |
| @@ -854,7 +862,8 @@ struct Regs { | |||
| 854 | 862 | ||
| 855 | INSERT_PADDING_WORDS(0x4); | 863 | INSERT_PADDING_WORDS(0x4); |
| 856 | }; | 864 | }; |
| 857 | static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); | 865 | static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), |
| 866 | "LightSrc structure must be 0x10 words"); | ||
| 858 | 867 | ||
| 859 | LightSrc light[8]; | 868 | LightSrc light[8]; |
| 860 | LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) | 869 | LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) |
| @@ -862,8 +871,8 @@ struct Regs { | |||
| 862 | BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 | 871 | BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 |
| 863 | 872 | ||
| 864 | union { | 873 | union { |
| 865 | BitField< 2, 2, LightingFresnelSelector> fresnel_selector; | 874 | BitField<2, 2, LightingFresnelSelector> fresnel_selector; |
| 866 | BitField< 4, 4, LightingConfig> config; | 875 | BitField<4, 4, LightingConfig> config; |
| 867 | BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 | 876 | BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 |
| 868 | BitField<27, 1, u32> clamp_highlights; | 877 | BitField<27, 1, u32> clamp_highlights; |
| 869 | BitField<28, 2, LightingBumpMode> bump_mode; | 878 | BitField<28, 2, LightingBumpMode> bump_mode; |
| @@ -892,16 +901,17 @@ struct Regs { | |||
| 892 | } config1; | 901 | } config1; |
| 893 | 902 | ||
| 894 | bool IsDistAttenDisabled(unsigned index) const { | 903 | bool IsDistAttenDisabled(unsigned index) const { |
| 895 | const unsigned disable[] = { config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1, | 904 | const unsigned disable[] = { |
| 896 | config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3, | 905 | config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1, |
| 897 | config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5, | 906 | config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3, |
| 898 | config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7 }; | 907 | config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5, |
| 908 | config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7}; | ||
| 899 | return disable[index] != 0; | 909 | return disable[index] != 0; |
| 900 | } | 910 | } |
| 901 | 911 | ||
| 902 | union { | 912 | union { |
| 903 | BitField<0, 8, u32> index; ///< Index at which to set data in the LUT | 913 | BitField<0, 8, u32> index; ///< Index at which to set data in the LUT |
| 904 | BitField<8, 5, u32> type; ///< Type of LUT for which to set data | 914 | BitField<8, 5, u32> type; ///< Type of LUT for which to set data |
| 905 | } lut_config; | 915 | } lut_config; |
| 906 | 916 | ||
| 907 | BitField<0, 1, u32> disable; | 917 | BitField<0, 1, u32> disable; |
| @@ -917,9 +927,9 @@ struct Regs { | |||
| 917 | // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in | 927 | // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in |
| 918 | // the range of (0.0, 1.0). | 928 | // the range of (0.0, 1.0). |
| 919 | union { | 929 | union { |
| 920 | BitField< 1, 1, u32> disable_d0; | 930 | BitField<1, 1, u32> disable_d0; |
| 921 | BitField< 5, 1, u32> disable_d1; | 931 | BitField<5, 1, u32> disable_d1; |
| 922 | BitField< 9, 1, u32> disable_sp; | 932 | BitField<9, 1, u32> disable_sp; |
| 923 | BitField<13, 1, u32> disable_fr; | 933 | BitField<13, 1, u32> disable_fr; |
| 924 | BitField<17, 1, u32> disable_rb; | 934 | BitField<17, 1, u32> disable_rb; |
| 925 | BitField<21, 1, u32> disable_rg; | 935 | BitField<21, 1, u32> disable_rg; |
| @@ -927,9 +937,9 @@ struct Regs { | |||
| 927 | } abs_lut_input; | 937 | } abs_lut_input; |
| 928 | 938 | ||
| 929 | union { | 939 | union { |
| 930 | BitField< 0, 3, LightingLutInput> d0; | 940 | BitField<0, 3, LightingLutInput> d0; |
| 931 | BitField< 4, 3, LightingLutInput> d1; | 941 | BitField<4, 3, LightingLutInput> d1; |
| 932 | BitField< 8, 3, LightingLutInput> sp; | 942 | BitField<8, 3, LightingLutInput> sp; |
| 933 | BitField<12, 3, LightingLutInput> fr; | 943 | BitField<12, 3, LightingLutInput> fr; |
| 934 | BitField<16, 3, LightingLutInput> rb; | 944 | BitField<16, 3, LightingLutInput> rb; |
| 935 | BitField<20, 3, LightingLutInput> rg; | 945 | BitField<20, 3, LightingLutInput> rg; |
| @@ -937,9 +947,9 @@ struct Regs { | |||
| 937 | } lut_input; | 947 | } lut_input; |
| 938 | 948 | ||
| 939 | union { | 949 | union { |
| 940 | BitField< 0, 3, LightingScale> d0; | 950 | BitField<0, 3, LightingScale> d0; |
| 941 | BitField< 4, 3, LightingScale> d1; | 951 | BitField<4, 3, LightingScale> d1; |
| 942 | BitField< 8, 3, LightingScale> sp; | 952 | BitField<8, 3, LightingScale> sp; |
| 943 | BitField<12, 3, LightingScale> fr; | 953 | BitField<12, 3, LightingScale> fr; |
| 944 | BitField<16, 3, LightingScale> rb; | 954 | BitField<16, 3, LightingScale> rb; |
| 945 | BitField<20, 3, LightingScale> rg; | 955 | BitField<20, 3, LightingScale> rg; |
| @@ -972,9 +982,9 @@ struct Regs { | |||
| 972 | // above), the first N slots below will be set to integers within the range of 0-7, | 982 | // above), the first N slots below will be set to integers within the range of 0-7, |
| 973 | // corresponding to the actual light that is enabled for each slot. | 983 | // corresponding to the actual light that is enabled for each slot. |
| 974 | 984 | ||
| 975 | BitField< 0, 3, u32> slot_0; | 985 | BitField<0, 3, u32> slot_0; |
| 976 | BitField< 4, 3, u32> slot_1; | 986 | BitField<4, 3, u32> slot_1; |
| 977 | BitField< 8, 3, u32> slot_2; | 987 | BitField<8, 3, u32> slot_2; |
| 978 | BitField<12, 3, u32> slot_3; | 988 | BitField<12, 3, u32> slot_3; |
| 979 | BitField<16, 3, u32> slot_4; | 989 | BitField<16, 3, u32> slot_4; |
| 980 | BitField<20, 3, u32> slot_5; | 990 | BitField<20, 3, u32> slot_5; |
| @@ -982,7 +992,8 @@ struct Regs { | |||
| 982 | BitField<28, 3, u32> slot_7; | 992 | BitField<28, 3, u32> slot_7; |
| 983 | 993 | ||
| 984 | unsigned GetNum(unsigned index) const { | 994 | unsigned GetNum(unsigned index) const { |
| 985 | const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 }; | 995 | const unsigned enable_slots[] = {slot_0, slot_1, slot_2, slot_3, |
| 996 | slot_4, slot_5, slot_6, slot_7}; | ||
| 986 | return enable_slots[index]; | 997 | return enable_slots[index]; |
| 987 | } | 998 | } |
| 988 | } light_enable; | 999 | } light_enable; |
| @@ -1006,58 +1017,54 @@ struct Regs { | |||
| 1006 | 1017 | ||
| 1007 | // Descriptor for internal vertex attributes | 1018 | // Descriptor for internal vertex attributes |
| 1008 | union { | 1019 | union { |
| 1009 | BitField< 0, 2, VertexAttributeFormat> format0; // size of one element | 1020 | BitField<0, 2, VertexAttributeFormat> format0; // size of one element |
| 1010 | BitField< 2, 2, u64> size0; // number of elements minus 1 | 1021 | BitField<2, 2, u64> size0; // number of elements minus 1 |
| 1011 | BitField< 4, 2, VertexAttributeFormat> format1; | 1022 | BitField<4, 2, VertexAttributeFormat> format1; |
| 1012 | BitField< 6, 2, u64> size1; | 1023 | BitField<6, 2, u64> size1; |
| 1013 | BitField< 8, 2, VertexAttributeFormat> format2; | 1024 | BitField<8, 2, VertexAttributeFormat> format2; |
| 1014 | BitField<10, 2, u64> size2; | 1025 | BitField<10, 2, u64> size2; |
| 1015 | BitField<12, 2, VertexAttributeFormat> format3; | 1026 | BitField<12, 2, VertexAttributeFormat> format3; |
| 1016 | BitField<14, 2, u64> size3; | 1027 | BitField<14, 2, u64> size3; |
| 1017 | BitField<16, 2, VertexAttributeFormat> format4; | 1028 | BitField<16, 2, VertexAttributeFormat> format4; |
| 1018 | BitField<18, 2, u64> size4; | 1029 | BitField<18, 2, u64> size4; |
| 1019 | BitField<20, 2, VertexAttributeFormat> format5; | 1030 | BitField<20, 2, VertexAttributeFormat> format5; |
| 1020 | BitField<22, 2, u64> size5; | 1031 | BitField<22, 2, u64> size5; |
| 1021 | BitField<24, 2, VertexAttributeFormat> format6; | 1032 | BitField<24, 2, VertexAttributeFormat> format6; |
| 1022 | BitField<26, 2, u64> size6; | 1033 | BitField<26, 2, u64> size6; |
| 1023 | BitField<28, 2, VertexAttributeFormat> format7; | 1034 | BitField<28, 2, VertexAttributeFormat> format7; |
| 1024 | BitField<30, 2, u64> size7; | 1035 | BitField<30, 2, u64> size7; |
| 1025 | BitField<32, 2, VertexAttributeFormat> format8; | 1036 | BitField<32, 2, VertexAttributeFormat> format8; |
| 1026 | BitField<34, 2, u64> size8; | 1037 | BitField<34, 2, u64> size8; |
| 1027 | BitField<36, 2, VertexAttributeFormat> format9; | 1038 | BitField<36, 2, VertexAttributeFormat> format9; |
| 1028 | BitField<38, 2, u64> size9; | 1039 | BitField<38, 2, u64> size9; |
| 1029 | BitField<40, 2, VertexAttributeFormat> format10; | 1040 | BitField<40, 2, VertexAttributeFormat> format10; |
| 1030 | BitField<42, 2, u64> size10; | 1041 | BitField<42, 2, u64> size10; |
| 1031 | BitField<44, 2, VertexAttributeFormat> format11; | 1042 | BitField<44, 2, VertexAttributeFormat> format11; |
| 1032 | BitField<46, 2, u64> size11; | 1043 | BitField<46, 2, u64> size11; |
| 1033 | 1044 | ||
| 1034 | BitField<48, 12, u64> attribute_mask; | 1045 | BitField<48, 12, u64> attribute_mask; |
| 1035 | 1046 | ||
| 1036 | // number of total attributes minus 1 | 1047 | // number of total attributes minus 1 |
| 1037 | BitField<60, 4, u64> num_extra_attributes; | 1048 | BitField<60, 4, u64> num_extra_attributes; |
| 1038 | }; | 1049 | }; |
| 1039 | 1050 | ||
| 1040 | inline VertexAttributeFormat GetFormat(int n) const { | 1051 | inline VertexAttributeFormat GetFormat(int n) const { |
| 1041 | VertexAttributeFormat formats[] = { | 1052 | VertexAttributeFormat formats[] = {format0, format1, format2, format3, |
| 1042 | format0, format1, format2, format3, | 1053 | format4, format5, format6, format7, |
| 1043 | format4, format5, format6, format7, | 1054 | format8, format9, format10, format11}; |
| 1044 | format8, format9, format10, format11 | ||
| 1045 | }; | ||
| 1046 | return formats[n]; | 1055 | return formats[n]; |
| 1047 | } | 1056 | } |
| 1048 | 1057 | ||
| 1049 | inline int GetNumElements(int n) const { | 1058 | inline int GetNumElements(int n) const { |
| 1050 | u64 sizes[] = { | 1059 | u64 sizes[] = {size0, size1, size2, size3, size4, size5, |
| 1051 | size0, size1, size2, size3, | 1060 | size6, size7, size8, size9, size10, size11}; |
| 1052 | size4, size5, size6, size7, | 1061 | return (int)sizes[n] + 1; |
| 1053 | size8, size9, size10, size11 | ||
| 1054 | }; | ||
| 1055 | return (int)sizes[n]+1; | ||
| 1056 | } | 1062 | } |
| 1057 | 1063 | ||
| 1058 | inline int GetElementSizeInBytes(int n) const { | 1064 | inline int GetElementSizeInBytes(int n) const { |
| 1059 | return (GetFormat(n) == VertexAttributeFormat::FLOAT) ? 4 : | 1065 | return (GetFormat(n) == VertexAttributeFormat::FLOAT) |
| 1060 | (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1; | 1066 | ? 4 |
| 1067 | : (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1; | ||
| 1061 | } | 1068 | } |
| 1062 | 1069 | ||
| 1063 | inline int GetStride(int n) const { | 1070 | inline int GetStride(int n) const { |
| @@ -1069,7 +1076,7 @@ struct Regs { | |||
| 1069 | } | 1076 | } |
| 1070 | 1077 | ||
| 1071 | inline int GetNumTotalAttributes() const { | 1078 | inline int GetNumTotalAttributes() const { |
| 1072 | return (int)num_extra_attributes+1; | 1079 | return (int)num_extra_attributes + 1; |
| 1073 | } | 1080 | } |
| 1074 | 1081 | ||
| 1075 | // Attribute loaders map the source vertex data to input attributes | 1082 | // Attribute loaders map the source vertex data to input attributes |
| @@ -1079,9 +1086,9 @@ struct Regs { | |||
| 1079 | u32 data_offset; | 1086 | u32 data_offset; |
| 1080 | 1087 | ||
| 1081 | union { | 1088 | union { |
| 1082 | BitField< 0, 4, u64> comp0; | 1089 | BitField<0, 4, u64> comp0; |
| 1083 | BitField< 4, 4, u64> comp1; | 1090 | BitField<4, 4, u64> comp1; |
| 1084 | BitField< 8, 4, u64> comp2; | 1091 | BitField<8, 4, u64> comp2; |
| 1085 | BitField<12, 4, u64> comp3; | 1092 | BitField<12, 4, u64> comp3; |
| 1086 | BitField<16, 4, u64> comp4; | 1093 | BitField<16, 4, u64> comp4; |
| 1087 | BitField<20, 4, u64> comp5; | 1094 | BitField<20, 4, u64> comp5; |
| @@ -1099,11 +1106,8 @@ struct Regs { | |||
| 1099 | }; | 1106 | }; |
| 1100 | 1107 | ||
| 1101 | inline int GetComponent(int n) const { | 1108 | inline int GetComponent(int n) const { |
| 1102 | u64 components[] = { | 1109 | u64 components[] = {comp0, comp1, comp2, comp3, comp4, comp5, |
| 1103 | comp0, comp1, comp2, comp3, | 1110 | comp6, comp7, comp8, comp9, comp10, comp11}; |
| 1104 | comp4, comp5, comp6, comp7, | ||
| 1105 | comp8, comp9, comp10, comp11 | ||
| 1106 | }; | ||
| 1107 | return (int)components[n]; | 1111 | return (int)components[n]; |
| 1108 | } | 1112 | } |
| 1109 | } attribute_loaders[12]; | 1113 | } attribute_loaders[12]; |
| @@ -1157,8 +1161,8 @@ struct Regs { | |||
| 1157 | // kicked off. | 1161 | // kicked off. |
| 1158 | // 2) Games can configure these registers to provide a command list subroutine mechanism. | 1162 | // 2) Games can configure these registers to provide a command list subroutine mechanism. |
| 1159 | 1163 | ||
| 1160 | BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer | 1164 | BitField<0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer |
| 1161 | BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer | 1165 | BitField<0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer |
| 1162 | u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to | 1166 | u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to |
| 1163 | 1167 | ||
| 1164 | unsigned GetSize(unsigned index) const { | 1168 | unsigned GetSize(unsigned index) const { |
| @@ -1176,7 +1180,7 @@ struct Regs { | |||
| 1176 | 1180 | ||
| 1177 | enum class GPUMode : u32 { | 1181 | enum class GPUMode : u32 { |
| 1178 | Drawing = 0, | 1182 | Drawing = 0, |
| 1179 | Configuring = 1 | 1183 | Configuring = 1, |
| 1180 | }; | 1184 | }; |
| 1181 | 1185 | ||
| 1182 | GPUMode gpu_mode; | 1186 | GPUMode gpu_mode; |
| @@ -1184,9 +1188,9 @@ struct Regs { | |||
| 1184 | INSERT_PADDING_WORDS(0x18); | 1188 | INSERT_PADDING_WORDS(0x18); |
| 1185 | 1189 | ||
| 1186 | enum class TriangleTopology : u32 { | 1190 | enum class TriangleTopology : u32 { |
| 1187 | List = 0, | 1191 | List = 0, |
| 1188 | Strip = 1, | 1192 | Strip = 1, |
| 1189 | Fan = 2, | 1193 | Fan = 2, |
| 1190 | Shader = 3, // Programmable setup unit implemented in a geometry shader | 1194 | Shader = 3, // Programmable setup unit implemented in a geometry shader |
| 1191 | }; | 1195 | }; |
| 1192 | 1196 | ||
| @@ -1200,8 +1204,8 @@ struct Regs { | |||
| 1200 | BitField<0, 16, u32> bool_uniforms; | 1204 | BitField<0, 16, u32> bool_uniforms; |
| 1201 | 1205 | ||
| 1202 | union { | 1206 | union { |
| 1203 | BitField< 0, 8, u32> x; | 1207 | BitField<0, 8, u32> x; |
| 1204 | BitField< 8, 8, u32> y; | 1208 | BitField<8, 8, u32> y; |
| 1205 | BitField<16, 8, u32> z; | 1209 | BitField<16, 8, u32> z; |
| 1206 | BitField<24, 8, u32> w; | 1210 | BitField<24, 8, u32> w; |
| 1207 | } int_uniforms[4]; | 1211 | } int_uniforms[4]; |
| @@ -1217,9 +1221,9 @@ struct Regs { | |||
| 1217 | BitField<0, 16, u32> main_offset; | 1221 | BitField<0, 16, u32> main_offset; |
| 1218 | 1222 | ||
| 1219 | union { | 1223 | union { |
| 1220 | BitField< 0, 4, u64> attribute0_register; | 1224 | BitField<0, 4, u64> attribute0_register; |
| 1221 | BitField< 4, 4, u64> attribute1_register; | 1225 | BitField<4, 4, u64> attribute1_register; |
| 1222 | BitField< 8, 4, u64> attribute2_register; | 1226 | BitField<8, 4, u64> attribute2_register; |
| 1223 | BitField<12, 4, u64> attribute3_register; | 1227 | BitField<12, 4, u64> attribute3_register; |
| 1224 | BitField<16, 4, u64> attribute4_register; | 1228 | BitField<16, 4, u64> attribute4_register; |
| 1225 | BitField<20, 4, u64> attribute5_register; | 1229 | BitField<20, 4, u64> attribute5_register; |
| @@ -1236,10 +1240,12 @@ struct Regs { | |||
| 1236 | 1240 | ||
| 1237 | int GetRegisterForAttribute(int attribute_index) const { | 1241 | int GetRegisterForAttribute(int attribute_index) const { |
| 1238 | u64 fields[] = { | 1242 | u64 fields[] = { |
| 1239 | attribute0_register, attribute1_register, attribute2_register, attribute3_register, | 1243 | attribute0_register, attribute1_register, attribute2_register, |
| 1240 | attribute4_register, attribute5_register, attribute6_register, attribute7_register, | 1244 | attribute3_register, attribute4_register, attribute5_register, |
| 1241 | attribute8_register, attribute9_register, attribute10_register, attribute11_register, | 1245 | attribute6_register, attribute7_register, attribute8_register, |
| 1242 | attribute12_register, attribute13_register, attribute14_register, attribute15_register, | 1246 | attribute9_register, attribute10_register, attribute11_register, |
| 1247 | attribute12_register, attribute13_register, attribute14_register, | ||
| 1248 | attribute15_register, | ||
| 1243 | }; | 1249 | }; |
| 1244 | return (int)fields[attribute_index]; | 1250 | return (int)fields[attribute_index]; |
| 1245 | } | 1251 | } |
| @@ -1251,10 +1257,9 @@ struct Regs { | |||
| 1251 | INSERT_PADDING_WORDS(0x2); | 1257 | INSERT_PADDING_WORDS(0x2); |
| 1252 | 1258 | ||
| 1253 | struct { | 1259 | struct { |
| 1254 | enum Format : u32 | 1260 | enum Format : u32 { |
| 1255 | { | ||
| 1256 | FLOAT24 = 0, | 1261 | FLOAT24 = 0, |
| 1257 | FLOAT32 = 1 | 1262 | FLOAT32 = 1, |
| 1258 | }; | 1263 | }; |
| 1259 | 1264 | ||
| 1260 | bool IsFloat32() const { | 1265 | bool IsFloat32() const { |
| @@ -1263,7 +1268,8 @@ struct Regs { | |||
| 1263 | 1268 | ||
| 1264 | union { | 1269 | union { |
| 1265 | // Index of the next uniform to write to | 1270 | // Index of the next uniform to write to |
| 1266 | // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices | 1271 | // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid |
| 1272 | // indices | ||
| 1267 | // TODO: Maybe the uppermost index is for the geometry shader? Investigate! | 1273 | // TODO: Maybe the uppermost index is for the geometry shader? Investigate! |
| 1268 | BitField<0, 7, u32> index; | 1274 | BitField<0, 7, u32> index; |
| 1269 | 1275 | ||
| @@ -1315,12 +1321,12 @@ struct Regs { | |||
| 1315 | return sizeof(Regs) / sizeof(u32); | 1321 | return sizeof(Regs) / sizeof(u32); |
| 1316 | } | 1322 | } |
| 1317 | 1323 | ||
| 1318 | const u32& operator [] (int index) const { | 1324 | const u32& operator[](int index) const { |
| 1319 | const u32* content = reinterpret_cast<const u32*>(this); | 1325 | const u32* content = reinterpret_cast<const u32*>(this); |
| 1320 | return content[index]; | 1326 | return content[index]; |
| 1321 | } | 1327 | } |
| 1322 | 1328 | ||
| 1323 | u32& operator [] (int index) { | 1329 | u32& operator[](int index) { |
| 1324 | u32* content = reinterpret_cast<u32*>(this); | 1330 | u32* content = reinterpret_cast<u32*>(this); |
| 1325 | return content[index]; | 1331 | return content[index]; |
| 1326 | } | 1332 | } |
| @@ -1339,7 +1345,9 @@ private: | |||
| 1339 | // is technically allowed since C++11. This macro should be enabled once MSVC adds | 1345 | // is technically allowed since C++11. This macro should be enabled once MSVC adds |
| 1340 | // support for that. | 1346 | // support for that. |
| 1341 | #ifndef _MSC_VER | 1347 | #ifndef _MSC_VER |
| 1342 | #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") | 1348 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 1349 | static_assert(offsetof(Regs, field_name) == position * 4, \ | ||
| 1350 | "Field " #field_name " has invalid position") | ||
| 1343 | 1351 | ||
| 1344 | ASSERT_REG_POSITION(trigger_irq, 0x10); | 1352 | ASSERT_REG_POSITION(trigger_irq, 0x10); |
| 1345 | ASSERT_REG_POSITION(cull_mode, 0x40); | 1353 | ASSERT_REG_POSITION(cull_mode, 0x40); |
| @@ -1392,11 +1400,15 @@ ASSERT_REG_POSITION(vs, 0x2b0); | |||
| 1392 | #undef ASSERT_REG_POSITION | 1400 | #undef ASSERT_REG_POSITION |
| 1393 | #endif // !defined(_MSC_VER) | 1401 | #endif // !defined(_MSC_VER) |
| 1394 | 1402 | ||
| 1395 | static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size"); | 1403 | static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), |
| 1404 | "ShaderConfig structure has incorrect size"); | ||
| 1396 | 1405 | ||
| 1397 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. | 1406 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value |
| 1398 | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); | 1407 | // anyway. |
| 1399 | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); | 1408 | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), |
| 1409 | "Register set structure larger than it should be"); | ||
| 1410 | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), | ||
| 1411 | "Register set structure smaller than it should be"); | ||
| 1400 | 1412 | ||
| 1401 | /// Initialize Pica state | 1413 | /// Initialize Pica state |
| 1402 | void Init(); | 1414 | void Init(); |
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 01f4285a8..e4f2e6d5d 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h | |||
| @@ -5,10 +5,8 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | |||
| 9 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 10 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 11 | |||
| 12 | #include "video_core/pica.h" | 10 | #include "video_core/pica.h" |
| 13 | #include "video_core/primitive_assembly.h" | 11 | #include "video_core/primitive_assembly.h" |
| 14 | #include "video_core/shader/shader.h" | 12 | #include "video_core/shader/shader.h" |
| @@ -33,7 +31,7 @@ struct State { | |||
| 33 | u32 raw; | 31 | u32 raw; |
| 34 | 32 | ||
| 35 | // LUT value, encoded as 12-bit fixed point, with 12 fraction bits | 33 | // LUT value, encoded as 12-bit fixed point, with 12 fraction bits |
| 36 | BitField< 0, 12, u32> value; // 0.0.12 fixed point | 34 | BitField<0, 12, u32> value; // 0.0.12 fixed point |
| 37 | 35 | ||
| 38 | // Used by HW for efficient interpolation, Citra does not use these | 36 | // Used by HW for efficient interpolation, Citra does not use these |
| 39 | BitField<12, 12, s32> difference; // 1.0.11 fixed point | 37 | BitField<12, 12, s32> difference; // 1.0.11 fixed point |
| @@ -51,8 +49,8 @@ struct State { | |||
| 51 | // Used for raw access | 49 | // Used for raw access |
| 52 | u32 raw; | 50 | u32 raw; |
| 53 | 51 | ||
| 54 | BitField< 0, 13, s32> difference; // 1.1.11 fixed point | 52 | BitField<0, 13, s32> difference; // 1.1.11 fixed point |
| 55 | BitField<13, 11, u32> value; // 0.0.11 fixed point | 53 | BitField<13, 11, u32> value; // 0.0.11 fixed point |
| 56 | }; | 54 | }; |
| 57 | 55 | ||
| 58 | std::array<LutEntry, 128> lut; | 56 | std::array<LutEntry, 128> lut; |
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index 3b7bfbdca..5d7e10066 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | #include <cmath> | 7 | #include <cmath> |
| 8 | #include <cstring> | 8 | #include <cstring> |
| 9 | |||
| 10 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 11 | 10 | ||
| 12 | namespace Pica { | 11 | namespace Pica { |
| @@ -22,7 +21,7 @@ namespace Pica { | |||
| 22 | * | 21 | * |
| 23 | * @todo Verify on HW if this conversion is sufficiently accurate. | 22 | * @todo Verify on HW if this conversion is sufficiently accurate. |
| 24 | */ | 23 | */ |
| 25 | template<unsigned M, unsigned E> | 24 | template <unsigned M, unsigned E> |
| 26 | struct Float { | 25 | struct Float { |
| 27 | public: | 26 | public: |
| 28 | static Float<M, E> FromFloat32(float val) { | 27 | static Float<M, E> FromFloat32(float val) { |
| @@ -58,7 +57,7 @@ public: | |||
| 58 | return value; | 57 | return value; |
| 59 | } | 58 | } |
| 60 | 59 | ||
| 61 | Float<M, E> operator * (const Float<M, E>& flt) const { | 60 | Float<M, E> operator*(const Float<M, E>& flt) const { |
| 62 | if ((this->value == 0.f && !std::isnan(flt.value)) || | 61 | if ((this->value == 0.f && !std::isnan(flt.value)) || |
| 63 | (flt.value == 0.f && !std::isnan(this->value))) | 62 | (flt.value == 0.f && !std::isnan(this->value))) |
| 64 | // PICA gives 0 instead of NaN when multiplying by inf | 63 | // PICA gives 0 instead of NaN when multiplying by inf |
| @@ -66,67 +65,68 @@ public: | |||
| 66 | return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32()); | 65 | return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32()); |
| 67 | } | 66 | } |
| 68 | 67 | ||
| 69 | Float<M, E> operator / (const Float<M, E>& flt) const { | 68 | Float<M, E> operator/(const Float<M, E>& flt) const { |
| 70 | return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); | 69 | return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); |
| 71 | } | 70 | } |
| 72 | 71 | ||
| 73 | Float<M, E> operator + (const Float<M, E>& flt) const { | 72 | Float<M, E> operator+(const Float<M, E>& flt) const { |
| 74 | return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); | 73 | return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); |
| 75 | } | 74 | } |
| 76 | 75 | ||
| 77 | Float<M, E> operator - (const Float<M, E>& flt) const { | 76 | Float<M, E> operator-(const Float<M, E>& flt) const { |
| 78 | return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); | 77 | return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); |
| 79 | } | 78 | } |
| 80 | 79 | ||
| 81 | Float<M, E>& operator *= (const Float<M, E>& flt) { | 80 | Float<M, E>& operator*=(const Float<M, E>& flt) { |
| 82 | if ((this->value == 0.f && !std::isnan(flt.value)) || | 81 | if ((this->value == 0.f && !std::isnan(flt.value)) || |
| 83 | (flt.value == 0.f && !std::isnan(this->value))) | 82 | (flt.value == 0.f && !std::isnan(this->value))) |
| 84 | // PICA gives 0 instead of NaN when multiplying by inf | 83 | // PICA gives 0 instead of NaN when multiplying by inf |
| 85 | *this = Zero(); | 84 | *this = Zero(); |
| 86 | else value *= flt.ToFloat32(); | 85 | else |
| 86 | value *= flt.ToFloat32(); | ||
| 87 | return *this; | 87 | return *this; |
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | Float<M, E>& operator /= (const Float<M, E>& flt) { | 90 | Float<M, E>& operator/=(const Float<M, E>& flt) { |
| 91 | value /= flt.ToFloat32(); | 91 | value /= flt.ToFloat32(); |
| 92 | return *this; | 92 | return *this; |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | Float<M, E>& operator += (const Float<M, E>& flt) { | 95 | Float<M, E>& operator+=(const Float<M, E>& flt) { |
| 96 | value += flt.ToFloat32(); | 96 | value += flt.ToFloat32(); |
| 97 | return *this; | 97 | return *this; |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | Float<M, E>& operator -= (const Float<M, E>& flt) { | 100 | Float<M, E>& operator-=(const Float<M, E>& flt) { |
| 101 | value -= flt.ToFloat32(); | 101 | value -= flt.ToFloat32(); |
| 102 | return *this; | 102 | return *this; |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | Float<M, E> operator - () const { | 105 | Float<M, E> operator-() const { |
| 106 | return Float<M, E>::FromFloat32(-ToFloat32()); | 106 | return Float<M, E>::FromFloat32(-ToFloat32()); |
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | bool operator < (const Float<M, E>& flt) const { | 109 | bool operator<(const Float<M, E>& flt) const { |
| 110 | return ToFloat32() < flt.ToFloat32(); | 110 | return ToFloat32() < flt.ToFloat32(); |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | bool operator > (const Float<M, E>& flt) const { | 113 | bool operator>(const Float<M, E>& flt) const { |
| 114 | return ToFloat32() > flt.ToFloat32(); | 114 | return ToFloat32() > flt.ToFloat32(); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | bool operator >= (const Float<M, E>& flt) const { | 117 | bool operator>=(const Float<M, E>& flt) const { |
| 118 | return ToFloat32() >= flt.ToFloat32(); | 118 | return ToFloat32() >= flt.ToFloat32(); |
| 119 | } | 119 | } |
| 120 | 120 | ||
| 121 | bool operator <= (const Float<M, E>& flt) const { | 121 | bool operator<=(const Float<M, E>& flt) const { |
| 122 | return ToFloat32() <= flt.ToFloat32(); | 122 | return ToFloat32() <= flt.ToFloat32(); |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | bool operator == (const Float<M, E>& flt) const { | 125 | bool operator==(const Float<M, E>& flt) const { |
| 126 | return ToFloat32() == flt.ToFloat32(); | 126 | return ToFloat32() == flt.ToFloat32(); |
| 127 | } | 127 | } |
| 128 | 128 | ||
| 129 | bool operator != (const Float<M, E>& flt) const { | 129 | bool operator!=(const Float<M, E>& flt) const { |
| 130 | return ToFloat32() != flt.ToFloat32(); | 130 | return ToFloat32() != flt.ToFloat32(); |
| 131 | } | 131 | } |
| 132 | 132 | ||
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 68ea3c08a..be7377290 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp | |||
| @@ -3,69 +3,66 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | |||
| 7 | #include "video_core/pica.h" | 6 | #include "video_core/pica.h" |
| 8 | #include "video_core/primitive_assembly.h" | 7 | #include "video_core/primitive_assembly.h" |
| 9 | #include "video_core/shader/shader.h" | 8 | #include "video_core/shader/shader.h" |
| 10 | 9 | ||
| 11 | namespace Pica { | 10 | namespace Pica { |
| 12 | 11 | ||
| 13 | template<typename VertexType> | 12 | template <typename VertexType> |
| 14 | PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology) | 13 | PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology) |
| 15 | : topology(topology), buffer_index(0) { | 14 | : topology(topology), buffer_index(0) {} |
| 16 | } | ||
| 17 | 15 | ||
| 18 | template<typename VertexType> | 16 | template <typename VertexType> |
| 19 | void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) | 17 | void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, |
| 20 | { | 18 | TriangleHandler triangle_handler) { |
| 21 | switch (topology) { | 19 | switch (topology) { |
| 22 | // TODO: Figure out what's different with TriangleTopology::Shader. | 20 | // TODO: Figure out what's different with TriangleTopology::Shader. |
| 23 | case Regs::TriangleTopology::List: | 21 | case Regs::TriangleTopology::List: |
| 24 | case Regs::TriangleTopology::Shader: | 22 | case Regs::TriangleTopology::Shader: |
| 25 | if (buffer_index < 2) { | 23 | if (buffer_index < 2) { |
| 26 | buffer[buffer_index++] = vtx; | 24 | buffer[buffer_index++] = vtx; |
| 27 | } else { | 25 | } else { |
| 28 | buffer_index = 0; | 26 | buffer_index = 0; |
| 29 | 27 | ||
| 30 | triangle_handler(buffer[0], buffer[1], vtx); | 28 | triangle_handler(buffer[0], buffer[1], vtx); |
| 31 | } | 29 | } |
| 32 | break; | 30 | break; |
| 33 | 31 | ||
| 34 | case Regs::TriangleTopology::Strip: | 32 | case Regs::TriangleTopology::Strip: |
| 35 | case Regs::TriangleTopology::Fan: | 33 | case Regs::TriangleTopology::Fan: |
| 36 | if (strip_ready) | 34 | if (strip_ready) |
| 37 | triangle_handler(buffer[0], buffer[1], vtx); | 35 | triangle_handler(buffer[0], buffer[1], vtx); |
| 38 | 36 | ||
| 39 | buffer[buffer_index] = vtx; | 37 | buffer[buffer_index] = vtx; |
| 40 | 38 | ||
| 41 | strip_ready |= (buffer_index == 1); | 39 | strip_ready |= (buffer_index == 1); |
| 42 | 40 | ||
| 43 | if (topology == Regs::TriangleTopology::Strip) | 41 | if (topology == Regs::TriangleTopology::Strip) |
| 44 | buffer_index = !buffer_index; | 42 | buffer_index = !buffer_index; |
| 45 | else if (topology == Regs::TriangleTopology::Fan) | 43 | else if (topology == Regs::TriangleTopology::Fan) |
| 46 | buffer_index = 1; | 44 | buffer_index = 1; |
| 47 | break; | 45 | break; |
| 48 | 46 | ||
| 49 | default: | 47 | default: |
| 50 | LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology); | 48 | LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology); |
| 51 | break; | 49 | break; |
| 52 | } | 50 | } |
| 53 | } | 51 | } |
| 54 | 52 | ||
| 55 | template<typename VertexType> | 53 | template <typename VertexType> |
| 56 | void PrimitiveAssembler<VertexType>::Reset() { | 54 | void PrimitiveAssembler<VertexType>::Reset() { |
| 57 | buffer_index = 0; | 55 | buffer_index = 0; |
| 58 | strip_ready = false; | 56 | strip_ready = false; |
| 59 | } | 57 | } |
| 60 | 58 | ||
| 61 | template<typename VertexType> | 59 | template <typename VertexType> |
| 62 | void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) { | 60 | void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) { |
| 63 | Reset(); | 61 | Reset(); |
| 64 | this->topology = topology; | 62 | this->topology = topology; |
| 65 | } | 63 | } |
| 66 | 64 | ||
| 67 | // explicitly instantiate use cases | 65 | // explicitly instantiate use cases |
| 68 | template | 66 | template struct PrimitiveAssembler<Shader::OutputVertex>; |
| 69 | struct PrimitiveAssembler<Shader::OutputVertex>; | ||
| 70 | 67 | ||
| 71 | } // namespace | 68 | } // namespace |
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index 9396b4c85..0384d5984 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <functional> | 7 | #include <functional> |
| 8 | |||
| 9 | #include "video_core/pica.h" | 8 | #include "video_core/pica.h" |
| 10 | 9 | ||
| 11 | namespace Pica { | 10 | namespace Pica { |
| @@ -14,11 +13,9 @@ namespace Pica { | |||
| 14 | * Utility class to build triangles from a series of vertices, | 13 | * Utility class to build triangles from a series of vertices, |
| 15 | * according to a given triangle topology. | 14 | * according to a given triangle topology. |
| 16 | */ | 15 | */ |
| 17 | template<typename VertexType> | 16 | template <typename VertexType> |
| 18 | struct PrimitiveAssembler { | 17 | struct PrimitiveAssembler { |
| 19 | using TriangleHandler = std::function<void(VertexType& v0, | 18 | using TriangleHandler = std::function<void(VertexType& v0, VertexType& v1, VertexType& v2)>; |
| 20 | VertexType& v1, | ||
| 21 | VertexType& v2)>; | ||
| 22 | 19 | ||
| 23 | PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); | 20 | PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); |
| 24 | 21 | ||
| @@ -48,5 +45,4 @@ private: | |||
| 48 | bool strip_ready = false; | 45 | bool strip_ready = false; |
| 49 | }; | 46 | }; |
| 50 | 47 | ||
| 51 | |||
| 52 | } // namespace | 48 | } // namespace |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 6f369a00e..6c4bbed33 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cmath> | 7 | #include <cmath> |
| 8 | |||
| 9 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 10 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 11 | #include "common/color.h" | 10 | #include "common/color.h" |
| @@ -14,17 +13,15 @@ | |||
| 14 | #include "common/math_util.h" | 13 | #include "common/math_util.h" |
| 15 | #include "common/microprofile.h" | 14 | #include "common/microprofile.h" |
| 16 | #include "common/vector_math.h" | 15 | #include "common/vector_math.h" |
| 17 | |||
| 18 | #include "core/memory.h" | ||
| 19 | #include "core/hw/gpu.h" | 16 | #include "core/hw/gpu.h" |
| 20 | 17 | #include "core/memory.h" | |
| 21 | #include "video_core/debug_utils/debug_utils.h" | 18 | #include "video_core/debug_utils/debug_utils.h" |
| 22 | #include "video_core/pica.h" | 19 | #include "video_core/pica.h" |
| 23 | #include "video_core/pica_state.h" | 20 | #include "video_core/pica_state.h" |
| 24 | #include "video_core/pica_types.h" | 21 | #include "video_core/pica_types.h" |
| 25 | #include "video_core/rasterizer.h" | 22 | #include "video_core/rasterizer.h" |
| 26 | #include "video_core/utils.h" | ||
| 27 | #include "video_core/shader/shader.h" | 23 | #include "video_core/shader/shader.h" |
| 24 | #include "video_core/utils.h" | ||
| 28 | 25 | ||
| 29 | namespace Pica { | 26 | namespace Pica { |
| 30 | 27 | ||
| @@ -39,8 +36,10 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |||
| 39 | y = framebuffer.height - y; | 36 | y = framebuffer.height - y; |
| 40 | 37 | ||
| 41 | const u32 coarse_y = y & ~7; | 38 | const u32 coarse_y = y & ~7; |
| 42 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | 39 | u32 bytes_per_pixel = |
| 43 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; | 40 | GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |
| 41 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||
| 42 | coarse_y * framebuffer.width * bytes_per_pixel; | ||
| 44 | u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; | 43 | u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; |
| 45 | 44 | ||
| 46 | switch (framebuffer.color_format) { | 45 | switch (framebuffer.color_format) { |
| @@ -65,7 +64,8 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |||
| 65 | break; | 64 | break; |
| 66 | 65 | ||
| 67 | default: | 66 | default: |
| 68 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); | 67 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", |
| 68 | framebuffer.color_format.Value()); | ||
| 69 | UNIMPLEMENTED(); | 69 | UNIMPLEMENTED(); |
| 70 | } | 70 | } |
| 71 | } | 71 | } |
| @@ -77,8 +77,10 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 77 | y = framebuffer.height - y; | 77 | y = framebuffer.height - y; |
| 78 | 78 | ||
| 79 | const u32 coarse_y = y & ~7; | 79 | const u32 coarse_y = y & ~7; |
| 80 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | 80 | u32 bytes_per_pixel = |
| 81 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; | 81 | GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |
| 82 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||
| 83 | coarse_y * framebuffer.width * bytes_per_pixel; | ||
| 82 | u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; | 84 | u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; |
| 83 | 85 | ||
| 84 | switch (framebuffer.color_format) { | 86 | switch (framebuffer.color_format) { |
| @@ -98,7 +100,8 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 98 | return Color::DecodeRGBA4(src_pixel); | 100 | return Color::DecodeRGBA4(src_pixel); |
| 99 | 101 | ||
| 100 | default: | 102 | default: |
| 101 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); | 103 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", |
| 104 | framebuffer.color_format.Value()); | ||
| 102 | UNIMPLEMENTED(); | 105 | UNIMPLEMENTED(); |
| 103 | } | 106 | } |
| 104 | 107 | ||
| @@ -120,16 +123,16 @@ static u32 GetDepth(int x, int y) { | |||
| 120 | u8* src_pixel = depth_buffer + src_offset; | 123 | u8* src_pixel = depth_buffer + src_offset; |
| 121 | 124 | ||
| 122 | switch (framebuffer.depth_format) { | 125 | switch (framebuffer.depth_format) { |
| 123 | case Regs::DepthFormat::D16: | 126 | case Regs::DepthFormat::D16: |
| 124 | return Color::DecodeD16(src_pixel); | 127 | return Color::DecodeD16(src_pixel); |
| 125 | case Regs::DepthFormat::D24: | 128 | case Regs::DepthFormat::D24: |
| 126 | return Color::DecodeD24(src_pixel); | 129 | return Color::DecodeD24(src_pixel); |
| 127 | case Regs::DepthFormat::D24S8: | 130 | case Regs::DepthFormat::D24S8: |
| 128 | return Color::DecodeD24S8(src_pixel).x; | 131 | return Color::DecodeD24S8(src_pixel).x; |
| 129 | default: | 132 | default: |
| 130 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | 133 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 131 | UNIMPLEMENTED(); | 134 | UNIMPLEMENTED(); |
| 132 | return 0; | 135 | return 0; |
| 133 | } | 136 | } |
| 134 | } | 137 | } |
| 135 | 138 | ||
| @@ -148,12 +151,15 @@ static u8 GetStencil(int x, int y) { | |||
| 148 | u8* src_pixel = depth_buffer + src_offset; | 151 | u8* src_pixel = depth_buffer + src_offset; |
| 149 | 152 | ||
| 150 | switch (framebuffer.depth_format) { | 153 | switch (framebuffer.depth_format) { |
| 151 | case Regs::DepthFormat::D24S8: | 154 | case Regs::DepthFormat::D24S8: |
| 152 | return Color::DecodeD24S8(src_pixel).y; | 155 | return Color::DecodeD24S8(src_pixel).y; |
| 153 | 156 | ||
| 154 | default: | 157 | default: |
| 155 | LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); | 158 | LOG_WARNING( |
| 156 | return 0; | 159 | HW_GPU, |
| 160 | "GetStencil called for function which doesn't have a stencil component (format %u)", | ||
| 161 | framebuffer.depth_format); | ||
| 162 | return 0; | ||
| 157 | } | 163 | } |
| 158 | } | 164 | } |
| 159 | 165 | ||
| @@ -172,22 +178,22 @@ static void SetDepth(int x, int y, u32 value) { | |||
| 172 | u8* dst_pixel = depth_buffer + dst_offset; | 178 | u8* dst_pixel = depth_buffer + dst_offset; |
| 173 | 179 | ||
| 174 | switch (framebuffer.depth_format) { | 180 | switch (framebuffer.depth_format) { |
| 175 | case Regs::DepthFormat::D16: | 181 | case Regs::DepthFormat::D16: |
| 176 | Color::EncodeD16(value, dst_pixel); | 182 | Color::EncodeD16(value, dst_pixel); |
| 177 | break; | 183 | break; |
| 178 | 184 | ||
| 179 | case Regs::DepthFormat::D24: | 185 | case Regs::DepthFormat::D24: |
| 180 | Color::EncodeD24(value, dst_pixel); | 186 | Color::EncodeD24(value, dst_pixel); |
| 181 | break; | 187 | break; |
| 182 | 188 | ||
| 183 | case Regs::DepthFormat::D24S8: | 189 | case Regs::DepthFormat::D24S8: |
| 184 | Color::EncodeD24X8(value, dst_pixel); | 190 | Color::EncodeD24X8(value, dst_pixel); |
| 185 | break; | 191 | break; |
| 186 | 192 | ||
| 187 | default: | 193 | default: |
| 188 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | 194 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 189 | UNIMPLEMENTED(); | 195 | UNIMPLEMENTED(); |
| 190 | break; | 196 | break; |
| 191 | } | 197 | } |
| 192 | } | 198 | } |
| 193 | 199 | ||
| @@ -206,19 +212,19 @@ static void SetStencil(int x, int y, u8 value) { | |||
| 206 | u8* dst_pixel = depth_buffer + dst_offset; | 212 | u8* dst_pixel = depth_buffer + dst_offset; |
| 207 | 213 | ||
| 208 | switch (framebuffer.depth_format) { | 214 | switch (framebuffer.depth_format) { |
| 209 | case Pica::Regs::DepthFormat::D16: | 215 | case Pica::Regs::DepthFormat::D16: |
| 210 | case Pica::Regs::DepthFormat::D24: | 216 | case Pica::Regs::DepthFormat::D24: |
| 211 | // Nothing to do | 217 | // Nothing to do |
| 212 | break; | 218 | break; |
| 213 | 219 | ||
| 214 | case Pica::Regs::DepthFormat::D24S8: | 220 | case Pica::Regs::DepthFormat::D24S8: |
| 215 | Color::EncodeX24S8(value, dst_pixel); | 221 | Color::EncodeX24S8(value, dst_pixel); |
| 216 | break; | 222 | break; |
| 217 | 223 | ||
| 218 | default: | 224 | default: |
| 219 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | 225 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 220 | UNIMPLEMENTED(); | 226 | UNIMPLEMENTED(); |
| 221 | break; | 227 | break; |
| 222 | } | 228 | } |
| 223 | } | 229 | } |
| 224 | 230 | ||
| @@ -262,15 +268,19 @@ struct Fix12P4 { | |||
| 262 | Fix12P4() {} | 268 | Fix12P4() {} |
| 263 | Fix12P4(u16 val) : val(val) {} | 269 | Fix12P4(u16 val) : val(val) {} |
| 264 | 270 | ||
| 265 | static u16 FracMask() { return 0xF; } | 271 | static u16 FracMask() { |
| 266 | static u16 IntMask() { return (u16)~0xF; } | 272 | return 0xF; |
| 273 | } | ||
| 274 | static u16 IntMask() { | ||
| 275 | return (u16)~0xF; | ||
| 276 | } | ||
| 267 | 277 | ||
| 268 | operator u16() const { | 278 | operator u16() const { |
| 269 | return val; | 279 | return val; |
| 270 | } | 280 | } |
| 271 | 281 | ||
| 272 | bool operator < (const Fix12P4& oth) const { | 282 | bool operator<(const Fix12P4& oth) const { |
| 273 | return (u16)*this < (u16)oth; | 283 | return (u16) * this < (u16)oth; |
| 274 | } | 284 | } |
| 275 | 285 | ||
| 276 | private: | 286 | private: |
| @@ -283,9 +293,8 @@ private: | |||
| 283 | * | 293 | * |
| 284 | * @todo define orientation concretely. | 294 | * @todo define orientation concretely. |
| 285 | */ | 295 | */ |
| 286 | static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, | 296 | static int SignedArea(const Math::Vec2<Fix12P4>& vtx1, const Math::Vec2<Fix12P4>& vtx2, |
| 287 | const Math::Vec2<Fix12P4>& vtx2, | 297 | const Math::Vec2<Fix12P4>& vtx3) { |
| 288 | const Math::Vec2<Fix12P4>& vtx3) { | ||
| 289 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); | 298 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); |
| 290 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); | 299 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); |
| 291 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 | 300 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 |
| @@ -298,11 +307,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24 | |||
| 298 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing | 307 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing |
| 299 | * culling via recursion. | 308 | * culling via recursion. |
| 300 | */ | 309 | */ |
| 301 | static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | 310 | static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, |
| 302 | const Shader::OutputVertex& v1, | 311 | const Shader::OutputVertex& v2, bool reversed = false) { |
| 303 | const Shader::OutputVertex& v2, | ||
| 304 | bool reversed = false) | ||
| 305 | { | ||
| 306 | const auto& regs = g_state.regs; | 312 | const auto& regs = g_state.regs; |
| 307 | MICROPROFILE_SCOPE(GPU_Rasterization); | 313 | MICROPROFILE_SCOPE(GPU_Rasterization); |
| 308 | 314 | ||
| @@ -316,9 +322,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 316 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | 322 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; |
| 317 | }; | 323 | }; |
| 318 | 324 | ||
| 319 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | 325 | Math::Vec3<Fix12P4> vtxpos[3]{ScreenToRasterizerCoordinates(v0.screenpos), |
| 320 | ScreenToRasterizerCoordinates(v1.screenpos), | 326 | ScreenToRasterizerCoordinates(v1.screenpos), |
| 321 | ScreenToRasterizerCoordinates(v2.screenpos) }; | 327 | ScreenToRasterizerCoordinates(v2.screenpos)}; |
| 322 | 328 | ||
| 323 | if (regs.cull_mode == Regs::CullMode::KeepAll) { | 329 | if (regs.cull_mode == Regs::CullMode::KeepAll) { |
| 324 | // Make sure we always end up with a triangle wound counter-clockwise | 330 | // Make sure we always end up with a triangle wound counter-clockwise |
| @@ -344,8 +350,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 344 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | 350 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); |
| 345 | 351 | ||
| 346 | // Convert the scissor box coordinates to 12.4 fixed point | 352 | // Convert the scissor box coordinates to 12.4 fixed point |
| 347 | u16 scissor_x1 = (u16)( regs.scissor_test.x1 << 4); | 353 | u16 scissor_x1 = (u16)(regs.scissor_test.x1 << 4); |
| 348 | u16 scissor_y1 = (u16)( regs.scissor_test.y1 << 4); | 354 | u16 scissor_y1 = (u16)(regs.scissor_test.y1 << 4); |
| 349 | // x2,y2 have +1 added to cover the entire sub-pixel area | 355 | // x2,y2 have +1 added to cover the entire sub-pixel area |
| 350 | u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); | 356 | u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); |
| 351 | u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); | 357 | u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); |
| @@ -369,27 +375,32 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 369 | // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... | 375 | // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... |
| 370 | auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, | 376 | auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, |
| 371 | const Math::Vec2<Fix12P4>& line1, | 377 | const Math::Vec2<Fix12P4>& line1, |
| 372 | const Math::Vec2<Fix12P4>& line2) | 378 | const Math::Vec2<Fix12P4>& line2) { |
| 373 | { | ||
| 374 | if (line1.y == line2.y) { | 379 | if (line1.y == line2.y) { |
| 375 | // just check if vertex is above us => bottom line parallel to x-axis | 380 | // just check if vertex is above us => bottom line parallel to x-axis |
| 376 | return vtx.y < line1.y; | 381 | return vtx.y < line1.y; |
| 377 | } else { | 382 | } else { |
| 378 | // check if vertex is on our left => right side | 383 | // check if vertex is on our left => right side |
| 379 | // TODO: Not sure how likely this is to overflow | 384 | // TODO: Not sure how likely this is to overflow |
| 380 | return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y); | 385 | return (int)vtx.x < (int)line1.x + |
| 386 | ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / | ||
| 387 | ((int)line2.y - (int)line1.y); | ||
| 381 | } | 388 | } |
| 382 | }; | 389 | }; |
| 383 | int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; | 390 | int bias0 = |
| 384 | int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | 391 | IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; |
| 385 | int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | 392 | int bias1 = |
| 393 | IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | ||
| 394 | int bias2 = | ||
| 395 | IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | ||
| 386 | 396 | ||
| 387 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); | 397 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); |
| 388 | 398 | ||
| 389 | auto textures = regs.GetTextures(); | 399 | auto textures = regs.GetTextures(); |
| 390 | auto tev_stages = regs.GetTevStages(); | 400 | auto tev_stages = regs.GetTevStages(); |
| 391 | 401 | ||
| 392 | bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; | 402 | bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && |
| 403 | g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; | ||
| 393 | const auto stencil_test = g_state.regs.output_merger.stencil_test; | 404 | const auto stencil_test = g_state.regs.output_merger.stencil_test; |
| 394 | 405 | ||
| 395 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. | 406 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. |
| @@ -397,10 +408,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 397 | for (u16 y = min_y + 8; y < max_y; y += 0x10) { | 408 | for (u16 y = min_y + 8; y < max_y; y += 0x10) { |
| 398 | for (u16 x = min_x + 8; x < max_x; x += 0x10) { | 409 | for (u16 x = min_x + 8; x < max_x; x += 0x10) { |
| 399 | 410 | ||
| 400 | // Do not process the pixel if it's inside the scissor box and the scissor mode is set to Exclude | 411 | // Do not process the pixel if it's inside the scissor box and the scissor mode is set |
| 412 | // to Exclude | ||
| 401 | if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { | 413 | if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { |
| 402 | if (x >= scissor_x1 && x < scissor_x2 && | 414 | if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) |
| 403 | y >= scissor_y1 && y < scissor_y2) | ||
| 404 | continue; | 415 | continue; |
| 405 | } | 416 | } |
| 406 | 417 | ||
| @@ -414,15 +425,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 414 | if (w0 < 0 || w1 < 0 || w2 < 0) | 425 | if (w0 < 0 || w1 < 0 || w2 < 0) |
| 415 | continue; | 426 | continue; |
| 416 | 427 | ||
| 417 | auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), | 428 | auto baricentric_coordinates = |
| 418 | float24::FromFloat32(static_cast<float>(w1)), | 429 | Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), |
| 419 | float24::FromFloat32(static_cast<float>(w2))); | 430 | float24::FromFloat32(static_cast<float>(w1)), |
| 420 | float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); | 431 | float24::FromFloat32(static_cast<float>(w2))); |
| 432 | float24 interpolated_w_inverse = | ||
| 433 | float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); | ||
| 421 | 434 | ||
| 422 | // interpolated_z = z / w | 435 | // interpolated_z = z / w |
| 423 | float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + | 436 | float interpolated_z_over_w = |
| 424 | v1.screenpos[2].ToFloat32() * w1 + | 437 | (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 + |
| 425 | v2.screenpos[2].ToFloat32() * w2) / wsum; | 438 | v2.screenpos[2].ToFloat32() * w2) / |
| 439 | wsum; | ||
| 426 | 440 | ||
| 427 | // Not fully accurate. About 3 bits in precision are missing. | 441 | // Not fully accurate. About 3 bits in precision are missing. |
| 428 | // Z-Buffer (z / w * scale + offset) | 442 | // Z-Buffer (z / w * scale + offset) |
| @@ -461,10 +475,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 461 | }; | 475 | }; |
| 462 | 476 | ||
| 463 | Math::Vec4<u8> primary_color{ | 477 | Math::Vec4<u8> primary_color{ |
| 464 | (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255), | 478 | (u8)( |
| 465 | (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255), | 479 | GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * |
| 466 | (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255), | 480 | 255), |
| 467 | (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) | 481 | (u8)( |
| 482 | GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * | ||
| 483 | 255), | ||
| 484 | (u8)( | ||
| 485 | GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * | ||
| 486 | 255), | ||
| 487 | (u8)( | ||
| 488 | GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * | ||
| 489 | 255), | ||
| 468 | }; | 490 | }; |
| 469 | 491 | ||
| 470 | Math::Vec2<float24> uv[3]; | 492 | Math::Vec2<float24> uv[3]; |
| @@ -489,7 +511,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 489 | // Only unit 0 respects the texturing type (according to 3DBrew) | 511 | // Only unit 0 respects the texturing type (according to 3DBrew) |
| 490 | // TODO: Refactor so cubemaps and shadowmaps can be handled | 512 | // TODO: Refactor so cubemaps and shadowmaps can be handled |
| 491 | if (i == 0) { | 513 | if (i == 0) { |
| 492 | switch(texture.config.type) { | 514 | switch (texture.config.type) { |
| 493 | case Regs::TextureConfig::Texture2D: | 515 | case Regs::TextureConfig::Texture2D: |
| 494 | break; | 516 | break; |
| 495 | case Regs::TextureConfig::Projection2D: { | 517 | case Regs::TextureConfig::Projection2D: { |
| @@ -506,51 +528,58 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 506 | } | 528 | } |
| 507 | } | 529 | } |
| 508 | 530 | ||
| 509 | int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); | 531 | int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))) |
| 510 | int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); | 532 | .ToFloat32(); |
| 533 | int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) | ||
| 534 | .ToFloat32(); | ||
| 511 | 535 | ||
| 512 | 536 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, | |
| 513 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { | 537 | unsigned size) { |
| 514 | switch (mode) { | 538 | switch (mode) { |
| 515 | case Regs::TextureConfig::ClampToEdge: | 539 | case Regs::TextureConfig::ClampToEdge: |
| 516 | val = std::max(val, 0); | 540 | val = std::max(val, 0); |
| 517 | val = std::min(val, (int)size - 1); | 541 | val = std::min(val, (int)size - 1); |
| 518 | return val; | 542 | return val; |
| 519 | 543 | ||
| 520 | case Regs::TextureConfig::ClampToBorder: | 544 | case Regs::TextureConfig::ClampToBorder: |
| 521 | return val; | 545 | return val; |
| 522 | 546 | ||
| 523 | case Regs::TextureConfig::Repeat: | 547 | case Regs::TextureConfig::Repeat: |
| 524 | return (int)((unsigned)val % size); | 548 | return (int)((unsigned)val % size); |
| 525 | 549 | ||
| 526 | case Regs::TextureConfig::MirroredRepeat: | 550 | case Regs::TextureConfig::MirroredRepeat: { |
| 527 | { | 551 | unsigned int coord = ((unsigned)val % (2 * size)); |
| 528 | unsigned int coord = ((unsigned)val % (2 * size)); | 552 | if (coord >= size) |
| 529 | if (coord >= size) | 553 | coord = 2 * size - 1 - coord; |
| 530 | coord = 2 * size - 1 - coord; | 554 | return (int)coord; |
| 531 | return (int)coord; | 555 | } |
| 532 | } | 556 | |
| 533 | 557 | default: | |
| 534 | default: | 558 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); |
| 535 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); | 559 | UNIMPLEMENTED(); |
| 536 | UNIMPLEMENTED(); | 560 | return 0; |
| 537 | return 0; | ||
| 538 | } | 561 | } |
| 539 | }; | 562 | }; |
| 540 | 563 | ||
| 541 | if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width)) | 564 | if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && |
| 542 | || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) { | 565 | (s < 0 || s >= texture.config.width)) || |
| 566 | (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && | ||
| 567 | (t < 0 || t >= texture.config.height))) { | ||
| 543 | auto border_color = texture.config.border_color; | 568 | auto border_color = texture.config.border_color; |
| 544 | texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a }; | 569 | texture_color[i] = {border_color.r, border_color.g, border_color.b, |
| 570 | border_color.a}; | ||
| 545 | } else { | 571 | } else { |
| 546 | // Textures are laid out from bottom to top, hence we invert the t coordinate. | 572 | // Textures are laid out from bottom to top, hence we invert the t coordinate. |
| 547 | // NOTE: This may not be the right place for the inversion. | 573 | // NOTE: This may not be the right place for the inversion. |
| 548 | // TODO: Check if this applies to ETC textures, too. | 574 | // TODO: Check if this applies to ETC textures, too. |
| 549 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); | 575 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); |
| 550 | t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | 576 | t = texture.config.height - 1 - |
| 577 | GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | ||
| 551 | 578 | ||
| 552 | u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); | 579 | u8* texture_data = |
| 553 | auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); | 580 | Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); |
| 581 | auto info = | ||
| 582 | DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); | ||
| 554 | 583 | ||
| 555 | // TODO: Apply the min and mag filters to the texture | 584 | // TODO: Apply the min and mag filters to the texture |
| 556 | texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); | 585 | texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); |
| @@ -571,10 +600,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 571 | Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; | 600 | Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; |
| 572 | Math::Vec4<u8> next_combiner_buffer = { | 601 | Math::Vec4<u8> next_combiner_buffer = { |
| 573 | regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, | 602 | regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, |
| 574 | regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a | 603 | regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a, |
| 575 | }; | 604 | }; |
| 576 | 605 | ||
| 577 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | 606 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); |
| 607 | ++tev_stage_index) { | ||
| 578 | const auto& tev_stage = tev_stages[tev_stage_index]; | 608 | const auto& tev_stage = tev_stages[tev_stage_index]; |
| 579 | using Source = Regs::TevStageConfig::Source; | 609 | using Source = Regs::TevStageConfig::Source; |
| 580 | using ColorModifier = Regs::TevStageConfig::ColorModifier; | 610 | using ColorModifier = Regs::TevStageConfig::ColorModifier; |
| @@ -606,7 +636,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 606 | return combiner_buffer; | 636 | return combiner_buffer; |
| 607 | 637 | ||
| 608 | case Source::Constant: | 638 | case Source::Constant: |
| 609 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; | 639 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, |
| 640 | tev_stage.const_a}; | ||
| 610 | 641 | ||
| 611 | case Source::Previous: | 642 | case Source::Previous: |
| 612 | return combiner_output; | 643 | return combiner_output; |
| @@ -618,7 +649,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 618 | } | 649 | } |
| 619 | }; | 650 | }; |
| 620 | 651 | ||
| 621 | static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | 652 | static auto GetColorModifier = [](ColorModifier factor, |
| 653 | const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | ||
| 622 | switch (factor) { | 654 | switch (factor) { |
| 623 | case ColorModifier::SourceColor: | 655 | case ColorModifier::SourceColor: |
| 624 | return values.rgb(); | 656 | return values.rgb(); |
| @@ -652,7 +684,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 652 | } | 684 | } |
| 653 | }; | 685 | }; |
| 654 | 686 | ||
| 655 | static auto GetAlphaModifier = [](AlphaModifier factor, const Math::Vec4<u8>& values) -> u8 { | 687 | static auto GetAlphaModifier = [](AlphaModifier factor, |
| 688 | const Math::Vec4<u8>& values) -> u8 { | ||
| 656 | switch (factor) { | 689 | switch (factor) { |
| 657 | case AlphaModifier::SourceAlpha: | 690 | case AlphaModifier::SourceAlpha: |
| 658 | return values.a(); | 691 | return values.a(); |
| @@ -680,7 +713,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 680 | } | 713 | } |
| 681 | }; | 714 | }; |
| 682 | 715 | ||
| 683 | static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | 716 | static auto ColorCombine = [](Operation op, |
| 717 | const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||
| 684 | switch (op) { | 718 | switch (op) { |
| 685 | case Operation::Replace: | 719 | case Operation::Replace: |
| 686 | return input[0]; | 720 | return input[0]; |
| @@ -688,8 +722,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 688 | case Operation::Modulate: | 722 | case Operation::Modulate: |
| 689 | return ((input[0] * input[1]) / 255).Cast<u8>(); | 723 | return ((input[0] * input[1]) / 255).Cast<u8>(); |
| 690 | 724 | ||
| 691 | case Operation::Add: | 725 | case Operation::Add: { |
| 692 | { | ||
| 693 | auto result = input[0] + input[1]; | 726 | auto result = input[0] + input[1]; |
| 694 | result.r() = std::min(255, result.r()); | 727 | result.r() = std::min(255, result.r()); |
| 695 | result.g() = std::min(255, result.g()); | 728 | result.g() = std::min(255, result.g()); |
| @@ -697,10 +730,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 697 | return result.Cast<u8>(); | 730 | return result.Cast<u8>(); |
| 698 | } | 731 | } |
| 699 | 732 | ||
| 700 | case Operation::AddSigned: | 733 | case Operation::AddSigned: { |
| 701 | { | 734 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to |
| 702 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct | 735 | // (byte) 128 is correct |
| 703 | auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); | 736 | auto result = input[0].Cast<int>() + input[1].Cast<int>() - |
| 737 | Math::MakeVec<int>(128, 128, 128); | ||
| 704 | result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | 738 | result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); |
| 705 | result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | 739 | result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); |
| 706 | result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | 740 | result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); |
| @@ -708,10 +742,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 708 | } | 742 | } |
| 709 | 743 | ||
| 710 | case Operation::Lerp: | 744 | case Operation::Lerp: |
| 711 | return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); | 745 | return ((input[0] * input[2] + |
| 746 | input[1] * | ||
| 747 | (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||
| 748 | 255) | ||
| 749 | .Cast<u8>(); | ||
| 712 | 750 | ||
| 713 | case Operation::Subtract: | 751 | case Operation::Subtract: { |
| 714 | { | ||
| 715 | auto result = input[0].Cast<int>() - input[1].Cast<int>(); | 752 | auto result = input[0].Cast<int>() - input[1].Cast<int>(); |
| 716 | result.r() = std::max(0, result.r()); | 753 | result.r() = std::max(0, result.r()); |
| 717 | result.g() = std::max(0, result.g()); | 754 | result.g() = std::max(0, result.g()); |
| @@ -719,8 +756,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 719 | return result.Cast<u8>(); | 756 | return result.Cast<u8>(); |
| 720 | } | 757 | } |
| 721 | 758 | ||
| 722 | case Operation::MultiplyThenAdd: | 759 | case Operation::MultiplyThenAdd: { |
| 723 | { | ||
| 724 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | 760 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; |
| 725 | result.r() = std::min(255, result.r()); | 761 | result.r() = std::min(255, result.r()); |
| 726 | result.g() = std::min(255, result.g()); | 762 | result.g() = std::min(255, result.g()); |
| @@ -728,8 +764,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 728 | return result.Cast<u8>(); | 764 | return result.Cast<u8>(); |
| 729 | } | 765 | } |
| 730 | 766 | ||
| 731 | case Operation::AddThenMultiply: | 767 | case Operation::AddThenMultiply: { |
| 732 | { | ||
| 733 | auto result = input[0] + input[1]; | 768 | auto result = input[0] + input[1]; |
| 734 | result.r() = std::min(255, result.r()); | 769 | result.r() = std::min(255, result.r()); |
| 735 | result.g() = std::min(255, result.g()); | 770 | result.g() = std::min(255, result.g()); |
| @@ -737,17 +772,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 737 | result = (result * input[2].Cast<int>()) / 255; | 772 | result = (result * input[2].Cast<int>()) / 255; |
| 738 | return result.Cast<u8>(); | 773 | return result.Cast<u8>(); |
| 739 | } | 774 | } |
| 740 | case Operation::Dot3_RGB: | 775 | case Operation::Dot3_RGB: { |
| 741 | { | ||
| 742 | // Not fully accurate. | 776 | // Not fully accurate. |
| 743 | // Worst case scenario seems to yield a +/-3 error | 777 | // Worst case scenario seems to yield a +/-3 error |
| 744 | // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, | 778 | // Some HW results indicate that the per-component computation can't have a |
| 745 | // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results | 779 | // higher precision than 1/256, |
| 746 | int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | 780 | // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( |
| 747 | ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | 781 | // (0x80,g0,b0),(0x80,g1,b1) ) give different results |
| 748 | ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | 782 | int result = |
| 783 | ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||
| 784 | ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||
| 785 | ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||
| 749 | result = std::max(0, std::min(255, result)); | 786 | result = std::max(0, std::min(255, result)); |
| 750 | return { (u8)result, (u8)result, (u8)result }; | 787 | return {(u8)result, (u8)result, (u8)result}; |
| 751 | } | 788 | } |
| 752 | default: | 789 | default: |
| 753 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); | 790 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); |
| @@ -756,7 +793,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 756 | } | 793 | } |
| 757 | }; | 794 | }; |
| 758 | 795 | ||
| 759 | static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { | 796 | static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 { |
| 760 | switch (op) { | 797 | switch (op) { |
| 761 | case Operation::Replace: | 798 | case Operation::Replace: |
| 762 | return input[0]; | 799 | return input[0]; |
| @@ -767,9 +804,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 767 | case Operation::Add: | 804 | case Operation::Add: |
| 768 | return std::min(255, input[0] + input[1]); | 805 | return std::min(255, input[0] + input[1]); |
| 769 | 806 | ||
| 770 | case Operation::AddSigned: | 807 | case Operation::AddSigned: { |
| 771 | { | 808 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to |
| 772 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct | 809 | // (byte) 128 is correct |
| 773 | auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | 810 | auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; |
| 774 | return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | 811 | return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); |
| 775 | } | 812 | } |
| @@ -801,32 +838,38 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 801 | Math::Vec3<u8> color_result[3] = { | 838 | Math::Vec3<u8> color_result[3] = { |
| 802 | GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), | 839 | GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), |
| 803 | GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), | 840 | GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), |
| 804 | GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)) | 841 | GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)), |
| 805 | }; | 842 | }; |
| 806 | auto color_output = ColorCombine(tev_stage.color_op, color_result); | 843 | auto color_output = ColorCombine(tev_stage.color_op, color_result); |
| 807 | 844 | ||
| 808 | // alpha combiner | 845 | // alpha combiner |
| 809 | std::array<u8,3> alpha_result = {{ | 846 | std::array<u8, 3> alpha_result = {{ |
| 810 | GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), | 847 | GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), |
| 811 | GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), | 848 | GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), |
| 812 | GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)) | 849 | GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)), |
| 813 | }}; | 850 | }}; |
| 814 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); | 851 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); |
| 815 | 852 | ||
| 816 | combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); | 853 | combiner_output[0] = |
| 817 | combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); | 854 | std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); |
| 818 | combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | 855 | combiner_output[1] = |
| 819 | combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | 856 | std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); |
| 857 | combiner_output[2] = | ||
| 858 | std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | ||
| 859 | combiner_output[3] = | ||
| 860 | std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | ||
| 820 | 861 | ||
| 821 | combiner_buffer = next_combiner_buffer; | 862 | combiner_buffer = next_combiner_buffer; |
| 822 | 863 | ||
| 823 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { | 864 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor( |
| 865 | tev_stage_index)) { | ||
| 824 | next_combiner_buffer.r() = combiner_output.r(); | 866 | next_combiner_buffer.r() = combiner_output.r(); |
| 825 | next_combiner_buffer.g() = combiner_output.g(); | 867 | next_combiner_buffer.g() = combiner_output.g(); |
| 826 | next_combiner_buffer.b() = combiner_output.b(); | 868 | next_combiner_buffer.b() = combiner_output.b(); |
| 827 | } | 869 | } |
| 828 | 870 | ||
| 829 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { | 871 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha( |
| 872 | tev_stage_index)) { | ||
| 830 | next_combiner_buffer.a() = combiner_output.a(); | 873 | next_combiner_buffer.a() = combiner_output.a(); |
| 831 | } | 874 | } |
| 832 | } | 875 | } |
| @@ -897,21 +940,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 897 | float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); | 940 | float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); |
| 898 | float fog_f = fog_index - fog_i; | 941 | float fog_f = fog_index - fog_i; |
| 899 | const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; | 942 | const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; |
| 900 | float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11 | 943 | float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / |
| 944 | 2047.0f; // This is signed fixed point 1.11 | ||
| 901 | fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); | 945 | fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); |
| 902 | 946 | ||
| 903 | // Blend the fog | 947 | // Blend the fog |
| 904 | for (unsigned i = 0; i < 3; i++) { | 948 | for (unsigned i = 0; i < 3; i++) { |
| 905 | combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; | 949 | combiner_output[i] = |
| 950 | fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; | ||
| 906 | } | 951 | } |
| 907 | } | 952 | } |
| 908 | 953 | ||
| 909 | u8 old_stencil = 0; | 954 | u8 old_stencil = 0; |
| 910 | 955 | ||
| 911 | auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { | 956 | auto UpdateStencil = [stencil_test, x, y, |
| 912 | u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); | 957 | &old_stencil](Pica::Regs::StencilAction action) { |
| 958 | u8 new_stencil = | ||
| 959 | PerformStencilAction(action, old_stencil, stencil_test.reference_value); | ||
| 913 | if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) | 960 | if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) |
| 914 | SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); | 961 | SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | |
| 962 | (old_stencil & ~stencil_test.write_mask)); | ||
| 915 | }; | 963 | }; |
| 916 | 964 | ||
| 917 | if (stencil_action_enable) { | 965 | if (stencil_action_enable) { |
| @@ -1030,7 +1078,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1030 | static_cast<u8>(output_merger.blend_const.r), | 1078 | static_cast<u8>(output_merger.blend_const.r), |
| 1031 | static_cast<u8>(output_merger.blend_const.g), | 1079 | static_cast<u8>(output_merger.blend_const.g), |
| 1032 | static_cast<u8>(output_merger.blend_const.b), | 1080 | static_cast<u8>(output_merger.blend_const.b), |
| 1033 | static_cast<u8>(output_merger.blend_const.a) | 1081 | static_cast<u8>(output_merger.blend_const.a), |
| 1034 | }; | 1082 | }; |
| 1035 | 1083 | ||
| 1036 | switch (factor) { | 1084 | switch (factor) { |
| @@ -1091,12 +1139,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1091 | return combiner_output[channel]; | 1139 | return combiner_output[channel]; |
| 1092 | }; | 1140 | }; |
| 1093 | 1141 | ||
| 1094 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | 1142 | static auto EvaluateBlendEquation = []( |
| 1095 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | 1143 | const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, |
| 1096 | Regs::BlendEquation equation) { | 1144 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, |
| 1145 | Regs::BlendEquation equation) { | ||
| 1097 | Math::Vec4<int> result; | 1146 | Math::Vec4<int> result; |
| 1098 | 1147 | ||
| 1099 | auto src_result = (src * srcfactor).Cast<int>(); | 1148 | auto src_result = (src * srcfactor).Cast<int>(); |
| 1100 | auto dst_result = (dest * destfactor).Cast<int>(); | 1149 | auto dst_result = (dest * destfactor).Cast<int>(); |
| 1101 | 1150 | ||
| 1102 | switch (equation) { | 1151 | switch (equation) { |
| @@ -1134,10 +1183,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1134 | UNIMPLEMENTED(); | 1183 | UNIMPLEMENTED(); |
| 1135 | } | 1184 | } |
| 1136 | 1185 | ||
| 1137 | return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), | 1186 | return Math::Vec4<u8>( |
| 1138 | MathUtil::Clamp(result.g(), 0, 255), | 1187 | MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), |
| 1139 | MathUtil::Clamp(result.b(), 0, 255), | 1188 | MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); |
| 1140 | MathUtil::Clamp(result.a(), 0, 255)); | ||
| 1141 | }; | 1189 | }; |
| 1142 | 1190 | ||
| 1143 | auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), | 1191 | auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), |
| @@ -1150,8 +1198,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1150 | LookupFactor(2, params.factor_dest_rgb), | 1198 | LookupFactor(2, params.factor_dest_rgb), |
| 1151 | LookupFactor(3, params.factor_dest_a)); | 1199 | LookupFactor(3, params.factor_dest_a)); |
| 1152 | 1200 | ||
| 1153 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); | 1201 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, |
| 1154 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); | 1202 | params.blend_equation_rgb); |
| 1203 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, | ||
| 1204 | dstfactor, params.blend_equation_a) | ||
| 1205 | .a(); | ||
| 1155 | } else { | 1206 | } else { |
| 1156 | static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { | 1207 | static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { |
| 1157 | switch (op) { | 1208 | switch (op) { |
| @@ -1205,18 +1256,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1205 | } | 1256 | } |
| 1206 | }; | 1257 | }; |
| 1207 | 1258 | ||
| 1208 | blend_output = Math::MakeVec( | 1259 | blend_output = |
| 1209 | LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), | 1260 | Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), |
| 1210 | LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), | 1261 | LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), |
| 1211 | LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), | 1262 | LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), |
| 1212 | LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); | 1263 | LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); |
| 1213 | } | 1264 | } |
| 1214 | 1265 | ||
| 1215 | const Math::Vec4<u8> result = { | 1266 | const Math::Vec4<u8> result = { |
| 1216 | output_merger.red_enable ? blend_output.r() : dest.r(), | 1267 | output_merger.red_enable ? blend_output.r() : dest.r(), |
| 1217 | output_merger.green_enable ? blend_output.g() : dest.g(), | 1268 | output_merger.green_enable ? blend_output.g() : dest.g(), |
| 1218 | output_merger.blue_enable ? blend_output.b() : dest.b(), | 1269 | output_merger.blue_enable ? blend_output.b() : dest.b(), |
| 1219 | output_merger.alpha_enable ? blend_output.a() : dest.a() | 1270 | output_merger.alpha_enable ? blend_output.a() : dest.a(), |
| 1220 | }; | 1271 | }; |
| 1221 | 1272 | ||
| 1222 | if (regs.framebuffer.allow_color_write != 0) | 1273 | if (regs.framebuffer.allow_color_write != 0) |
| @@ -1225,8 +1276,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1225 | } | 1276 | } |
| 1226 | } | 1277 | } |
| 1227 | 1278 | ||
| 1228 | void ProcessTriangle(const Shader::OutputVertex& v0, | 1279 | void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, |
| 1229 | const Shader::OutputVertex& v1, | ||
| 1230 | const Shader::OutputVertex& v2) { | 1280 | const Shader::OutputVertex& v2) { |
| 1231 | ProcessTriangleInternal(v0, v1, v2); | 1281 | ProcessTriangleInternal(v0, v1, v2); |
| 1232 | } | 1282 | } |
diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h index a6a9634b4..6cbda3067 100644 --- a/src/video_core/rasterizer.h +++ b/src/video_core/rasterizer.h | |||
| @@ -7,13 +7,12 @@ | |||
| 7 | namespace Pica { | 7 | namespace Pica { |
| 8 | 8 | ||
| 9 | namespace Shader { | 9 | namespace Shader { |
| 10 | struct OutputVertex; | 10 | struct OutputVertex; |
| 11 | } | 11 | } |
| 12 | 12 | ||
| 13 | namespace Rasterizer { | 13 | namespace Rasterizer { |
| 14 | 14 | ||
| 15 | void ProcessTriangle(const Shader::OutputVertex& v0, | 15 | void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, |
| 16 | const Shader::OutputVertex& v1, | ||
| 17 | const Shader::OutputVertex& v2); | 16 | const Shader::OutputVertex& v2); |
| 18 | 17 | ||
| 19 | } // namespace Rasterizer | 18 | } // namespace Rasterizer |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index bf7101665..71df233b5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | |||
| 9 | #include "core/hw/gpu.h" | 8 | #include "core/hw/gpu.h" |
| 10 | 9 | ||
| 11 | struct ScreenInfo; | 10 | struct ScreenInfo; |
| @@ -39,17 +38,25 @@ public: | |||
| 39 | /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory | 38 | /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory |
| 40 | virtual void FlushRegion(PAddr addr, u32 size) = 0; | 39 | virtual void FlushRegion(PAddr addr, u32 size) = 0; |
| 41 | 40 | ||
| 42 | /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated | 41 | /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory |
| 42 | /// and invalidated | ||
| 43 | virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; | 43 | virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; |
| 44 | 44 | ||
| 45 | /// Attempt to use a faster method to perform a display transfer | 45 | /// Attempt to use a faster method to perform a display transfer |
| 46 | virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; } | 46 | virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { |
| 47 | return false; | ||
| 48 | } | ||
| 47 | 49 | ||
| 48 | /// Attempt to use a faster method to fill a region | 50 | /// Attempt to use a faster method to fill a region |
| 49 | virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; } | 51 | virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { |
| 52 | return false; | ||
| 53 | } | ||
| 50 | 54 | ||
| 51 | /// Attempt to use a faster method to display the framebuffer to screen | 55 | /// Attempt to use a faster method to display the framebuffer to screen |
| 52 | virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; } | 56 | virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, |
| 57 | PAddr framebuffer_addr, u32 pixel_stride, | ||
| 58 | ScreenInfo& screen_info) { | ||
| 59 | return false; | ||
| 60 | } | ||
| 53 | }; | 61 | }; |
| 54 | |||
| 55 | } | 62 | } |
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 3f451e062..fd38175b3 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp | |||
| @@ -4,11 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #include <atomic> | 5 | #include <atomic> |
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | |||
| 8 | #include "video_core/renderer_base.h" | 7 | #include "video_core/renderer_base.h" |
| 9 | #include "video_core/video_core.h" | ||
| 10 | #include "video_core/swrasterizer.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 8 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 9 | #include "video_core/swrasterizer.h" | ||
| 10 | #include "video_core/video_core.h" | ||
| 12 | 11 | ||
| 13 | void RendererBase::RefreshRasterizerSetting() { | 12 | void RendererBase::RefreshRasterizerSetting() { |
| 14 | bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled; | 13 | bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled; |
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index f68091cc8..589aca857 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h | |||
| @@ -5,25 +5,17 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | |||
| 9 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 10 | |||
| 11 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 12 | 10 | ||
| 13 | class EmuWindow; | 11 | class EmuWindow; |
| 14 | 12 | ||
| 15 | class RendererBase : NonCopyable { | 13 | class RendererBase : NonCopyable { |
| 16 | public: | 14 | public: |
| 17 | |||
| 18 | /// Used to reference a framebuffer | 15 | /// Used to reference a framebuffer |
| 19 | enum kFramebuffer { | 16 | enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture }; |
| 20 | kFramebuffer_VirtualXFB = 0, | ||
| 21 | kFramebuffer_EFB, | ||
| 22 | kFramebuffer_Texture | ||
| 23 | }; | ||
| 24 | 17 | ||
| 25 | virtual ~RendererBase() { | 18 | virtual ~RendererBase() {} |
| 26 | } | ||
| 27 | 19 | ||
| 28 | /// Swap buffers (render frame) | 20 | /// Swap buffers (render frame) |
| 29 | virtual void SwapBuffers() = 0; | 21 | virtual void SwapBuffers() = 0; |
| @@ -59,8 +51,8 @@ public: | |||
| 59 | 51 | ||
| 60 | protected: | 52 | protected: |
| 61 | std::unique_ptr<VideoCore::RasterizerInterface> rasterizer; | 53 | std::unique_ptr<VideoCore::RasterizerInterface> rasterizer; |
| 62 | f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer | 54 | f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer |
| 63 | int m_current_frame = 0; ///< Current frame, should be set by the renderer | 55 | int m_current_frame = 0; ///< Current frame, should be set by the renderer |
| 64 | 56 | ||
| 65 | private: | 57 | private: |
| 66 | bool opengl_rasterizer_active = false; | 58 | bool opengl_rasterizer_active = false; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f8393c618..60c9d9180 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -6,17 +6,13 @@ | |||
| 6 | #include <string> | 6 | #include <string> |
| 7 | #include <tuple> | 7 | #include <tuple> |
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | |||
| 10 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 11 | |||
| 12 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 13 | #include "common/color.h" | 11 | #include "common/color.h" |
| 14 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 15 | #include "common/math_util.h" | 13 | #include "common/math_util.h" |
| 16 | #include "common/vector_math.h" | 14 | #include "common/vector_math.h" |
| 17 | |||
| 18 | #include "core/hw/gpu.h" | 15 | #include "core/hw/gpu.h" |
| 19 | |||
| 20 | #include "video_core/pica.h" | 16 | #include "video_core/pica.h" |
| 21 | #include "video_core/pica_state.h" | 17 | #include "video_core/pica_state.h" |
| 22 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 18 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| @@ -32,8 +28,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { | |||
| 32 | stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous && | 28 | stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous && |
| 33 | stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor && | 29 | stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor && |
| 34 | stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha && | 30 | stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha && |
| 35 | stage.GetColorMultiplier() == 1 && | 31 | stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); |
| 36 | stage.GetAlphaMultiplier() == 1); | ||
| 37 | } | 32 | } |
| 38 | 33 | ||
| 39 | RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { | 34 | RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { |
| @@ -65,26 +60,34 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { | |||
| 65 | uniform_block_data.fog_lut_dirty = true; | 60 | uniform_block_data.fog_lut_dirty = true; |
| 66 | 61 | ||
| 67 | // Set vertex attributes | 62 | // Set vertex attributes |
| 68 | glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); | 63 | glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, |
| 64 | sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); | ||
| 69 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); | 65 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); |
| 70 | 66 | ||
| 71 | glVertexAttribPointer(GLShader::ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color)); | 67 | glVertexAttribPointer(GLShader::ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), |
| 68 | (GLvoid*)offsetof(HardwareVertex, color)); | ||
| 72 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_COLOR); | 69 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_COLOR); |
| 73 | 70 | ||
| 74 | glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); | 71 | glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, |
| 75 | glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); | 72 | sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); |
| 76 | glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); | 73 | glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, |
| 74 | sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); | ||
| 75 | glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, | ||
| 76 | sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); | ||
| 77 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0); | 77 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0); |
| 78 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); | 78 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); |
| 79 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); | 79 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); |
| 80 | 80 | ||
| 81 | glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); | 81 | glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, |
| 82 | sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); | ||
| 82 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W); | 83 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W); |
| 83 | 84 | ||
| 84 | glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); | 85 | glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, |
| 86 | sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); | ||
| 85 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); | 87 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); |
| 86 | 88 | ||
| 87 | glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); | 89 | glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), |
| 90 | (GLvoid*)offsetof(HardwareVertex, view)); | ||
| 88 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); | 91 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); |
| 89 | 92 | ||
| 90 | // Create render framebuffer | 93 | // Create render framebuffer |
| @@ -129,9 +132,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { | |||
| 129 | SyncDepthWriteMask(); | 132 | SyncDepthWriteMask(); |
| 130 | } | 133 | } |
| 131 | 134 | ||
| 132 | RasterizerOpenGL::~RasterizerOpenGL() { | 135 | RasterizerOpenGL::~RasterizerOpenGL() {} |
| 133 | |||
| 134 | } | ||
| 135 | 136 | ||
| 136 | /** | 137 | /** |
| 137 | * This is a helper function to resolve an issue with opposite quaternions being interpolated by | 138 | * This is a helper function to resolve an issue with opposite quaternions being interpolated by |
| @@ -149,8 +150,8 @@ RasterizerOpenGL::~RasterizerOpenGL() { | |||
| 149 | * manually using two Lerps, and doing this correction before each Lerp. | 150 | * manually using two Lerps, and doing this correction before each Lerp. |
| 150 | */ | 151 | */ |
| 151 | static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { | 152 | static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { |
| 152 | Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; | 153 | Math::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; |
| 153 | Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; | 154 | Math::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; |
| 154 | 155 | ||
| 155 | return (Math::Dot(a, b) < 0.f); | 156 | return (Math::Dot(a, b) < 0.f); |
| 156 | } | 157 | } |
| @@ -173,15 +174,20 @@ void RasterizerOpenGL::DrawTriangles() { | |||
| 173 | CachedSurface* color_surface; | 174 | CachedSurface* color_surface; |
| 174 | CachedSurface* depth_surface; | 175 | CachedSurface* depth_surface; |
| 175 | MathUtil::Rectangle<int> rect; | 176 | MathUtil::Rectangle<int> rect; |
| 176 | std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer); | 177 | std::tie(color_surface, depth_surface, rect) = |
| 178 | res_cache.GetFramebufferSurfaces(regs.framebuffer); | ||
| 177 | 179 | ||
| 178 | state.draw.draw_framebuffer = framebuffer.handle; | 180 | state.draw.draw_framebuffer = framebuffer.handle; |
| 179 | state.Apply(); | 181 | state.Apply(); |
| 180 | 182 | ||
| 181 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0); | 183 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, |
| 182 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); | 184 | color_surface != nullptr ? color_surface->texture.handle : 0, 0); |
| 185 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||
| 186 | depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); | ||
| 183 | bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; | 187 | bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; |
| 184 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); | 188 | glFramebufferTexture2D( |
| 189 | GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 190 | (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); | ||
| 185 | 191 | ||
| 186 | if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | 192 | if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { |
| 187 | return; | 193 | return; |
| @@ -194,7 +200,8 @@ void RasterizerOpenGL::DrawTriangles() { | |||
| 194 | 200 | ||
| 195 | glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width), | 201 | glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width), |
| 196 | (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height), | 202 | (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height), |
| 197 | (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height)); | 203 | (GLsizei)(viewport_width * color_surface->res_scale_width), |
| 204 | (GLsizei)(viewport_height * color_surface->res_scale_height)); | ||
| 198 | 205 | ||
| 199 | if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || | 206 | if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || |
| 200 | uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { | 207 | uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { |
| @@ -245,14 +252,16 @@ void RasterizerOpenGL::DrawTriangles() { | |||
| 245 | 252 | ||
| 246 | // Sync the uniform data | 253 | // Sync the uniform data |
| 247 | if (uniform_block_data.dirty) { | 254 | if (uniform_block_data.dirty) { |
| 248 | glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); | 255 | glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, |
| 256 | GL_STATIC_DRAW); | ||
| 249 | uniform_block_data.dirty = false; | 257 | uniform_block_data.dirty = false; |
| 250 | } | 258 | } |
| 251 | 259 | ||
| 252 | state.Apply(); | 260 | state.Apply(); |
| 253 | 261 | ||
| 254 | // Draw the vertex batch | 262 | // Draw the vertex batch |
| 255 | glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); | 263 | glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), |
| 264 | GL_STREAM_DRAW); | ||
| 256 | glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); | 265 | glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); |
| 257 | 266 | ||
| 258 | // Mark framebuffer surfaces as dirty | 267 | // Mark framebuffer surfaces as dirty |
| @@ -278,7 +287,7 @@ void RasterizerOpenGL::DrawTriangles() { | |||
| 278 | void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | 287 | void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { |
| 279 | const auto& regs = Pica::g_state.regs; | 288 | const auto& regs = Pica::g_state.regs; |
| 280 | 289 | ||
| 281 | switch(id) { | 290 | switch (id) { |
| 282 | // Culling | 291 | // Culling |
| 283 | case PICA_REG_INDEX(cull_mode): | 292 | case PICA_REG_INDEX(cull_mode): |
| 284 | SyncCullMode(); | 293 | SyncCullMode(); |
| @@ -548,7 +557,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 548 | SyncLightAmbient(7); | 557 | SyncLightAmbient(7); |
| 549 | break; | 558 | break; |
| 550 | 559 | ||
| 551 | // Fragment lighting position | 560 | // Fragment lighting position |
| 552 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): | 561 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): |
| 553 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): | 562 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): |
| 554 | SyncLightPosition(0); | 563 | SyncLightPosition(0); |
| @@ -659,13 +668,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 659 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): | 668 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): |
| 660 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): | 669 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): |
| 661 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): | 670 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): |
| 662 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): | 671 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { |
| 663 | { | ||
| 664 | auto& lut_config = regs.lighting.lut_config; | 672 | auto& lut_config = regs.lighting.lut_config; |
| 665 | uniform_block_data.lut_dirty[lut_config.type / 4] = true; | 673 | uniform_block_data.lut_dirty[lut_config.type / 4] = true; |
| 666 | break; | 674 | break; |
| 667 | } | 675 | } |
| 668 | |||
| 669 | } | 676 | } |
| 670 | } | 677 | } |
| 671 | 678 | ||
| @@ -699,8 +706,10 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe | |||
| 699 | 706 | ||
| 700 | CachedSurface dst_params; | 707 | CachedSurface dst_params; |
| 701 | dst_params.addr = config.GetPhysicalOutputAddress(); | 708 | dst_params.addr = config.GetPhysicalOutputAddress(); |
| 702 | dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); | 709 | dst_params.width = |
| 703 | dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); | 710 | config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); |
| 711 | dst_params.height = | ||
| 712 | config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); | ||
| 704 | dst_params.is_tiled = config.input_linear != config.dont_swizzle; | 713 | dst_params.is_tiled = config.input_linear != config.dont_swizzle; |
| 705 | dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); | 714 | dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); |
| 706 | 715 | ||
| @@ -735,7 +744,8 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe | |||
| 735 | return false; | 744 | return false; |
| 736 | } | 745 | } |
| 737 | 746 | ||
| 738 | u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; | 747 | u32 dst_size = dst_params.width * dst_params.height * |
| 748 | CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; | ||
| 739 | dst_surface->dirty = true; | 749 | dst_surface->dirty = true; |
| 740 | res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); | 750 | res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); |
| 741 | return true; | 751 | return true; |
| @@ -757,12 +767,15 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) | |||
| 757 | 767 | ||
| 758 | GLuint old_fb = cur_state.draw.draw_framebuffer; | 768 | GLuint old_fb = cur_state.draw.draw_framebuffer; |
| 759 | cur_state.draw.draw_framebuffer = framebuffer.handle; | 769 | cur_state.draw.draw_framebuffer = framebuffer.handle; |
| 760 | // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected | 770 | // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so |
| 771 | // Clear call isn't affected | ||
| 761 | cur_state.Apply(); | 772 | cur_state.Apply(); |
| 762 | 773 | ||
| 763 | if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { | 774 | if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { |
| 764 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0); | 775 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, |
| 765 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | 776 | dst_surface->texture.handle, 0); |
| 777 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 778 | 0); | ||
| 766 | 779 | ||
| 767 | if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | 780 | if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { |
| 768 | return false; | 781 | return false; |
| @@ -770,8 +783,10 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) | |||
| 770 | 783 | ||
| 771 | GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; | 784 | GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; |
| 772 | 785 | ||
| 773 | // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases | 786 | // TODO: Handle additional pixel format and fill value size combinations to accelerate more |
| 774 | // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/... | 787 | // cases |
| 788 | // For instance, checking if fill value's bytes/bits repeat to allow filling | ||
| 789 | // I8/A8/I4/A4/... | ||
| 775 | // Currently only handles formats that are multiples of the fill value size | 790 | // Currently only handles formats that are multiples of the fill value size |
| 776 | 791 | ||
| 777 | if (config.fill_24bit) { | 792 | if (config.fill_24bit) { |
| @@ -846,7 +861,8 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) | |||
| 846 | glClearBufferfv(GL_COLOR, 0, color_values); | 861 | glClearBufferfv(GL_COLOR, 0, color_values); |
| 847 | } else if (dst_type == SurfaceType::Depth) { | 862 | } else if (dst_type == SurfaceType::Depth) { |
| 848 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 863 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); |
| 849 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); | 864 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, |
| 865 | dst_surface->texture.handle, 0); | ||
| 850 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | 866 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); |
| 851 | 867 | ||
| 852 | if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | 868 | if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { |
| @@ -865,7 +881,8 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) | |||
| 865 | glClearBufferfv(GL_DEPTH, 0, &value_float); | 881 | glClearBufferfv(GL_DEPTH, 0, &value_float); |
| 866 | } else if (dst_type == SurfaceType::DepthStencil) { | 882 | } else if (dst_type == SurfaceType::DepthStencil) { |
| 867 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 883 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); |
| 868 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0); | 884 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, |
| 885 | dst_surface->texture.handle, 0); | ||
| 869 | 886 | ||
| 870 | if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | 887 | if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { |
| 871 | return false; | 888 | return false; |
| @@ -889,7 +906,9 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) | |||
| 889 | return true; | 906 | return true; |
| 890 | } | 907 | } |
| 891 | 908 | ||
| 892 | bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { | 909 | bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, |
| 910 | PAddr framebuffer_addr, u32 pixel_stride, | ||
| 911 | ScreenInfo& screen_info) { | ||
| 893 | if (framebuffer_addr == 0) { | 912 | if (framebuffer_addr == 0) { |
| 894 | return false; | 913 | return false; |
| 895 | } | 914 | } |
| @@ -912,10 +931,9 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con | |||
| 912 | u32 scaled_width = src_surface->GetScaledWidth(); | 931 | u32 scaled_width = src_surface->GetScaledWidth(); |
| 913 | u32 scaled_height = src_surface->GetScaledHeight(); | 932 | u32 scaled_height = src_surface->GetScaledHeight(); |
| 914 | 933 | ||
| 915 | screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height, | 934 | screen_info.display_texcoords = MathUtil::Rectangle<float>( |
| 916 | (float)src_rect.left / (float)scaled_width, | 935 | (float)src_rect.top / (float)scaled_height, (float)src_rect.left / (float)scaled_width, |
| 917 | (float)src_rect.bottom / (float)scaled_height, | 936 | (float)src_rect.bottom / (float)scaled_height, (float)src_rect.right / (float)scaled_width); |
| 918 | (float)src_rect.right / (float)scaled_width); | ||
| 919 | 937 | ||
| 920 | screen_info.display_texture = src_surface->texture.handle; | 938 | screen_info.display_texture = src_surface->texture.handle; |
| 921 | 939 | ||
| @@ -928,7 +946,8 @@ void RasterizerOpenGL::SamplerInfo::Create() { | |||
| 928 | wrap_s = wrap_t = TextureConfig::Repeat; | 946 | wrap_s = wrap_t = TextureConfig::Repeat; |
| 929 | border_color = 0; | 947 | border_color = 0; |
| 930 | 948 | ||
| 931 | glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR | 949 | glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, |
| 950 | GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR | ||
| 932 | // Other attributes have correct defaults | 951 | // Other attributes have correct defaults |
| 933 | } | 952 | } |
| 934 | 953 | ||
| @@ -976,41 +995,64 @@ void RasterizerOpenGL::SetShader() { | |||
| 976 | } else { | 995 | } else { |
| 977 | LOG_DEBUG(Render_OpenGL, "Creating new shader"); | 996 | LOG_DEBUG(Render_OpenGL, "Creating new shader"); |
| 978 | 997 | ||
| 979 | shader->shader.Create(GLShader::GenerateVertexShader().c_str(), GLShader::GenerateFragmentShader(config).c_str()); | 998 | shader->shader.Create(GLShader::GenerateVertexShader().c_str(), |
| 999 | GLShader::GenerateFragmentShader(config).c_str()); | ||
| 980 | 1000 | ||
| 981 | state.draw.shader_program = shader->shader.handle; | 1001 | state.draw.shader_program = shader->shader.handle; |
| 982 | state.Apply(); | 1002 | state.Apply(); |
| 983 | 1003 | ||
| 984 | // Set the texture samplers to correspond to different texture units | 1004 | // Set the texture samplers to correspond to different texture units |
| 985 | GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]"); | 1005 | GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]"); |
| 986 | if (uniform_tex != -1) { glUniform1i(uniform_tex, 0); } | 1006 | if (uniform_tex != -1) { |
| 1007 | glUniform1i(uniform_tex, 0); | ||
| 1008 | } | ||
| 987 | uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]"); | 1009 | uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]"); |
| 988 | if (uniform_tex != -1) { glUniform1i(uniform_tex, 1); } | 1010 | if (uniform_tex != -1) { |
| 1011 | glUniform1i(uniform_tex, 1); | ||
| 1012 | } | ||
| 989 | uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); | 1013 | uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); |
| 990 | if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } | 1014 | if (uniform_tex != -1) { |
| 1015 | glUniform1i(uniform_tex, 2); | ||
| 1016 | } | ||
| 991 | 1017 | ||
| 992 | // Set the texture samplers to correspond to different lookup table texture units | 1018 | // Set the texture samplers to correspond to different lookup table texture units |
| 993 | GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); | 1019 | GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); |
| 994 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); } | 1020 | if (uniform_lut != -1) { |
| 1021 | glUniform1i(uniform_lut, 3); | ||
| 1022 | } | ||
| 995 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); | 1023 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); |
| 996 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); } | 1024 | if (uniform_lut != -1) { |
| 1025 | glUniform1i(uniform_lut, 4); | ||
| 1026 | } | ||
| 997 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); | 1027 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); |
| 998 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); } | 1028 | if (uniform_lut != -1) { |
| 1029 | glUniform1i(uniform_lut, 5); | ||
| 1030 | } | ||
| 999 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); | 1031 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); |
| 1000 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); } | 1032 | if (uniform_lut != -1) { |
| 1033 | glUniform1i(uniform_lut, 6); | ||
| 1034 | } | ||
| 1001 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); | 1035 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); |
| 1002 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); } | 1036 | if (uniform_lut != -1) { |
| 1037 | glUniform1i(uniform_lut, 7); | ||
| 1038 | } | ||
| 1003 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); | 1039 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); |
| 1004 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } | 1040 | if (uniform_lut != -1) { |
| 1041 | glUniform1i(uniform_lut, 8); | ||
| 1042 | } | ||
| 1005 | 1043 | ||
| 1006 | GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); | 1044 | GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); |
| 1007 | if (uniform_fog_lut != -1) { glUniform1i(uniform_fog_lut, 9); } | 1045 | if (uniform_fog_lut != -1) { |
| 1046 | glUniform1i(uniform_fog_lut, 9); | ||
| 1047 | } | ||
| 1008 | 1048 | ||
| 1009 | current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); | 1049 | current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); |
| 1010 | 1050 | ||
| 1011 | unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); | 1051 | unsigned int block_index = |
| 1052 | glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); | ||
| 1012 | GLint block_size; | 1053 | GLint block_size; |
| 1013 | glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); | 1054 | glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, |
| 1055 | GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); | ||
| 1014 | ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!"); | 1056 | ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!"); |
| 1015 | glUniformBlockBinding(current_shader->shader.handle, block_index, 0); | 1057 | glUniformBlockBinding(current_shader->shader.handle, block_index, 0); |
| 1016 | 1058 | ||
| @@ -1073,7 +1115,8 @@ void RasterizerOpenGL::SyncDepthScale() { | |||
| 1073 | } | 1115 | } |
| 1074 | 1116 | ||
| 1075 | void RasterizerOpenGL::SyncDepthOffset() { | 1117 | void RasterizerOpenGL::SyncDepthOffset() { |
| 1076 | float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); | 1118 | float depth_offset = |
| 1119 | Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); | ||
| 1077 | if (depth_offset != uniform_block_data.data.depth_offset) { | 1120 | if (depth_offset != uniform_block_data.data.depth_offset) { |
| 1078 | uniform_block_data.data.depth_offset = depth_offset; | 1121 | uniform_block_data.data.depth_offset = depth_offset; |
| 1079 | uniform_block_data.dirty = true; | 1122 | uniform_block_data.dirty = true; |
| @@ -1086,10 +1129,14 @@ void RasterizerOpenGL::SyncBlendEnabled() { | |||
| 1086 | 1129 | ||
| 1087 | void RasterizerOpenGL::SyncBlendFuncs() { | 1130 | void RasterizerOpenGL::SyncBlendFuncs() { |
| 1088 | const auto& regs = Pica::g_state.regs; | 1131 | const auto& regs = Pica::g_state.regs; |
| 1089 | state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb); | 1132 | state.blend.rgb_equation = |
| 1090 | state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a); | 1133 | PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb); |
| 1091 | state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); | 1134 | state.blend.a_equation = |
| 1092 | state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); | 1135 | PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a); |
| 1136 | state.blend.src_rgb_func = | ||
| 1137 | PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); | ||
| 1138 | state.blend.dst_rgb_func = | ||
| 1139 | PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); | ||
| 1093 | state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); | 1140 | state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); |
| 1094 | state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a); | 1141 | state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a); |
| 1095 | } | 1142 | } |
| @@ -1105,9 +1152,8 @@ void RasterizerOpenGL::SyncBlendColor() { | |||
| 1105 | void RasterizerOpenGL::SyncFogColor() { | 1152 | void RasterizerOpenGL::SyncFogColor() { |
| 1106 | const auto& regs = Pica::g_state.regs; | 1153 | const auto& regs = Pica::g_state.regs; |
| 1107 | uniform_block_data.data.fog_color = { | 1154 | uniform_block_data.data.fog_color = { |
| 1108 | regs.fog_color.r.Value() / 255.0f, | 1155 | regs.fog_color.r.Value() / 255.0f, regs.fog_color.g.Value() / 255.0f, |
| 1109 | regs.fog_color.g.Value() / 255.0f, | 1156 | regs.fog_color.b.Value() / 255.0f, |
| 1110 | regs.fog_color.b.Value() / 255.0f | ||
| 1111 | }; | 1157 | }; |
| 1112 | uniform_block_data.dirty = true; | 1158 | uniform_block_data.dirty = true; |
| 1113 | } | 1159 | } |
| @@ -1115,14 +1161,14 @@ void RasterizerOpenGL::SyncFogColor() { | |||
| 1115 | void RasterizerOpenGL::SyncFogLUT() { | 1161 | void RasterizerOpenGL::SyncFogLUT() { |
| 1116 | std::array<GLuint, 128> new_data; | 1162 | std::array<GLuint, 128> new_data; |
| 1117 | 1163 | ||
| 1118 | std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), [](const auto& entry) { | 1164 | std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), |
| 1119 | return entry.raw; | 1165 | [](const auto& entry) { return entry.raw; }); |
| 1120 | }); | ||
| 1121 | 1166 | ||
| 1122 | if (new_data != fog_lut_data) { | 1167 | if (new_data != fog_lut_data) { |
| 1123 | fog_lut_data = new_data; | 1168 | fog_lut_data = new_data; |
| 1124 | glActiveTexture(GL_TEXTURE9); | 1169 | glActiveTexture(GL_TEXTURE9); |
| 1125 | glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, fog_lut_data.data()); | 1170 | glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, |
| 1171 | fog_lut_data.data()); | ||
| 1126 | } | 1172 | } |
| 1127 | } | 1173 | } |
| 1128 | 1174 | ||
| @@ -1154,34 +1200,40 @@ void RasterizerOpenGL::SyncColorWriteMask() { | |||
| 1154 | void RasterizerOpenGL::SyncStencilWriteMask() { | 1200 | void RasterizerOpenGL::SyncStencilWriteMask() { |
| 1155 | const auto& regs = Pica::g_state.regs; | 1201 | const auto& regs = Pica::g_state.regs; |
| 1156 | state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0) | 1202 | state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0) |
| 1157 | ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask) | 1203 | ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask) |
| 1158 | : 0; | 1204 | : 0; |
| 1159 | } | 1205 | } |
| 1160 | 1206 | ||
| 1161 | void RasterizerOpenGL::SyncDepthWriteMask() { | 1207 | void RasterizerOpenGL::SyncDepthWriteMask() { |
| 1162 | const auto& regs = Pica::g_state.regs; | 1208 | const auto& regs = Pica::g_state.regs; |
| 1163 | state.depth.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable) | 1209 | state.depth.write_mask = |
| 1164 | ? GL_TRUE | 1210 | (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable) |
| 1165 | : GL_FALSE; | 1211 | ? GL_TRUE |
| 1212 | : GL_FALSE; | ||
| 1166 | } | 1213 | } |
| 1167 | 1214 | ||
| 1168 | void RasterizerOpenGL::SyncStencilTest() { | 1215 | void RasterizerOpenGL::SyncStencilTest() { |
| 1169 | const auto& regs = Pica::g_state.regs; | 1216 | const auto& regs = Pica::g_state.regs; |
| 1170 | state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; | 1217 | state.stencil.test_enabled = regs.output_merger.stencil_test.enable && |
| 1218 | regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; | ||
| 1171 | state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); | 1219 | state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); |
| 1172 | state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; | 1220 | state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; |
| 1173 | state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; | 1221 | state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; |
| 1174 | state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); | 1222 | state.stencil.action_stencil_fail = |
| 1175 | state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); | 1223 | PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); |
| 1176 | state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); | 1224 | state.stencil.action_depth_fail = |
| 1225 | PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); | ||
| 1226 | state.stencil.action_depth_pass = | ||
| 1227 | PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); | ||
| 1177 | } | 1228 | } |
| 1178 | 1229 | ||
| 1179 | void RasterizerOpenGL::SyncDepthTest() { | 1230 | void RasterizerOpenGL::SyncDepthTest() { |
| 1180 | const auto& regs = Pica::g_state.regs; | 1231 | const auto& regs = Pica::g_state.regs; |
| 1181 | state.depth.test_enabled = regs.output_merger.depth_test_enable == 1 || | 1232 | state.depth.test_enabled = |
| 1182 | regs.output_merger.depth_write_enable == 1; | 1233 | regs.output_merger.depth_test_enable == 1 || regs.output_merger.depth_write_enable == 1; |
| 1183 | state.depth.test_func = regs.output_merger.depth_test_enable == 1 ? | 1234 | state.depth.test_func = regs.output_merger.depth_test_enable == 1 |
| 1184 | PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; | 1235 | ? PicaToGL::CompareFunc(regs.output_merger.depth_test_func) |
| 1236 | : GL_ALWAYS; | ||
| 1185 | } | 1237 | } |
| 1186 | 1238 | ||
| 1187 | void RasterizerOpenGL::SyncScissorTest() { | 1239 | void RasterizerOpenGL::SyncScissorTest() { |
| @@ -1208,7 +1260,8 @@ void RasterizerOpenGL::SyncCombinerColor() { | |||
| 1208 | } | 1260 | } |
| 1209 | } | 1261 | } |
| 1210 | 1262 | ||
| 1211 | void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevStageConfig& tev_stage) { | 1263 | void RasterizerOpenGL::SyncTevConstColor(int stage_index, |
| 1264 | const Pica::Regs::TevStageConfig& tev_stage) { | ||
| 1212 | auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color); | 1265 | auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color); |
| 1213 | if (const_color != uniform_block_data.data.const_color[stage_index]) { | 1266 | if (const_color != uniform_block_data.data.const_color[stage_index]) { |
| 1214 | uniform_block_data.data.const_color[stage_index] = const_color; | 1267 | uniform_block_data.data.const_color[stage_index] = const_color; |
| @@ -1237,7 +1290,8 @@ void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { | |||
| 1237 | if (new_data != lighting_lut_data[lut_index]) { | 1290 | if (new_data != lighting_lut_data[lut_index]) { |
| 1238 | lighting_lut_data[lut_index] = new_data; | 1291 | lighting_lut_data[lut_index] = new_data; |
| 1239 | glActiveTexture(GL_TEXTURE3 + lut_index); | 1292 | glActiveTexture(GL_TEXTURE3 + lut_index); |
| 1240 | glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data()); | 1293 | glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, |
| 1294 | lighting_lut_data[lut_index].data()); | ||
| 1241 | } | 1295 | } |
| 1242 | } | 1296 | } |
| 1243 | 1297 | ||
| @@ -1277,7 +1331,7 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) { | |||
| 1277 | GLvec3 position = { | 1331 | GLvec3 position = { |
| 1278 | Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), | 1332 | Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), |
| 1279 | Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), | 1333 | Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), |
| 1280 | Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; | 1334 | Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()}; |
| 1281 | 1335 | ||
| 1282 | if (position != uniform_block_data.data.light_src[light_index].position) { | 1336 | if (position != uniform_block_data.data.light_src[light_index].position) { |
| 1283 | uniform_block_data.data.light_src[light_index].position = position; | 1337 | uniform_block_data.data.light_src[light_index].position = position; |
| @@ -1286,7 +1340,9 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) { | |||
| 1286 | } | 1340 | } |
| 1287 | 1341 | ||
| 1288 | void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { | 1342 | void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { |
| 1289 | GLfloat dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias).ToFloat32(); | 1343 | GLfloat dist_atten_bias = |
| 1344 | Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias) | ||
| 1345 | .ToFloat32(); | ||
| 1290 | 1346 | ||
| 1291 | if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { | 1347 | if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { |
| 1292 | uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; | 1348 | uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; |
| @@ -1295,7 +1351,9 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { | |||
| 1295 | } | 1351 | } |
| 1296 | 1352 | ||
| 1297 | void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { | 1353 | void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { |
| 1298 | GLfloat dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale).ToFloat32(); | 1354 | GLfloat dist_atten_scale = |
| 1355 | Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale) | ||
| 1356 | .ToFloat32(); | ||
| 1299 | 1357 | ||
| 1300 | if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { | 1358 | if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { |
| 1301 | uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; | 1359 | uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index c5029432b..24fefed1b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -8,18 +8,14 @@ | |||
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <cstring> | 9 | #include <cstring> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <vector> | ||
| 12 | #include <unordered_map> | 11 | #include <unordered_map> |
| 13 | 12 | #include <vector> | |
| 14 | #include <glad/glad.h> | 13 | #include <glad/glad.h> |
| 15 | |||
| 16 | #include "common/bit_field.h" | 14 | #include "common/bit_field.h" |
| 17 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 18 | #include "common/hash.h" | 16 | #include "common/hash.h" |
| 19 | #include "common/vector_math.h" | 17 | #include "common/vector_math.h" |
| 20 | |||
| 21 | #include "core/hw/gpu.h" | 18 | #include "core/hw/gpu.h" |
| 22 | |||
| 23 | #include "video_core/pica.h" | 19 | #include "video_core/pica.h" |
| 24 | #include "video_core/pica_state.h" | 20 | #include "video_core/pica_state.h" |
| 25 | #include "video_core/pica_types.h" | 21 | #include "video_core/pica_types.h" |
| @@ -40,10 +36,10 @@ struct ScreenInfo; | |||
| 40 | * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) | 36 | * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) |
| 41 | * two separate shaders sharing the same key. | 37 | * two separate shaders sharing the same key. |
| 42 | * | 38 | * |
| 43 | * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X." | 39 | * We use a union because "implicitly-defined copy/move constructor for a union X copies the object |
| 44 | * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X." | 40 | * representation of X." and "implicitly-defined copy assignment operator for a union X copies the |
| 45 | * = Bytewise copy instead of memberwise copy. | 41 | * object representation (3.9) of X." = Bytewise copy instead of memberwise copy. This is important |
| 46 | * This is important because the padding bytes are included in the hash and comparison between objects. | 42 | * because the padding bytes are included in the hash and comparison between objects. |
| 47 | */ | 43 | */ |
| 48 | union PicaShaderConfig { | 44 | union PicaShaderConfig { |
| 49 | 45 | ||
| @@ -60,8 +56,9 @@ union PicaShaderConfig { | |||
| 60 | 56 | ||
| 61 | state.depthmap_enable = regs.depthmap_enable; | 57 | state.depthmap_enable = regs.depthmap_enable; |
| 62 | 58 | ||
| 63 | state.alpha_test_func = regs.output_merger.alpha_test.enable ? | 59 | state.alpha_test_func = regs.output_merger.alpha_test.enable |
| 64 | regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; | 60 | ? regs.output_merger.alpha_test.func.Value() |
| 61 | : Pica::Regs::CompareFunc::Always; | ||
| 65 | 62 | ||
| 66 | state.texture0_type = regs.texture0.type; | 63 | state.texture0_type = regs.texture0.type; |
| 67 | 64 | ||
| @@ -81,9 +78,8 @@ union PicaShaderConfig { | |||
| 81 | state.fog_mode = regs.fog_mode; | 78 | state.fog_mode = regs.fog_mode; |
| 82 | state.fog_flip = regs.fog_flip; | 79 | state.fog_flip = regs.fog_flip; |
| 83 | 80 | ||
| 84 | state.combiner_buffer_input = | 81 | state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() | |
| 85 | regs.tev_combiner_buffer_input.update_mask_rgb.Value() | | 82 | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; |
| 86 | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; | ||
| 87 | 83 | ||
| 88 | // Fragment lighting | 84 | // Fragment lighting |
| 89 | 85 | ||
| @@ -95,8 +91,10 @@ union PicaShaderConfig { | |||
| 95 | const auto& light = regs.lighting.light[num]; | 91 | const auto& light = regs.lighting.light[num]; |
| 96 | state.lighting.light[light_index].num = num; | 92 | state.lighting.light[light_index].num = num; |
| 97 | state.lighting.light[light_index].directional = light.config.directional != 0; | 93 | state.lighting.light[light_index].directional = light.config.directional != 0; |
| 98 | state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; | 94 | state.lighting.light[light_index].two_sided_diffuse = |
| 99 | state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); | 95 | light.config.two_sided_diffuse != 0; |
| 96 | state.lighting.light[light_index].dist_atten_enable = | ||
| 97 | !regs.lighting.IsDistAttenDisabled(num); | ||
| 100 | } | 98 | } |
| 101 | 99 | ||
| 102 | state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; | 100 | state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; |
| @@ -147,7 +145,7 @@ union PicaShaderConfig { | |||
| 147 | return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); | 145 | return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); |
| 148 | } | 146 | } |
| 149 | 147 | ||
| 150 | bool operator ==(const PicaShaderConfig& o) const { | 148 | bool operator==(const PicaShaderConfig& o) const { |
| 151 | return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; | 149 | return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; |
| 152 | }; | 150 | }; |
| 153 | 151 | ||
| @@ -212,7 +210,8 @@ union PicaShaderConfig { | |||
| 212 | } state; | 210 | } state; |
| 213 | }; | 211 | }; |
| 214 | #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) | 212 | #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) |
| 215 | static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable"); | 213 | static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, |
| 214 | "PicaShaderConfig::State must be trivially copyable"); | ||
| 216 | #endif | 215 | #endif |
| 217 | 216 | ||
| 218 | namespace std { | 217 | namespace std { |
| @@ -228,12 +227,10 @@ struct hash<PicaShaderConfig> { | |||
| 228 | 227 | ||
| 229 | class RasterizerOpenGL : public VideoCore::RasterizerInterface { | 228 | class RasterizerOpenGL : public VideoCore::RasterizerInterface { |
| 230 | public: | 229 | public: |
| 231 | |||
| 232 | RasterizerOpenGL(); | 230 | RasterizerOpenGL(); |
| 233 | ~RasterizerOpenGL() override; | 231 | ~RasterizerOpenGL() override; |
| 234 | 232 | ||
| 235 | void AddTriangle(const Pica::Shader::OutputVertex& v0, | 233 | void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, |
| 236 | const Pica::Shader::OutputVertex& v1, | ||
| 237 | const Pica::Shader::OutputVertex& v2) override; | 234 | const Pica::Shader::OutputVertex& v2) override; |
| 238 | void DrawTriangles() override; | 235 | void DrawTriangles() override; |
| 239 | void NotifyPicaRegisterChanged(u32 id) override; | 236 | void NotifyPicaRegisterChanged(u32 id) override; |
| @@ -242,7 +239,8 @@ public: | |||
| 242 | void FlushAndInvalidateRegion(PAddr addr, u32 size) override; | 239 | void FlushAndInvalidateRegion(PAddr addr, u32 size) override; |
| 243 | bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; | 240 | bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; |
| 244 | bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; | 241 | bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; |
| 245 | bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override; | 242 | bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, |
| 243 | u32 pixel_stride, ScreenInfo& screen_info) override; | ||
| 246 | 244 | ||
| 247 | /// OpenGL shader generated for a given Pica register state | 245 | /// OpenGL shader generated for a given Pica register state |
| 248 | struct PicaShader { | 246 | struct PicaShader { |
| @@ -251,13 +249,13 @@ public: | |||
| 251 | }; | 249 | }; |
| 252 | 250 | ||
| 253 | private: | 251 | private: |
| 254 | |||
| 255 | struct SamplerInfo { | 252 | struct SamplerInfo { |
| 256 | using TextureConfig = Pica::Regs::TextureConfig; | 253 | using TextureConfig = Pica::Regs::TextureConfig; |
| 257 | 254 | ||
| 258 | OGLSampler sampler; | 255 | OGLSampler sampler; |
| 259 | 256 | ||
| 260 | /// Creates the sampler object, initializing its state so that it's in sync with the SamplerInfo struct. | 257 | /// Creates the sampler object, initializing its state so that it's in sync with the |
| 258 | /// SamplerInfo struct. | ||
| 261 | void Create(); | 259 | void Create(); |
| 262 | /// Syncs the sampler object with the config, updating any necessary state. | 260 | /// Syncs the sampler object with the config, updating any necessary state. |
| 263 | void SyncWithConfig(const TextureConfig& config); | 261 | void SyncWithConfig(const TextureConfig& config); |
| @@ -343,8 +341,11 @@ private: | |||
| 343 | alignas(16) GLvec4 tev_combiner_buffer_color; | 341 | alignas(16) GLvec4 tev_combiner_buffer_color; |
| 344 | }; | 342 | }; |
| 345 | 343 | ||
| 346 | static_assert(sizeof(UniformData) == 0x3C0, "The size of the UniformData structure has changed, update the structure in the shader"); | 344 | static_assert( |
| 347 | static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); | 345 | sizeof(UniformData) == 0x3C0, |
| 346 | "The size of the UniformData structure has changed, update the structure in the shader"); | ||
| 347 | static_assert(sizeof(UniformData) < 16384, | ||
| 348 | "UniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 348 | 349 | ||
| 349 | /// Sets the OpenGL shader in accordance with the current PICA register state | 350 | /// Sets the OpenGL shader in accordance with the current PICA register state |
| 350 | void SetShader(); | 351 | void SetShader(); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 7efd0038a..5cbad9b43 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -9,18 +9,14 @@ | |||
| 9 | #include <unordered_set> | 9 | #include <unordered_set> |
| 10 | #include <utility> | 10 | #include <utility> |
| 11 | #include <vector> | 11 | #include <vector> |
| 12 | |||
| 13 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| 14 | |||
| 15 | #include "common/bit_field.h" | 13 | #include "common/bit_field.h" |
| 16 | #include "common/emu_window.h" | 14 | #include "common/emu_window.h" |
| 17 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 18 | #include "common/math_util.h" | 16 | #include "common/math_util.h" |
| 19 | #include "common/microprofile.h" | 17 | #include "common/microprofile.h" |
| 20 | #include "common/vector_math.h" | 18 | #include "common/vector_math.h" |
| 21 | |||
| 22 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 23 | |||
| 24 | #include "video_core/debug_utils/debug_utils.h" | 20 | #include "video_core/debug_utils/debug_utils.h" |
| 25 | #include "video_core/pica_state.h" | 21 | #include "video_core/pica_state.h" |
| 26 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 22 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| @@ -35,18 +31,18 @@ struct FormatTuple { | |||
| 35 | }; | 31 | }; |
| 36 | 32 | ||
| 37 | static const std::array<FormatTuple, 5> fb_format_tuples = {{ | 33 | static const std::array<FormatTuple, 5> fb_format_tuples = {{ |
| 38 | { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8 | 34 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 |
| 39 | { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8 | 35 | {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 |
| 40 | { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1 | 36 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 |
| 41 | { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565 | 37 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 |
| 42 | { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4 | 38 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 |
| 43 | }}; | 39 | }}; |
| 44 | 40 | ||
| 45 | static const std::array<FormatTuple, 4> depth_format_tuples = {{ | 41 | static const std::array<FormatTuple, 4> depth_format_tuples = {{ |
| 46 | { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16 | 42 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 |
| 47 | {}, | 43 | {}, |
| 48 | { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24 | 44 | {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 |
| 49 | { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8 | 45 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 |
| 50 | }}; | 46 | }}; |
| 51 | 47 | ||
| 52 | RasterizerCacheOpenGL::RasterizerCacheOpenGL() { | 48 | RasterizerCacheOpenGL::RasterizerCacheOpenGL() { |
| @@ -58,7 +54,9 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { | |||
| 58 | FlushAll(); | 54 | FlushAll(); |
| 59 | } | 55 | } |
| 60 | 56 | ||
| 61 | static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) { | 57 | static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, |
| 58 | u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, | ||
| 59 | u8* gl_data, bool morton_to_gl) { | ||
| 62 | using PixelFormat = CachedSurface::PixelFormat; | 60 | using PixelFormat = CachedSurface::PixelFormat; |
| 63 | 61 | ||
| 64 | u8* data_ptrs[2]; | 62 | u8* data_ptrs[2]; |
| @@ -72,7 +70,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, | |||
| 72 | for (unsigned y = 0; y < height; ++y) { | 70 | for (unsigned y = 0; y < height; ++y) { |
| 73 | for (unsigned x = 0; x < width; ++x) { | 71 | for (unsigned x = 0; x < width; ++x) { |
| 74 | const u32 coarse_y = y & ~7; | 72 | const u32 coarse_y = y & ~7; |
| 75 | u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | 73 | u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + |
| 74 | coarse_y * width * bytes_per_pixel; | ||
| 76 | u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; | 75 | u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; |
| 77 | 76 | ||
| 78 | data_ptrs[morton_to_gl] = morton_data + morton_offset; | 77 | data_ptrs[morton_to_gl] = morton_data + morton_offset; |
| @@ -81,7 +80,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, | |||
| 81 | // Swap depth and stencil value ordering since 3DS does not match OpenGL | 80 | // Swap depth and stencil value ordering since 3DS does not match OpenGL |
| 82 | u32 depth_stencil; | 81 | u32 depth_stencil; |
| 83 | memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); | 82 | memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); |
| 84 | depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]); | 83 | depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | |
| 84 | (depth_stencil >> depth_stencil_shifts[1]); | ||
| 85 | 85 | ||
| 86 | memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); | 86 | memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); |
| 87 | } | 87 | } |
| @@ -90,7 +90,8 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, | |||
| 90 | for (unsigned y = 0; y < height; ++y) { | 90 | for (unsigned y = 0; y < height; ++y) { |
| 91 | for (unsigned x = 0; x < width; ++x) { | 91 | for (unsigned x = 0; x < width; ++x) { |
| 92 | const u32 coarse_y = y & ~7; | 92 | const u32 coarse_y = y & ~7; |
| 93 | u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | 93 | u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + |
| 94 | coarse_y * width * bytes_per_pixel; | ||
| 94 | u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; | 95 | u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; |
| 95 | 96 | ||
| 96 | data_ptrs[morton_to_gl] = morton_data + morton_offset; | 97 | data_ptrs[morton_to_gl] = morton_data + morton_offset; |
| @@ -102,17 +103,21 @@ static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, | |||
| 102 | } | 103 | } |
| 103 | } | 104 | } |
| 104 | 105 | ||
| 105 | bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) { | 106 | bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, |
| 107 | CachedSurface::SurfaceType type, | ||
| 108 | const MathUtil::Rectangle<int>& src_rect, | ||
| 109 | const MathUtil::Rectangle<int>& dst_rect) { | ||
| 106 | using SurfaceType = CachedSurface::SurfaceType; | 110 | using SurfaceType = CachedSurface::SurfaceType; |
| 107 | 111 | ||
| 108 | OpenGLState cur_state = OpenGLState::GetCurState(); | 112 | OpenGLState cur_state = OpenGLState::GetCurState(); |
| 109 | 113 | ||
| 110 | // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components | 114 | // Make sure textures aren't bound to texture units, since going to bind them to framebuffer |
| 115 | // components | ||
| 111 | OpenGLState::ResetTexture(src_tex); | 116 | OpenGLState::ResetTexture(src_tex); |
| 112 | OpenGLState::ResetTexture(dst_tex); | 117 | OpenGLState::ResetTexture(dst_tex); |
| 113 | 118 | ||
| 114 | // Keep track of previous framebuffer bindings | 119 | // Keep track of previous framebuffer bindings |
| 115 | GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer }; | 120 | GLuint old_fbs[2] = {cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer}; |
| 116 | cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; | 121 | cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; |
| 117 | cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; | 122 | cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; |
| 118 | cur_state.Apply(); | 123 | cur_state.Apply(); |
| @@ -120,11 +125,15 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS | |||
| 120 | u32 buffers = 0; | 125 | u32 buffers = 0; |
| 121 | 126 | ||
| 122 | if (type == SurfaceType::Color || type == SurfaceType::Texture) { | 127 | if (type == SurfaceType::Color || type == SurfaceType::Texture) { |
| 123 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0); | 128 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, |
| 124 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | 129 | 0); |
| 130 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 131 | 0); | ||
| 125 | 132 | ||
| 126 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); | 133 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, |
| 127 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | 134 | 0); |
| 135 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 136 | 0); | ||
| 128 | 137 | ||
| 129 | buffers = GL_COLOR_BUFFER_BIT; | 138 | buffers = GL_COLOR_BUFFER_BIT; |
| 130 | } else if (type == SurfaceType::Depth) { | 139 | } else if (type == SurfaceType::Depth) { |
| @@ -139,10 +148,12 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS | |||
| 139 | buffers = GL_DEPTH_BUFFER_BIT; | 148 | buffers = GL_DEPTH_BUFFER_BIT; |
| 140 | } else if (type == SurfaceType::DepthStencil) { | 149 | } else if (type == SurfaceType::DepthStencil) { |
| 141 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 150 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); |
| 142 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); | 151 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, |
| 152 | src_tex, 0); | ||
| 143 | 153 | ||
| 144 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | 154 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); |
| 145 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); | 155 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, |
| 156 | dst_tex, 0); | ||
| 146 | 157 | ||
| 147 | buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | 158 | buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; |
| 148 | } | 159 | } |
| @@ -155,9 +166,9 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS | |||
| 155 | return false; | 166 | return false; |
| 156 | } | 167 | } |
| 157 | 168 | ||
| 158 | glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, | 169 | glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, |
| 159 | dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, | 170 | dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, |
| 160 | buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); | 171 | buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); |
| 161 | 172 | ||
| 162 | // Restore previous framebuffer bindings | 173 | // Restore previous framebuffer bindings |
| 163 | cur_state.draw.read_framebuffer = old_fbs[0]; | 174 | cur_state.draw.read_framebuffer = old_fbs[0]; |
| @@ -167,17 +178,24 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedS | |||
| 167 | return true; | 178 | return true; |
| 168 | } | 179 | } |
| 169 | 180 | ||
| 170 | bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) { | 181 | bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, |
| 182 | const MathUtil::Rectangle<int>& src_rect, | ||
| 183 | CachedSurface* dst_surface, | ||
| 184 | const MathUtil::Rectangle<int>& dst_rect) { | ||
| 171 | using SurfaceType = CachedSurface::SurfaceType; | 185 | using SurfaceType = CachedSurface::SurfaceType; |
| 172 | 186 | ||
| 173 | if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { | 187 | if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, |
| 188 | dst_surface->pixel_format)) { | ||
| 174 | return false; | 189 | return false; |
| 175 | } | 190 | } |
| 176 | 191 | ||
| 177 | return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect); | 192 | return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, |
| 193 | CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, | ||
| 194 | dst_rect); | ||
| 178 | } | 195 | } |
| 179 | 196 | ||
| 180 | static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) { | 197 | static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, |
| 198 | u32 width, u32 height) { | ||
| 181 | // Allocate an uninitialized texture of appropriate size and format for the surface | 199 | // Allocate an uninitialized texture of appropriate size and format for the surface |
| 182 | using SurfaceType = CachedSurface::SurfaceType; | 200 | using SurfaceType = CachedSurface::SurfaceType; |
| 183 | 201 | ||
| @@ -200,11 +218,11 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi | |||
| 200 | ASSERT(tuple_idx < depth_format_tuples.size()); | 218 | ASSERT(tuple_idx < depth_format_tuples.size()); |
| 201 | tuple = depth_format_tuples[tuple_idx]; | 219 | tuple = depth_format_tuples[tuple_idx]; |
| 202 | } else { | 220 | } else { |
| 203 | tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; | 221 | tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; |
| 204 | } | 222 | } |
| 205 | 223 | ||
| 206 | glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, | 224 | glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format, |
| 207 | tuple.format, tuple.type, nullptr); | 225 | tuple.type, nullptr); |
| 208 | 226 | ||
| 209 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); | 227 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); |
| 210 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | 228 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); |
| @@ -217,7 +235,8 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi | |||
| 217 | } | 235 | } |
| 218 | 236 | ||
| 219 | MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); | 237 | MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); |
| 220 | CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) { | 238 | CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, |
| 239 | bool load_if_create) { | ||
| 221 | using PixelFormat = CachedSurface::PixelFormat; | 240 | using PixelFormat = CachedSurface::PixelFormat; |
| 222 | using SurfaceType = CachedSurface::SurfaceType; | 241 | using SurfaceType = CachedSurface::SurfaceType; |
| 223 | 242 | ||
| @@ -225,29 +244,31 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo | |||
| 225 | return nullptr; | 244 | return nullptr; |
| 226 | } | 245 | } |
| 227 | 246 | ||
| 228 | u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; | 247 | u32 params_size = |
| 248 | params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; | ||
| 229 | 249 | ||
| 230 | // Check for an exact match in existing surfaces | 250 | // Check for an exact match in existing surfaces |
| 231 | CachedSurface* best_exact_surface = nullptr; | 251 | CachedSurface* best_exact_surface = nullptr; |
| 232 | float exact_surface_goodness = -1.f; | 252 | float exact_surface_goodness = -1.f; |
| 233 | 253 | ||
| 234 | auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); | 254 | auto surface_interval = |
| 255 | boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); | ||
| 235 | auto range = surface_cache.equal_range(surface_interval); | 256 | auto range = surface_cache.equal_range(surface_interval); |
| 236 | for (auto it = range.first; it != range.second; ++it) { | 257 | for (auto it = range.first; it != range.second; ++it) { |
| 237 | for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { | 258 | for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { |
| 238 | CachedSurface* surface = it2->get(); | 259 | CachedSurface* surface = it2->get(); |
| 239 | 260 | ||
| 240 | // Check if the request matches the surface exactly | 261 | // Check if the request matches the surface exactly |
| 241 | if (params.addr == surface->addr && | 262 | if (params.addr == surface->addr && params.width == surface->width && |
| 242 | params.width == surface->width && params.height == surface->height && | 263 | params.height == surface->height && params.pixel_format == surface->pixel_format) { |
| 243 | params.pixel_format == surface->pixel_format) | ||
| 244 | { | ||
| 245 | // Make sure optional param-matching criteria are fulfilled | 264 | // Make sure optional param-matching criteria are fulfilled |
| 246 | bool tiling_match = (params.is_tiled == surface->is_tiled); | 265 | bool tiling_match = (params.is_tiled == surface->is_tiled); |
| 247 | bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); | 266 | bool res_scale_match = (params.res_scale_width == surface->res_scale_width && |
| 267 | params.res_scale_height == surface->res_scale_height); | ||
| 248 | if (!match_res_scale || res_scale_match) { | 268 | if (!match_res_scale || res_scale_match) { |
| 249 | // Prioritize same-tiling and highest resolution surfaces | 269 | // Prioritize same-tiling and highest resolution surfaces |
| 250 | float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; | 270 | float match_goodness = |
| 271 | (float)tiling_match + surface->res_scale_width * surface->res_scale_height; | ||
| 251 | if (match_goodness > exact_surface_goodness || surface->dirty) { | 272 | if (match_goodness > exact_surface_goodness || surface->dirty) { |
| 252 | exact_surface_goodness = match_goodness; | 273 | exact_surface_goodness = match_goodness; |
| 253 | best_exact_surface = surface; | 274 | best_exact_surface = surface; |
| @@ -288,9 +309,11 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo | |||
| 288 | 309 | ||
| 289 | if (!load_if_create) { | 310 | if (!load_if_create) { |
| 290 | // Don't load any data; just allocate the surface's texture | 311 | // Don't load any data; just allocate the surface's texture |
| 291 | AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); | 312 | AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, |
| 313 | new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); | ||
| 292 | } else { | 314 | } else { |
| 293 | // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game | 315 | // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead |
| 316 | // of memory upload below if that's a common scenario in some game | ||
| 294 | 317 | ||
| 295 | Memory::RasterizerFlushRegion(params.addr, params_size); | 318 | Memory::RasterizerFlushRegion(params.addr, params_size); |
| 296 | 319 | ||
| @@ -318,7 +341,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo | |||
| 318 | tuple = fb_format_tuples[(unsigned int)params.pixel_format]; | 341 | tuple = fb_format_tuples[(unsigned int)params.pixel_format]; |
| 319 | } else { | 342 | } else { |
| 320 | // Texture | 343 | // Texture |
| 321 | tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; | 344 | tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; |
| 322 | } | 345 | } |
| 323 | 346 | ||
| 324 | std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height); | 347 | std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height); |
| @@ -326,19 +349,23 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo | |||
| 326 | Pica::DebugUtils::TextureInfo tex_info; | 349 | Pica::DebugUtils::TextureInfo tex_info; |
| 327 | tex_info.width = params.width; | 350 | tex_info.width = params.width; |
| 328 | tex_info.height = params.height; | 351 | tex_info.height = params.height; |
| 329 | tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8; | 352 | tex_info.stride = |
| 353 | params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8; | ||
| 330 | tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format; | 354 | tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format; |
| 331 | tex_info.physical_address = params.addr; | 355 | tex_info.physical_address = params.addr; |
| 332 | 356 | ||
| 333 | for (unsigned y = 0; y < params.height; ++y) { | 357 | for (unsigned y = 0; y < params.height; ++y) { |
| 334 | for (unsigned x = 0; x < params.width; ++x) { | 358 | for (unsigned x = 0; x < params.width; ++x) { |
| 335 | tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info); | 359 | tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture( |
| 360 | texture_src_data, x, params.height - 1 - y, tex_info); | ||
| 336 | } | 361 | } |
| 337 | } | 362 | } |
| 338 | 363 | ||
| 339 | glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); | 364 | glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, |
| 365 | 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); | ||
| 340 | } else { | 366 | } else { |
| 341 | // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format | 367 | // Depth/Stencil formats need special treatment since they aren't sampleable using |
| 368 | // LookupTexture and can't use RGBA format | ||
| 342 | size_t tuple_idx = (size_t)params.pixel_format - 14; | 369 | size_t tuple_idx = (size_t)params.pixel_format - 14; |
| 343 | ASSERT(tuple_idx < depth_format_tuples.size()); | 370 | ASSERT(tuple_idx < depth_format_tuples.size()); |
| 344 | const FormatTuple& tuple = depth_format_tuples[tuple_idx]; | 371 | const FormatTuple& tuple = depth_format_tuples[tuple_idx]; |
| @@ -350,14 +377,18 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo | |||
| 350 | 377 | ||
| 351 | u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; | 378 | u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; |
| 352 | 379 | ||
| 353 | std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel); | 380 | std::vector<u8> temp_fb_depth_buffer(params.width * params.height * |
| 381 | gl_bytes_per_pixel); | ||
| 354 | 382 | ||
| 355 | u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); | 383 | u8* temp_fb_depth_buffer_ptr = |
| 384 | use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); | ||
| 356 | 385 | ||
| 357 | MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true); | 386 | MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, |
| 387 | gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, | ||
| 388 | true); | ||
| 358 | 389 | ||
| 359 | glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, | 390 | glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, |
| 360 | tuple.format, tuple.type, temp_fb_depth_buffer.data()); | 391 | 0, tuple.format, tuple.type, temp_fb_depth_buffer.data()); |
| 361 | } | 392 | } |
| 362 | } | 393 | } |
| 363 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | 394 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); |
| @@ -367,10 +398,13 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo | |||
| 367 | OGLTexture scaled_texture; | 398 | OGLTexture scaled_texture; |
| 368 | scaled_texture.Create(); | 399 | scaled_texture.Create(); |
| 369 | 400 | ||
| 370 | AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); | 401 | AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, |
| 371 | BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format), | 402 | new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); |
| 372 | MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height), | 403 | BlitTextures(new_surface->texture.handle, scaled_texture.handle, |
| 373 | MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight())); | 404 | CachedSurface::GetFormatType(new_surface->pixel_format), |
| 405 | MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height), | ||
| 406 | MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), | ||
| 407 | new_surface->GetScaledHeight())); | ||
| 374 | 408 | ||
| 375 | new_surface->texture.Release(); | 409 | new_surface->texture.Release(); |
| 376 | new_surface->texture.handle = scaled_texture.handle; | 410 | new_surface->texture.handle = scaled_texture.handle; |
| @@ -389,11 +423,15 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo | |||
| 389 | } | 423 | } |
| 390 | 424 | ||
| 391 | Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); | 425 | Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); |
| 392 | surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface }))); | 426 | surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open( |
| 427 | new_surface->addr, new_surface->addr + new_surface->size), | ||
| 428 | std::set<std::shared_ptr<CachedSurface>>({new_surface}))); | ||
| 393 | return new_surface.get(); | 429 | return new_surface.get(); |
| 394 | } | 430 | } |
| 395 | 431 | ||
| 396 | CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) { | 432 | CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, |
| 433 | bool match_res_scale, bool load_if_create, | ||
| 434 | MathUtil::Rectangle<int>& out_rect) { | ||
| 397 | if (params.addr == 0) { | 435 | if (params.addr == 0) { |
| 398 | return nullptr; | 436 | return nullptr; |
| 399 | } | 437 | } |
| @@ -405,7 +443,8 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params | |||
| 405 | CachedSurface* best_subrect_surface = nullptr; | 443 | CachedSurface* best_subrect_surface = nullptr; |
| 406 | float subrect_surface_goodness = -1.f; | 444 | float subrect_surface_goodness = -1.f; |
| 407 | 445 | ||
| 408 | auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); | 446 | auto surface_interval = |
| 447 | boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); | ||
| 409 | auto cache_upper_bound = surface_cache.upper_bound(surface_interval); | 448 | auto cache_upper_bound = surface_cache.upper_bound(surface_interval); |
| 410 | for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { | 449 | for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { |
| 411 | for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { | 450 | for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { |
| @@ -414,14 +453,15 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params | |||
| 414 | // Check if the request is contained in the surface | 453 | // Check if the request is contained in the surface |
| 415 | if (params.addr >= surface->addr && | 454 | if (params.addr >= surface->addr && |
| 416 | params.addr + params_size - 1 <= surface->addr + surface->size - 1 && | 455 | params.addr + params_size - 1 <= surface->addr + surface->size - 1 && |
| 417 | params.pixel_format == surface->pixel_format) | 456 | params.pixel_format == surface->pixel_format) { |
| 418 | { | ||
| 419 | // Make sure optional param-matching criteria are fulfilled | 457 | // Make sure optional param-matching criteria are fulfilled |
| 420 | bool tiling_match = (params.is_tiled == surface->is_tiled); | 458 | bool tiling_match = (params.is_tiled == surface->is_tiled); |
| 421 | bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); | 459 | bool res_scale_match = (params.res_scale_width == surface->res_scale_width && |
| 460 | params.res_scale_height == surface->res_scale_height); | ||
| 422 | if (!match_res_scale || res_scale_match) { | 461 | if (!match_res_scale || res_scale_match) { |
| 423 | // Prioritize same-tiling and highest resolution surfaces | 462 | // Prioritize same-tiling and highest resolution surfaces |
| 424 | float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height; | 463 | float match_goodness = |
| 464 | (float)tiling_match + surface->res_scale_width * surface->res_scale_height; | ||
| 425 | if (match_goodness > subrect_surface_goodness || surface->dirty) { | 465 | if (match_goodness > subrect_surface_goodness || surface->dirty) { |
| 426 | subrect_surface_goodness = match_goodness; | 466 | subrect_surface_goodness = match_goodness; |
| 427 | best_subrect_surface = surface; | 467 | best_subrect_surface = surface; |
| @@ -433,7 +473,8 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params | |||
| 433 | 473 | ||
| 434 | // Return the best subrect surface if found | 474 | // Return the best subrect surface if found |
| 435 | if (best_subrect_surface != nullptr) { | 475 | if (best_subrect_surface != nullptr) { |
| 436 | unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); | 476 | unsigned int bytes_per_pixel = |
| 477 | (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); | ||
| 437 | 478 | ||
| 438 | int x0, y0; | 479 | int x0, y0; |
| 439 | 480 | ||
| @@ -452,7 +493,9 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params | |||
| 452 | y0 = begin_tile_index / tiles_per_row * 8; | 493 | y0 = begin_tile_index / tiles_per_row * 8; |
| 453 | 494 | ||
| 454 | // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. | 495 | // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. |
| 455 | out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height)); | 496 | out_rect = |
| 497 | MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, | ||
| 498 | best_subrect_surface->height - (y0 + params.height)); | ||
| 456 | } | 499 | } |
| 457 | 500 | ||
| 458 | out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); | 501 | out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); |
| @@ -465,16 +508,20 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params | |||
| 465 | 508 | ||
| 466 | // No subrect found - create and return a new surface | 509 | // No subrect found - create and return a new surface |
| 467 | if (!params.is_tiled) { | 510 | if (!params.is_tiled) { |
| 468 | out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height)); | 511 | out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), |
| 512 | (int)(params.height * params.res_scale_height)); | ||
| 469 | } else { | 513 | } else { |
| 470 | out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0); | 514 | out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), |
| 515 | (int)(params.width * params.res_scale_width), 0); | ||
| 471 | } | 516 | } |
| 472 | 517 | ||
| 473 | return GetSurface(params, match_res_scale, load_if_create); | 518 | return GetSurface(params, match_res_scale, load_if_create); |
| 474 | } | 519 | } |
| 475 | 520 | ||
| 476 | CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) { | 521 | CachedSurface* RasterizerCacheOpenGL::GetTextureSurface( |
| 477 | Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); | 522 | const Pica::Regs::FullTextureConfig& config) { |
| 523 | Pica::DebugUtils::TextureInfo info = | ||
| 524 | Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); | ||
| 478 | 525 | ||
| 479 | CachedSurface params; | 526 | CachedSurface params; |
| 480 | params.addr = info.physical_address; | 527 | params.addr = info.physical_address; |
| @@ -485,20 +532,28 @@ CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTe | |||
| 485 | return GetSurface(params, false, true); | 532 | return GetSurface(params, false, true); |
| 486 | } | 533 | } |
| 487 | 534 | ||
| 488 | std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) { | 535 | std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> |
| 536 | RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) { | ||
| 489 | const auto& regs = Pica::g_state.regs; | 537 | const auto& regs = Pica::g_state.regs; |
| 490 | 538 | ||
| 491 | // Make sur that framebuffers don't overlap if both color and depth are being used | 539 | // Make sur that framebuffers don't overlap if both color and depth are being used |
| 492 | u32 fb_area = config.GetWidth() * config.GetHeight(); | 540 | u32 fb_area = config.GetWidth() * config.GetHeight(); |
| 493 | bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 && | 541 | bool framebuffers_overlap = |
| 494 | config.GetDepthBufferPhysicalAddress() != 0 && | 542 | config.GetColorBufferPhysicalAddress() != 0 && |
| 495 | MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), | 543 | config.GetDepthBufferPhysicalAddress() != 0 && |
| 496 | config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format)); | 544 | MathUtil::IntervalsIntersect( |
| 545 | config.GetColorBufferPhysicalAddress(), | ||
| 546 | fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), | ||
| 547 | config.GetDepthBufferPhysicalAddress(), | ||
| 548 | fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format)); | ||
| 497 | bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; | 549 | bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; |
| 498 | bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap); | 550 | bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && |
| 551 | (regs.output_merger.depth_test_enable || | ||
| 552 | regs.output_merger.depth_write_enable || !framebuffers_overlap); | ||
| 499 | 553 | ||
| 500 | if (framebuffers_overlap && using_color_fb && using_depth_fb) { | 554 | if (framebuffers_overlap && using_color_fb && using_depth_fb) { |
| 501 | LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); | 555 | LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " |
| 556 | "overlapping framebuffers not supported!"); | ||
| 502 | using_depth_fb = false; | 557 | using_depth_fb = false; |
| 503 | } | 558 | } |
| 504 | 559 | ||
| @@ -512,8 +567,10 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC | |||
| 512 | auto layout = VideoCore::g_emu_window->GetFramebufferLayout(); | 567 | auto layout = VideoCore::g_emu_window->GetFramebufferLayout(); |
| 513 | 568 | ||
| 514 | // Assume same scaling factor for top and bottom screens | 569 | // Assume same scaling factor for top and bottom screens |
| 515 | color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth; | 570 | color_params.res_scale_width = depth_params.res_scale_width = |
| 516 | color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight; | 571 | (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth; |
| 572 | color_params.res_scale_height = depth_params.res_scale_height = | ||
| 573 | (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight; | ||
| 517 | } | 574 | } |
| 518 | 575 | ||
| 519 | color_params.addr = config.GetColorBufferPhysicalAddress(); | 576 | color_params.addr = config.GetColorBufferPhysicalAddress(); |
| @@ -523,22 +580,28 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC | |||
| 523 | depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); | 580 | depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); |
| 524 | 581 | ||
| 525 | MathUtil::Rectangle<int> color_rect; | 582 | MathUtil::Rectangle<int> color_rect; |
| 526 | CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; | 583 | CachedSurface* color_surface = |
| 584 | using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; | ||
| 527 | 585 | ||
| 528 | MathUtil::Rectangle<int> depth_rect; | 586 | MathUtil::Rectangle<int> depth_rect; |
| 529 | CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; | 587 | CachedSurface* depth_surface = |
| 588 | using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; | ||
| 530 | 589 | ||
| 531 | // Sanity check to make sure found surfaces aren't the same | 590 | // Sanity check to make sure found surfaces aren't the same |
| 532 | if (using_depth_fb && using_color_fb && color_surface == depth_surface) { | 591 | if (using_depth_fb && using_color_fb && color_surface == depth_surface) { |
| 533 | LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); | 592 | LOG_CRITICAL( |
| 593 | Render_OpenGL, | ||
| 594 | "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); | ||
| 534 | using_depth_fb = false; | 595 | using_depth_fb = false; |
| 535 | depth_surface = nullptr; | 596 | depth_surface = nullptr; |
| 536 | } | 597 | } |
| 537 | 598 | ||
| 538 | MathUtil::Rectangle<int> rect; | 599 | MathUtil::Rectangle<int> rect; |
| 539 | 600 | ||
| 540 | if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { | 601 | if (color_surface != nullptr && depth_surface != nullptr && |
| 541 | // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match | 602 | (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { |
| 603 | // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if | ||
| 604 | // they don't match | ||
| 542 | if (color_rect.left != 0 || color_rect.top != 0) { | 605 | if (color_rect.left != 0 || color_rect.top != 0) { |
| 543 | color_surface = GetSurface(color_params, true, true); | 606 | color_surface = GetSurface(color_params, true, true); |
| 544 | } | 607 | } |
| @@ -548,9 +611,13 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC | |||
| 548 | } | 611 | } |
| 549 | 612 | ||
| 550 | if (!color_surface->is_tiled) { | 613 | if (!color_surface->is_tiled) { |
| 551 | rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height)); | 614 | rect = MathUtil::Rectangle<int>( |
| 615 | 0, 0, (int)(color_params.width * color_params.res_scale_width), | ||
| 616 | (int)(color_params.height * color_params.res_scale_height)); | ||
| 552 | } else { | 617 | } else { |
| 553 | rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0); | 618 | rect = MathUtil::Rectangle<int>( |
| 619 | 0, (int)(color_params.height * color_params.res_scale_height), | ||
| 620 | (int)(color_params.width * color_params.res_scale_width), 0); | ||
| 554 | } | 621 | } |
| 555 | } else if (color_surface != nullptr) { | 622 | } else if (color_surface != nullptr) { |
| 556 | rect = color_rect; | 623 | rect = color_rect; |
| @@ -564,7 +631,8 @@ std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerC | |||
| 564 | } | 631 | } |
| 565 | 632 | ||
| 566 | CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { | 633 | CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { |
| 567 | auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress()); | 634 | auto surface_interval = |
| 635 | boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress()); | ||
| 568 | auto range = surface_cache.equal_range(surface_interval); | 636 | auto range = surface_cache.equal_range(surface_interval); |
| 569 | for (auto it = range.first; it != range.second; ++it) { | 637 | for (auto it = range.first; it != range.second; ++it) { |
| 570 | for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { | 638 | for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { |
| @@ -581,8 +649,9 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF | |||
| 581 | 649 | ||
| 582 | if (surface->addr == config.GetStartAddress() && | 650 | if (surface->addr == config.GetStartAddress() && |
| 583 | CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && | 651 | CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && |
| 584 | (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress())) | 652 | (surface->width * surface->height * |
| 585 | { | 653 | CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == |
| 654 | (config.GetEndAddress() - config.GetStartAddress())) { | ||
| 586 | return surface; | 655 | return surface; |
| 587 | } | 656 | } |
| 588 | } | 657 | } |
| @@ -617,8 +686,11 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { | |||
| 617 | if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { | 686 | if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { |
| 618 | unscaled_tex.Create(); | 687 | unscaled_tex.Create(); |
| 619 | 688 | ||
| 620 | AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height); | 689 | AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, |
| 621 | BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format), | 690 | surface->height); |
| 691 | BlitTextures( | ||
| 692 | surface->texture.handle, unscaled_tex.handle, | ||
| 693 | CachedSurface::GetFormatType(surface->pixel_format), | ||
| 622 | MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), | 694 | MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), |
| 623 | MathUtil::Rectangle<int>(0, 0, surface->width, surface->height)); | 695 | MathUtil::Rectangle<int>(0, 0, surface->width, surface->height)); |
| 624 | 696 | ||
| @@ -648,10 +720,14 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { | |||
| 648 | 720 | ||
| 649 | glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); | 721 | glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); |
| 650 | 722 | ||
| 651 | // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. | 723 | // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion |
| 652 | MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false); | 724 | // is necessary. |
| 725 | MortonCopyPixels(surface->pixel_format, surface->width, surface->height, | ||
| 726 | bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), | ||
| 727 | false); | ||
| 653 | } else { | 728 | } else { |
| 654 | // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format | 729 | // Depth/Stencil formats need special treatment since they aren't sampleable using |
| 730 | // LookupTexture and can't use RGBA format | ||
| 655 | size_t tuple_idx = (size_t)surface->pixel_format - 14; | 731 | size_t tuple_idx = (size_t)surface->pixel_format - 14; |
| 656 | ASSERT(tuple_idx < depth_format_tuples.size()); | 732 | ASSERT(tuple_idx < depth_format_tuples.size()); |
| 657 | const FormatTuple& tuple = depth_format_tuples[tuple_idx]; | 733 | const FormatTuple& tuple = depth_format_tuples[tuple_idx]; |
| @@ -669,7 +745,9 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { | |||
| 669 | 745 | ||
| 670 | u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); | 746 | u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); |
| 671 | 747 | ||
| 672 | MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false); | 748 | MortonCopyPixels(surface->pixel_format, surface->width, surface->height, |
| 749 | bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, | ||
| 750 | false); | ||
| 673 | } | 751 | } |
| 674 | } | 752 | } |
| 675 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | 753 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); |
| @@ -680,7 +758,8 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { | |||
| 680 | cur_state.Apply(); | 758 | cur_state.Apply(); |
| 681 | } | 759 | } |
| 682 | 760 | ||
| 683 | void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) { | 761 | void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, |
| 762 | bool invalidate) { | ||
| 684 | if (size == 0) { | 763 | if (size == 0) { |
| 685 | return; | 764 | return; |
| 686 | } | 765 | } |
| @@ -691,8 +770,11 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurfac | |||
| 691 | auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size); | 770 | auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size); |
| 692 | auto cache_upper_bound = surface_cache.upper_bound(surface_interval); | 771 | auto cache_upper_bound = surface_cache.upper_bound(surface_interval); |
| 693 | for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { | 772 | for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { |
| 694 | std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()), | 773 | std::copy_if(it->second.begin(), it->second.end(), |
| 695 | [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); }); | 774 | std::inserter(touching_surfaces, touching_surfaces.end()), |
| 775 | [skip_surface](std::shared_ptr<CachedSurface> surface) { | ||
| 776 | return (surface.get() != skip_surface); | ||
| 777 | }); | ||
| 696 | } | 778 | } |
| 697 | 779 | ||
| 698 | // Flush and invalidate surfaces | 780 | // Flush and invalidate surfaces |
| @@ -700,7 +782,10 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurfac | |||
| 700 | FlushSurface(surface.get()); | 782 | FlushSurface(surface.get()); |
| 701 | if (invalidate) { | 783 | if (invalidate) { |
| 702 | Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); | 784 | Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); |
| 703 | surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface }))); | 785 | surface_cache.subtract( |
| 786 | std::make_pair(boost::icl::interval<PAddr>::right_open( | ||
| 787 | surface->addr, surface->addr + surface->size), | ||
| 788 | std::set<std::shared_ptr<CachedSurface>>({surface}))); | ||
| 704 | } | 789 | } |
| 705 | } | 790 | } |
| 706 | } | 791 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 225596415..849530d86 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -8,21 +8,18 @@ | |||
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <set> | 9 | #include <set> |
| 10 | #include <tuple> | 10 | #include <tuple> |
| 11 | |||
| 12 | #include <boost/icl/interval_map.hpp> | 11 | #include <boost/icl/interval_map.hpp> |
| 13 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| 14 | |||
| 15 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 16 | #include "common/common_funcs.h" | 14 | #include "common/common_funcs.h" |
| 17 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 18 | |||
| 19 | #include "core/hw/gpu.h" | 16 | #include "core/hw/gpu.h" |
| 20 | |||
| 21 | #include "video_core/pica.h" | 17 | #include "video_core/pica.h" |
| 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 18 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 23 | 19 | ||
| 24 | namespace MathUtil { | 20 | namespace MathUtil { |
| 25 | template <class T> struct Rectangle; | 21 | template <class T> |
| 22 | struct Rectangle; | ||
| 26 | } | 23 | } |
| 27 | 24 | ||
| 28 | struct CachedSurface; | 25 | struct CachedSurface; |
| @@ -32,38 +29,38 @@ using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<Ca | |||
| 32 | struct CachedSurface { | 29 | struct CachedSurface { |
| 33 | enum class PixelFormat { | 30 | enum class PixelFormat { |
| 34 | // First 5 formats are shared between textures and color buffers | 31 | // First 5 formats are shared between textures and color buffers |
| 35 | RGBA8 = 0, | 32 | RGBA8 = 0, |
| 36 | RGB8 = 1, | 33 | RGB8 = 1, |
| 37 | RGB5A1 = 2, | 34 | RGB5A1 = 2, |
| 38 | RGB565 = 3, | 35 | RGB565 = 3, |
| 39 | RGBA4 = 4, | 36 | RGBA4 = 4, |
| 40 | 37 | ||
| 41 | // Texture-only formats | 38 | // Texture-only formats |
| 42 | IA8 = 5, | 39 | IA8 = 5, |
| 43 | RG8 = 6, | 40 | RG8 = 6, |
| 44 | I8 = 7, | 41 | I8 = 7, |
| 45 | A8 = 8, | 42 | A8 = 8, |
| 46 | IA4 = 9, | 43 | IA4 = 9, |
| 47 | I4 = 10, | 44 | I4 = 10, |
| 48 | A4 = 11, | 45 | A4 = 11, |
| 49 | ETC1 = 12, | 46 | ETC1 = 12, |
| 50 | ETC1A4 = 13, | 47 | ETC1A4 = 13, |
| 51 | 48 | ||
| 52 | // Depth buffer-only formats | 49 | // Depth buffer-only formats |
| 53 | D16 = 14, | 50 | D16 = 14, |
| 54 | // gap | 51 | // gap |
| 55 | D24 = 16, | 52 | D24 = 16, |
| 56 | D24S8 = 17, | 53 | D24S8 = 17, |
| 57 | 54 | ||
| 58 | Invalid = 255, | 55 | Invalid = 255, |
| 59 | }; | 56 | }; |
| 60 | 57 | ||
| 61 | enum class SurfaceType { | 58 | enum class SurfaceType { |
| 62 | Color = 0, | 59 | Color = 0, |
| 63 | Texture = 1, | 60 | Texture = 1, |
| 64 | Depth = 2, | 61 | Depth = 2, |
| 65 | DepthStencil = 3, | 62 | DepthStencil = 3, |
| 66 | Invalid = 4, | 63 | Invalid = 4, |
| 67 | }; | 64 | }; |
| 68 | 65 | ||
| 69 | static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { | 66 | static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { |
| @@ -101,7 +98,8 @@ struct CachedSurface { | |||
| 101 | } | 98 | } |
| 102 | 99 | ||
| 103 | static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) { | 100 | static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) { |
| 104 | return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid; | 101 | return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) |
| 102 | : PixelFormat::Invalid; | ||
| 105 | } | 103 | } |
| 106 | 104 | ||
| 107 | static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { | 105 | static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { |
| @@ -120,7 +118,8 @@ struct CachedSurface { | |||
| 120 | SurfaceType a_type = GetFormatType(pixel_format_a); | 118 | SurfaceType a_type = GetFormatType(pixel_format_a); |
| 121 | SurfaceType b_type = GetFormatType(pixel_format_b); | 119 | SurfaceType b_type = GetFormatType(pixel_format_b); |
| 122 | 120 | ||
| 123 | if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { | 121 | if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && |
| 122 | (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { | ||
| 124 | return true; | 123 | return true; |
| 125 | } | 124 | } |
| 126 | 125 | ||
| @@ -187,22 +186,30 @@ public: | |||
| 187 | ~RasterizerCacheOpenGL(); | 186 | ~RasterizerCacheOpenGL(); |
| 188 | 187 | ||
| 189 | /// Blits one texture to another | 188 | /// Blits one texture to another |
| 190 | bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect); | 189 | bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, |
| 190 | const MathUtil::Rectangle<int>& src_rect, | ||
| 191 | const MathUtil::Rectangle<int>& dst_rect); | ||
| 191 | 192 | ||
| 192 | /// Attempt to blit one surface's texture to another | 193 | /// Attempt to blit one surface's texture to another |
| 193 | bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect); | 194 | bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, |
| 195 | CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect); | ||
| 194 | 196 | ||
| 195 | /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) | 197 | /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) |
| 196 | CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create); | 198 | CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, |
| 199 | bool load_if_create); | ||
| 197 | 200 | ||
| 198 | /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached) | 201 | /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from |
| 199 | CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect); | 202 | /// 3DS memory to OpenGL and caches it (if not already cached) |
| 203 | CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, | ||
| 204 | bool load_if_create, MathUtil::Rectangle<int>& out_rect); | ||
| 200 | 205 | ||
| 201 | /// Gets a surface based on the texture configuration | 206 | /// Gets a surface based on the texture configuration |
| 202 | CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config); | 207 | CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config); |
| 203 | 208 | ||
| 204 | /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration | 209 | /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer |
| 205 | std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config); | 210 | /// configuration |
| 211 | std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces( | ||
| 212 | const Pica::Regs::FramebufferConfig& config); | ||
| 206 | 213 | ||
| 207 | /// Attempt to get a surface that exactly matches the fill region and format | 214 | /// Attempt to get a surface that exactly matches the fill region and format |
| 208 | CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); | 215 | CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); |
| @@ -210,7 +217,8 @@ public: | |||
| 210 | /// Write the surface back to memory | 217 | /// Write the surface back to memory |
| 211 | void FlushSurface(CachedSurface* surface); | 218 | void FlushSurface(CachedSurface* surface); |
| 212 | 219 | ||
| 213 | /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache | 220 | /// Write any cached resources overlapping the region back to memory (if dirty) and optionally |
| 221 | /// invalidate them in the cache | ||
| 214 | void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); | 222 | void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); |
| 215 | 223 | ||
| 216 | /// Flush all cached resources tracked by this cache manager | 224 | /// Flush all cached resources tracked by this cache manager |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index eb128966c..13301ec9f 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -5,30 +5,36 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <utility> | 7 | #include <utility> |
| 8 | |||
| 9 | #include <glad/glad.h> | 8 | #include <glad/glad.h> |
| 10 | |||
| 11 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 12 | |||
| 13 | #include "video_core/renderer_opengl/gl_shader_util.h" | 10 | #include "video_core/renderer_opengl/gl_shader_util.h" |
| 14 | #include "video_core/renderer_opengl/gl_state.h" | 11 | #include "video_core/renderer_opengl/gl_state.h" |
| 15 | 12 | ||
| 16 | class OGLTexture : private NonCopyable { | 13 | class OGLTexture : private NonCopyable { |
| 17 | public: | 14 | public: |
| 18 | OGLTexture() = default; | 15 | OGLTexture() = default; |
| 19 | OGLTexture(OGLTexture&& o) { std::swap(handle, o.handle); } | 16 | OGLTexture(OGLTexture&& o) { |
| 20 | ~OGLTexture() { Release(); } | 17 | std::swap(handle, o.handle); |
| 21 | OGLTexture& operator=(OGLTexture&& o) { std::swap(handle, o.handle); return *this; } | 18 | } |
| 19 | ~OGLTexture() { | ||
| 20 | Release(); | ||
| 21 | } | ||
| 22 | OGLTexture& operator=(OGLTexture&& o) { | ||
| 23 | std::swap(handle, o.handle); | ||
| 24 | return *this; | ||
| 25 | } | ||
| 22 | 26 | ||
| 23 | /// Creates a new internal OpenGL resource and stores the handle | 27 | /// Creates a new internal OpenGL resource and stores the handle |
| 24 | void Create() { | 28 | void Create() { |
| 25 | if (handle != 0) return; | 29 | if (handle != 0) |
| 30 | return; | ||
| 26 | glGenTextures(1, &handle); | 31 | glGenTextures(1, &handle); |
| 27 | } | 32 | } |
| 28 | 33 | ||
| 29 | /// Deletes the internal OpenGL resource | 34 | /// Deletes the internal OpenGL resource |
| 30 | void Release() { | 35 | void Release() { |
| 31 | if (handle == 0) return; | 36 | if (handle == 0) |
| 37 | return; | ||
| 32 | glDeleteTextures(1, &handle); | 38 | glDeleteTextures(1, &handle); |
| 33 | OpenGLState::ResetTexture(handle); | 39 | OpenGLState::ResetTexture(handle); |
| 34 | handle = 0; | 40 | handle = 0; |
| @@ -40,19 +46,28 @@ public: | |||
| 40 | class OGLSampler : private NonCopyable { | 46 | class OGLSampler : private NonCopyable { |
| 41 | public: | 47 | public: |
| 42 | OGLSampler() = default; | 48 | OGLSampler() = default; |
| 43 | OGLSampler(OGLSampler&& o) { std::swap(handle, o.handle); } | 49 | OGLSampler(OGLSampler&& o) { |
| 44 | ~OGLSampler() { Release(); } | 50 | std::swap(handle, o.handle); |
| 45 | OGLSampler& operator=(OGLSampler&& o) { std::swap(handle, o.handle); return *this; } | 51 | } |
| 52 | ~OGLSampler() { | ||
| 53 | Release(); | ||
| 54 | } | ||
| 55 | OGLSampler& operator=(OGLSampler&& o) { | ||
| 56 | std::swap(handle, o.handle); | ||
| 57 | return *this; | ||
| 58 | } | ||
| 46 | 59 | ||
| 47 | /// Creates a new internal OpenGL resource and stores the handle | 60 | /// Creates a new internal OpenGL resource and stores the handle |
| 48 | void Create() { | 61 | void Create() { |
| 49 | if (handle != 0) return; | 62 | if (handle != 0) |
| 63 | return; | ||
| 50 | glGenSamplers(1, &handle); | 64 | glGenSamplers(1, &handle); |
| 51 | } | 65 | } |
| 52 | 66 | ||
| 53 | /// Deletes the internal OpenGL resource | 67 | /// Deletes the internal OpenGL resource |
| 54 | void Release() { | 68 | void Release() { |
| 55 | if (handle == 0) return; | 69 | if (handle == 0) |
| 70 | return; | ||
| 56 | glDeleteSamplers(1, &handle); | 71 | glDeleteSamplers(1, &handle); |
| 57 | OpenGLState::ResetSampler(handle); | 72 | OpenGLState::ResetSampler(handle); |
| 58 | handle = 0; | 73 | handle = 0; |
| @@ -64,19 +79,28 @@ public: | |||
| 64 | class OGLShader : private NonCopyable { | 79 | class OGLShader : private NonCopyable { |
| 65 | public: | 80 | public: |
| 66 | OGLShader() = default; | 81 | OGLShader() = default; |
| 67 | OGLShader(OGLShader&& o) { std::swap(handle, o.handle); } | 82 | OGLShader(OGLShader&& o) { |
| 68 | ~OGLShader() { Release(); } | 83 | std::swap(handle, o.handle); |
| 69 | OGLShader& operator=(OGLShader&& o) { std::swap(handle, o.handle); return *this; } | 84 | } |
| 85 | ~OGLShader() { | ||
| 86 | Release(); | ||
| 87 | } | ||
| 88 | OGLShader& operator=(OGLShader&& o) { | ||
| 89 | std::swap(handle, o.handle); | ||
| 90 | return *this; | ||
| 91 | } | ||
| 70 | 92 | ||
| 71 | /// Creates a new internal OpenGL resource and stores the handle | 93 | /// Creates a new internal OpenGL resource and stores the handle |
| 72 | void Create(const char* vert_shader, const char* frag_shader) { | 94 | void Create(const char* vert_shader, const char* frag_shader) { |
| 73 | if (handle != 0) return; | 95 | if (handle != 0) |
| 96 | return; | ||
| 74 | handle = GLShader::LoadProgram(vert_shader, frag_shader); | 97 | handle = GLShader::LoadProgram(vert_shader, frag_shader); |
| 75 | } | 98 | } |
| 76 | 99 | ||
| 77 | /// Deletes the internal OpenGL resource | 100 | /// Deletes the internal OpenGL resource |
| 78 | void Release() { | 101 | void Release() { |
| 79 | if (handle == 0) return; | 102 | if (handle == 0) |
| 103 | return; | ||
| 80 | glDeleteProgram(handle); | 104 | glDeleteProgram(handle); |
| 81 | OpenGLState::ResetProgram(handle); | 105 | OpenGLState::ResetProgram(handle); |
| 82 | handle = 0; | 106 | handle = 0; |
| @@ -88,19 +112,28 @@ public: | |||
| 88 | class OGLBuffer : private NonCopyable { | 112 | class OGLBuffer : private NonCopyable { |
| 89 | public: | 113 | public: |
| 90 | OGLBuffer() = default; | 114 | OGLBuffer() = default; |
| 91 | OGLBuffer(OGLBuffer&& o) { std::swap(handle, o.handle); } | 115 | OGLBuffer(OGLBuffer&& o) { |
| 92 | ~OGLBuffer() { Release(); } | 116 | std::swap(handle, o.handle); |
| 93 | OGLBuffer& operator=(OGLBuffer&& o) { std::swap(handle, o.handle); return *this; } | 117 | } |
| 118 | ~OGLBuffer() { | ||
| 119 | Release(); | ||
| 120 | } | ||
| 121 | OGLBuffer& operator=(OGLBuffer&& o) { | ||
| 122 | std::swap(handle, o.handle); | ||
| 123 | return *this; | ||
| 124 | } | ||
| 94 | 125 | ||
| 95 | /// Creates a new internal OpenGL resource and stores the handle | 126 | /// Creates a new internal OpenGL resource and stores the handle |
| 96 | void Create() { | 127 | void Create() { |
| 97 | if (handle != 0) return; | 128 | if (handle != 0) |
| 129 | return; | ||
| 98 | glGenBuffers(1, &handle); | 130 | glGenBuffers(1, &handle); |
| 99 | } | 131 | } |
| 100 | 132 | ||
| 101 | /// Deletes the internal OpenGL resource | 133 | /// Deletes the internal OpenGL resource |
| 102 | void Release() { | 134 | void Release() { |
| 103 | if (handle == 0) return; | 135 | if (handle == 0) |
| 136 | return; | ||
| 104 | glDeleteBuffers(1, &handle); | 137 | glDeleteBuffers(1, &handle); |
| 105 | OpenGLState::ResetBuffer(handle); | 138 | OpenGLState::ResetBuffer(handle); |
| 106 | handle = 0; | 139 | handle = 0; |
| @@ -112,19 +145,28 @@ public: | |||
| 112 | class OGLVertexArray : private NonCopyable { | 145 | class OGLVertexArray : private NonCopyable { |
| 113 | public: | 146 | public: |
| 114 | OGLVertexArray() = default; | 147 | OGLVertexArray() = default; |
| 115 | OGLVertexArray(OGLVertexArray&& o) { std::swap(handle, o.handle); } | 148 | OGLVertexArray(OGLVertexArray&& o) { |
| 116 | ~OGLVertexArray() { Release(); } | 149 | std::swap(handle, o.handle); |
| 117 | OGLVertexArray& operator=(OGLVertexArray&& o) { std::swap(handle, o.handle); return *this; } | 150 | } |
| 151 | ~OGLVertexArray() { | ||
| 152 | Release(); | ||
| 153 | } | ||
| 154 | OGLVertexArray& operator=(OGLVertexArray&& o) { | ||
| 155 | std::swap(handle, o.handle); | ||
| 156 | return *this; | ||
| 157 | } | ||
| 118 | 158 | ||
| 119 | /// Creates a new internal OpenGL resource and stores the handle | 159 | /// Creates a new internal OpenGL resource and stores the handle |
| 120 | void Create() { | 160 | void Create() { |
| 121 | if (handle != 0) return; | 161 | if (handle != 0) |
| 162 | return; | ||
| 122 | glGenVertexArrays(1, &handle); | 163 | glGenVertexArrays(1, &handle); |
| 123 | } | 164 | } |
| 124 | 165 | ||
| 125 | /// Deletes the internal OpenGL resource | 166 | /// Deletes the internal OpenGL resource |
| 126 | void Release() { | 167 | void Release() { |
| 127 | if (handle == 0) return; | 168 | if (handle == 0) |
| 169 | return; | ||
| 128 | glDeleteVertexArrays(1, &handle); | 170 | glDeleteVertexArrays(1, &handle); |
| 129 | OpenGLState::ResetVertexArray(handle); | 171 | OpenGLState::ResetVertexArray(handle); |
| 130 | handle = 0; | 172 | handle = 0; |
| @@ -136,19 +178,28 @@ public: | |||
| 136 | class OGLFramebuffer : private NonCopyable { | 178 | class OGLFramebuffer : private NonCopyable { |
| 137 | public: | 179 | public: |
| 138 | OGLFramebuffer() = default; | 180 | OGLFramebuffer() = default; |
| 139 | OGLFramebuffer(OGLFramebuffer&& o) { std::swap(handle, o.handle); } | 181 | OGLFramebuffer(OGLFramebuffer&& o) { |
| 140 | ~OGLFramebuffer() { Release(); } | 182 | std::swap(handle, o.handle); |
| 141 | OGLFramebuffer& operator=(OGLFramebuffer&& o) { std::swap(handle, o.handle); return *this; } | 183 | } |
| 184 | ~OGLFramebuffer() { | ||
| 185 | Release(); | ||
| 186 | } | ||
| 187 | OGLFramebuffer& operator=(OGLFramebuffer&& o) { | ||
| 188 | std::swap(handle, o.handle); | ||
| 189 | return *this; | ||
| 190 | } | ||
| 142 | 191 | ||
| 143 | /// Creates a new internal OpenGL resource and stores the handle | 192 | /// Creates a new internal OpenGL resource and stores the handle |
| 144 | void Create() { | 193 | void Create() { |
| 145 | if (handle != 0) return; | 194 | if (handle != 0) |
| 195 | return; | ||
| 146 | glGenFramebuffers(1, &handle); | 196 | glGenFramebuffers(1, &handle); |
| 147 | } | 197 | } |
| 148 | 198 | ||
| 149 | /// Deletes the internal OpenGL resource | 199 | /// Deletes the internal OpenGL resource |
| 150 | void Release() { | 200 | void Release() { |
| 151 | if (handle == 0) return; | 201 | if (handle == 0) |
| 202 | return; | ||
| 152 | glDeleteFramebuffers(1, &handle); | 203 | glDeleteFramebuffers(1, &handle); |
| 153 | OpenGLState::ResetFramebuffer(handle); | 204 | OpenGLState::ResetFramebuffer(handle); |
| 154 | handle = 0; | 205 | handle = 0; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 3de372f67..1808ee0a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -4,11 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | |||
| 8 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 9 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 10 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 11 | |||
| 12 | #include "video_core/pica.h" | 10 | #include "video_core/pica.h" |
| 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 14 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 12 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| @@ -21,19 +19,18 @@ namespace GLShader { | |||
| 21 | 19 | ||
| 22 | /// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) | 20 | /// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) |
| 23 | static bool IsPassThroughTevStage(const TevStageConfig& stage) { | 21 | static bool IsPassThroughTevStage(const TevStageConfig& stage) { |
| 24 | return (stage.color_op == TevStageConfig::Operation::Replace && | 22 | return (stage.color_op == TevStageConfig::Operation::Replace && |
| 25 | stage.alpha_op == TevStageConfig::Operation::Replace && | 23 | stage.alpha_op == TevStageConfig::Operation::Replace && |
| 26 | stage.color_source1 == TevStageConfig::Source::Previous && | 24 | stage.color_source1 == TevStageConfig::Source::Previous && |
| 27 | stage.alpha_source1 == TevStageConfig::Source::Previous && | 25 | stage.alpha_source1 == TevStageConfig::Source::Previous && |
| 28 | stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && | 26 | stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && |
| 29 | stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && | 27 | stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && |
| 30 | stage.GetColorMultiplier() == 1 && | 28 | stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); |
| 31 | stage.GetAlphaMultiplier() == 1); | ||
| 32 | } | 29 | } |
| 33 | 30 | ||
| 34 | /// Writes the specified TEV stage source component(s) | 31 | /// Writes the specified TEV stage source component(s) |
| 35 | static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source, | 32 | static void AppendSource(std::string& out, const PicaShaderConfig& config, |
| 36 | const std::string& index_name) { | 33 | TevStageConfig::Source source, const std::string& index_name) { |
| 37 | const auto& state = config.state; | 34 | const auto& state = config.state; |
| 38 | using Source = TevStageConfig::Source; | 35 | using Source = TevStageConfig::Source; |
| 39 | switch (source) { | 36 | switch (source) { |
| @@ -48,7 +45,7 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt | |||
| 48 | break; | 45 | break; |
| 49 | case Source::Texture0: | 46 | case Source::Texture0: |
| 50 | // Only unit 0 respects the texturing type (according to 3DBrew) | 47 | // Only unit 0 respects the texturing type (according to 3DBrew) |
| 51 | switch(state.texture0_type) { | 48 | switch (state.texture0_type) { |
| 52 | case Pica::Regs::TextureConfig::Texture2D: | 49 | case Pica::Regs::TextureConfig::Texture2D: |
| 53 | out += "texture(tex[0], texcoord[0])"; | 50 | out += "texture(tex[0], texcoord[0])"; |
| 54 | break; | 51 | break; |
| @@ -57,7 +54,8 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt | |||
| 57 | break; | 54 | break; |
| 58 | default: | 55 | default: |
| 59 | out += "texture(tex[0], texcoord[0])"; | 56 | out += "texture(tex[0], texcoord[0])"; |
| 60 | LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type)); | 57 | LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", |
| 58 | static_cast<int>(state.texture0_type)); | ||
| 61 | UNIMPLEMENTED(); | 59 | UNIMPLEMENTED(); |
| 62 | break; | 60 | break; |
| 63 | } | 61 | } |
| @@ -85,8 +83,9 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, TevSt | |||
| 85 | } | 83 | } |
| 86 | 84 | ||
| 87 | /// Writes the color components to use for the specified TEV stage color modifier | 85 | /// Writes the color components to use for the specified TEV stage color modifier |
| 88 | static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier, | 86 | static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, |
| 89 | TevStageConfig::Source source, const std::string& index_name) { | 87 | TevStageConfig::ColorModifier modifier, |
| 88 | TevStageConfig::Source source, const std::string& index_name) { | ||
| 90 | using ColorModifier = TevStageConfig::ColorModifier; | 89 | using ColorModifier = TevStageConfig::ColorModifier; |
| 91 | switch (modifier) { | 90 | switch (modifier) { |
| 92 | case ColorModifier::SourceColor: | 91 | case ColorModifier::SourceColor: |
| @@ -142,8 +141,9 @@ static void AppendColorModifier(std::string& out, const PicaShaderConfig& config | |||
| 142 | } | 141 | } |
| 143 | 142 | ||
| 144 | /// Writes the alpha component to use for the specified TEV stage alpha modifier | 143 | /// Writes the alpha component to use for the specified TEV stage alpha modifier |
| 145 | static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier, | 144 | static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, |
| 146 | TevStageConfig::Source source, const std::string& index_name) { | 145 | TevStageConfig::AlphaModifier modifier, |
| 146 | TevStageConfig::Source source, const std::string& index_name) { | ||
| 147 | using AlphaModifier = TevStageConfig::AlphaModifier; | 147 | using AlphaModifier = TevStageConfig::AlphaModifier; |
| 148 | switch (modifier) { | 148 | switch (modifier) { |
| 149 | case AlphaModifier::SourceAlpha: | 149 | case AlphaModifier::SourceAlpha: |
| @@ -191,7 +191,7 @@ static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config | |||
| 191 | 191 | ||
| 192 | /// Writes the combiner function for the color components for the specified TEV stage operation | 192 | /// Writes the combiner function for the color components for the specified TEV stage operation |
| 193 | static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, | 193 | static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, |
| 194 | const std::string& variable_name) { | 194 | const std::string& variable_name) { |
| 195 | out += "clamp("; | 195 | out += "clamp("; |
| 196 | using Operation = TevStageConfig::Operation; | 196 | using Operation = TevStageConfig::Operation; |
| 197 | switch (operation) { | 197 | switch (operation) { |
| @@ -208,8 +208,10 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper | |||
| 208 | out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)"; | 208 | out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)"; |
| 209 | break; | 209 | break; |
| 210 | case Operation::Lerp: | 210 | case Operation::Lerp: |
| 211 | // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use builtin lerp | 211 | // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use |
| 212 | out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (vec3(1.0) - " + variable_name + "[2])"; | 212 | // builtin lerp |
| 213 | out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + | ||
| 214 | "[1] * (vec3(1.0) - " + variable_name + "[2])"; | ||
| 213 | break; | 215 | break; |
| 214 | case Operation::Subtract: | 216 | case Operation::Subtract: |
| 215 | out += variable_name + "[0] - " + variable_name + "[1]"; | 217 | out += variable_name + "[0] - " + variable_name + "[1]"; |
| @@ -218,10 +220,12 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper | |||
| 218 | out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; | 220 | out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; |
| 219 | break; | 221 | break; |
| 220 | case Operation::AddThenMultiply: | 222 | case Operation::AddThenMultiply: |
| 221 | out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; | 223 | out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + |
| 224 | variable_name + "[2]"; | ||
| 222 | break; | 225 | break; |
| 223 | case Operation::Dot3_RGB: | 226 | case Operation::Dot3_RGB: |
| 224 | out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)"; | 227 | out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + |
| 228 | "[1] - vec3(0.5)) * 4.0)"; | ||
| 225 | break; | 229 | break; |
| 226 | default: | 230 | default: |
| 227 | out += "vec3(0.0)"; | 231 | out += "vec3(0.0)"; |
| @@ -233,7 +237,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper | |||
| 233 | 237 | ||
| 234 | /// Writes the combiner function for the alpha component for the specified TEV stage operation | 238 | /// Writes the combiner function for the alpha component for the specified TEV stage operation |
| 235 | static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, | 239 | static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, |
| 236 | const std::string& variable_name) { | 240 | const std::string& variable_name) { |
| 237 | out += "clamp("; | 241 | out += "clamp("; |
| 238 | using Operation = TevStageConfig::Operation; | 242 | using Operation = TevStageConfig::Operation; |
| 239 | switch (operation) { | 243 | switch (operation) { |
| @@ -250,7 +254,8 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper | |||
| 250 | out += variable_name + "[0] + " + variable_name + "[1] - 0.5"; | 254 | out += variable_name + "[0] + " + variable_name + "[1] - 0.5"; |
| 251 | break; | 255 | break; |
| 252 | case Operation::Lerp: | 256 | case Operation::Lerp: |
| 253 | out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + "[1] * (1.0 - " + variable_name + "[2])"; | 257 | out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name + |
| 258 | "[1] * (1.0 - " + variable_name + "[2])"; | ||
| 254 | break; | 259 | break; |
| 255 | case Operation::Subtract: | 260 | case Operation::Subtract: |
| 256 | out += variable_name + "[0] - " + variable_name + "[1]"; | 261 | out += variable_name + "[0] - " + variable_name + "[1]"; |
| @@ -259,7 +264,8 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper | |||
| 259 | out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; | 264 | out += variable_name + "[0] * " + variable_name + "[1] + " + variable_name + "[2]"; |
| 260 | break; | 265 | break; |
| 261 | case Operation::AddThenMultiply: | 266 | case Operation::AddThenMultiply: |
| 262 | out += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name + "[2]"; | 267 | out += "min(" + variable_name + "[0] + " + variable_name + "[1], 1.0) * " + variable_name + |
| 268 | "[2]"; | ||
| 263 | break; | 269 | break; |
| 264 | default: | 270 | default: |
| 265 | out += "0.0"; | 271 | out += "0.0"; |
| @@ -284,9 +290,8 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { | |||
| 284 | case CompareFunc::LessThan: | 290 | case CompareFunc::LessThan: |
| 285 | case CompareFunc::LessThanOrEqual: | 291 | case CompareFunc::LessThanOrEqual: |
| 286 | case CompareFunc::GreaterThan: | 292 | case CompareFunc::GreaterThan: |
| 287 | case CompareFunc::GreaterThanOrEqual: | 293 | case CompareFunc::GreaterThanOrEqual: { |
| 288 | { | 294 | static const char* op[] = {"!=", "==", ">=", ">", "<=", "<"}; |
| 289 | static const char* op[] = { "!=", "==", ">=", ">", "<=", "<", }; | ||
| 290 | unsigned index = (unsigned)func - (unsigned)CompareFunc::Equal; | 295 | unsigned index = (unsigned)func - (unsigned)CompareFunc::Equal; |
| 291 | out += "int(last_tex_env_out.a * 255.0f) " + std::string(op[index]) + " alphatest_ref"; | 296 | out += "int(last_tex_env_out.a * 255.0f) " + std::string(op[index]) + " alphatest_ref"; |
| 292 | break; | 297 | break; |
| @@ -301,7 +306,8 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { | |||
| 301 | 306 | ||
| 302 | /// Writes the code to emulate the specified TEV stage | 307 | /// Writes the code to emulate the specified TEV stage |
| 303 | static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { | 308 | static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { |
| 304 | const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]); | 309 | const auto stage = |
| 310 | static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]); | ||
| 305 | if (!IsPassThroughTevStage(stage)) { | 311 | if (!IsPassThroughTevStage(stage)) { |
| 306 | std::string index_name = std::to_string(index); | 312 | std::string index_name = std::to_string(index); |
| 307 | 313 | ||
| @@ -330,8 +336,12 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi | |||
| 330 | out += ";\n"; | 336 | out += ";\n"; |
| 331 | 337 | ||
| 332 | out += "last_tex_env_out = vec4(" | 338 | out += "last_tex_env_out = vec4(" |
| 333 | "clamp(color_output_" + index_name + " * " + std::to_string(stage.GetColorMultiplier()) + ".0, vec3(0.0), vec3(1.0))," | 339 | "clamp(color_output_" + |
| 334 | "clamp(alpha_output_" + index_name + " * " + std::to_string(stage.GetAlphaMultiplier()) + ".0, 0.0, 1.0));\n"; | 340 | index_name + " * " + std::to_string(stage.GetColorMultiplier()) + |
| 341 | ".0, vec3(0.0), vec3(1.0))," | ||
| 342 | "clamp(alpha_output_" + | ||
| 343 | index_name + " * " + std::to_string(stage.GetAlphaMultiplier()) + | ||
| 344 | ".0, 0.0, 1.0));\n"; | ||
| 335 | } | 345 | } |
| 336 | 346 | ||
| 337 | out += "combiner_buffer = next_combiner_buffer;\n"; | 347 | out += "combiner_buffer = next_combiner_buffer;\n"; |
| @@ -355,13 +365,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 355 | 365 | ||
| 356 | // Compute fragment normals | 366 | // Compute fragment normals |
| 357 | if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { | 367 | if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { |
| 358 | // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture | 368 | // Bump mapping is enabled using a normal map, read perturbation vector from the selected |
| 369 | // texture | ||
| 359 | std::string bump_selector = std::to_string(lighting.bump_selector); | 370 | std::string bump_selector = std::to_string(lighting.bump_selector); |
| 360 | out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; | 371 | out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + |
| 372 | bump_selector + "]).rgb - 1.0;\n"; | ||
| 361 | 373 | ||
| 362 | // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result | 374 | // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher |
| 375 | // precision result | ||
| 363 | if (lighting.bump_renorm) { | 376 | if (lighting.bump_renorm) { |
| 364 | std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; | 377 | std::string val = |
| 378 | "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; | ||
| 365 | out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; | 379 | out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; |
| 366 | } | 380 | } |
| 367 | } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { | 381 | } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { |
| @@ -373,7 +387,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 373 | out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; | 387 | out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; |
| 374 | } | 388 | } |
| 375 | 389 | ||
| 376 | // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace | 390 | // Rotate the surface-local normal by the interpolated normal quaternion to convert it to |
| 391 | // eyespace | ||
| 377 | out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; | 392 | out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; |
| 378 | 393 | ||
| 379 | // Gets the index into the specified lookup table for specular lighting | 394 | // Gets the index into the specified lookup table for specular lighting |
| @@ -406,12 +421,14 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 406 | 421 | ||
| 407 | if (abs) { | 422 | if (abs) { |
| 408 | // LUT index is in the range of (0.0, 1.0) | 423 | // LUT index is in the range of (0.0, 1.0) |
| 409 | index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; | 424 | index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" |
| 425 | : "max(" + index + ", 0.f)"; | ||
| 410 | return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; | 426 | return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; |
| 411 | } else { | 427 | } else { |
| 412 | // LUT index is in the range of (-1.0, 1.0) | 428 | // LUT index is in the range of (-1.0, 1.0) |
| 413 | index = "clamp(" + index + ", -1.0, 1.0)"; | 429 | index = "clamp(" + index + ", -1.0, 1.0)"; |
| 414 | return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)"; | 430 | return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + |
| 431 | ") / 2.0)"; | ||
| 415 | } | 432 | } |
| 416 | 433 | ||
| 417 | return std::string(); | 434 | return std::string(); |
| @@ -434,52 +451,74 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 434 | else | 451 | else |
| 435 | out += "light_vector = normalize(" + light_src + ".position + view);\n"; | 452 | out += "light_vector = normalize(" + light_src + ".position + view);\n"; |
| 436 | 453 | ||
| 437 | // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided | 454 | // Compute dot product of light_vector and normal, adjust if lighting is one-sided or |
| 438 | std::string dot_product = light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)"; | 455 | // two-sided |
| 456 | std::string dot_product = light_config.two_sided_diffuse | ||
| 457 | ? "abs(dot(light_vector, normal))" | ||
| 458 | : "max(dot(light_vector, normal), 0.0)"; | ||
| 439 | 459 | ||
| 440 | // If enabled, compute distance attenuation value | 460 | // If enabled, compute distance attenuation value |
| 441 | std::string dist_atten = "1.0"; | 461 | std::string dist_atten = "1.0"; |
| 442 | if (light_config.dist_atten_enable) { | 462 | if (light_config.dist_atten_enable) { |
| 443 | std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + light_src + ".position) + " + light_src + ".dist_atten_bias)"; | 463 | std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + |
| 464 | light_src + ".position) + " + light_src + ".dist_atten_bias)"; | ||
| 444 | index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; | 465 | index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; |
| 445 | const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); | 466 | const unsigned lut_num = |
| 467 | ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); | ||
| 446 | dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); | 468 | dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); |
| 447 | } | 469 | } |
| 448 | 470 | ||
| 449 | // If enabled, clamp specular component if lighting result is negative | 471 | // If enabled, clamp specular component if lighting result is negative |
| 450 | std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; | 472 | std::string clamp_highlights = |
| 473 | lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; | ||
| 451 | 474 | ||
| 452 | // Specular 0 component | 475 | // Specular 0 component |
| 453 | std::string d0_lut_value = "1.0"; | 476 | std::string d0_lut_value = "1.0"; |
| 454 | if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) { | 477 | if (lighting.lut_d0.enable && |
| 478 | Pica::Regs::IsLightingSamplerSupported(lighting.config, | ||
| 479 | Pica::Regs::LightingSampler::Distribution0)) { | ||
| 455 | // Lookup specular "distribution 0" LUT value | 480 | // Lookup specular "distribution 0" LUT value |
| 456 | std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); | 481 | std::string index = |
| 457 | d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; | 482 | GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); |
| 483 | d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + | ||
| 484 | GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; | ||
| 458 | } | 485 | } |
| 459 | std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; | 486 | std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; |
| 460 | 487 | ||
| 461 | // If enabled, lookup ReflectRed value, otherwise, 1.0 is used | 488 | // If enabled, lookup ReflectRed value, otherwise, 1.0 is used |
| 462 | if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { | 489 | if (lighting.lut_rr.enable && |
| 463 | std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); | 490 | Pica::Regs::IsLightingSamplerSupported(lighting.config, |
| 464 | std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; | 491 | Pica::Regs::LightingSampler::ReflectRed)) { |
| 492 | std::string index = | ||
| 493 | GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); | ||
| 494 | std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + | ||
| 495 | GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; | ||
| 465 | out += "refl_value.r = " + value + ";\n"; | 496 | out += "refl_value.r = " + value + ";\n"; |
| 466 | } else { | 497 | } else { |
| 467 | out += "refl_value.r = 1.0;\n"; | 498 | out += "refl_value.r = 1.0;\n"; |
| 468 | } | 499 | } |
| 469 | 500 | ||
| 470 | // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used | 501 | // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used |
| 471 | if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { | 502 | if (lighting.lut_rg.enable && |
| 472 | std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); | 503 | Pica::Regs::IsLightingSamplerSupported(lighting.config, |
| 473 | std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; | 504 | Pica::Regs::LightingSampler::ReflectGreen)) { |
| 505 | std::string index = | ||
| 506 | GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); | ||
| 507 | std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + | ||
| 508 | GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; | ||
| 474 | out += "refl_value.g = " + value + ";\n"; | 509 | out += "refl_value.g = " + value + ";\n"; |
| 475 | } else { | 510 | } else { |
| 476 | out += "refl_value.g = refl_value.r;\n"; | 511 | out += "refl_value.g = refl_value.r;\n"; |
| 477 | } | 512 | } |
| 478 | 513 | ||
| 479 | // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used | 514 | // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used |
| 480 | if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { | 515 | if (lighting.lut_rb.enable && |
| 481 | std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); | 516 | Pica::Regs::IsLightingSamplerSupported(lighting.config, |
| 482 | std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; | 517 | Pica::Regs::LightingSampler::ReflectBlue)) { |
| 518 | std::string index = | ||
| 519 | GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); | ||
| 520 | std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + | ||
| 521 | GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; | ||
| 483 | out += "refl_value.b = " + value + ";\n"; | 522 | out += "refl_value.b = " + value + ";\n"; |
| 484 | } else { | 523 | } else { |
| 485 | out += "refl_value.b = refl_value.r;\n"; | 524 | out += "refl_value.b = refl_value.r;\n"; |
| @@ -487,18 +526,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 487 | 526 | ||
| 488 | // Specular 1 component | 527 | // Specular 1 component |
| 489 | std::string d1_lut_value = "1.0"; | 528 | std::string d1_lut_value = "1.0"; |
| 490 | if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) { | 529 | if (lighting.lut_d1.enable && |
| 530 | Pica::Regs::IsLightingSamplerSupported(lighting.config, | ||
| 531 | Pica::Regs::LightingSampler::Distribution1)) { | ||
| 491 | // Lookup specular "distribution 1" LUT value | 532 | // Lookup specular "distribution 1" LUT value |
| 492 | std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); | 533 | std::string index = |
| 493 | d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; | 534 | GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); |
| 535 | d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + | ||
| 536 | GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; | ||
| 494 | } | 537 | } |
| 495 | std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; | 538 | std::string specular_1 = |
| 539 | "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; | ||
| 496 | 540 | ||
| 497 | // Fresnel | 541 | // Fresnel |
| 498 | if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) { | 542 | if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported( |
| 543 | lighting.config, Pica::Regs::LightingSampler::Fresnel)) { | ||
| 499 | // Lookup fresnel LUT value | 544 | // Lookup fresnel LUT value |
| 500 | std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); | 545 | std::string index = |
| 501 | std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; | 546 | GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); |
| 547 | std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + | ||
| 548 | GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; | ||
| 502 | 549 | ||
| 503 | // Enabled for diffuse lighting alpha component | 550 | // Enabled for diffuse lighting alpha component |
| 504 | if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || | 551 | if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || |
| @@ -512,10 +559,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 512 | } | 559 | } |
| 513 | 560 | ||
| 514 | // Compute primary fragment color (diffuse lighting) function | 561 | // Compute primary fragment color (diffuse lighting) function |
| 515 | out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; | 562 | out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + |
| 563 | light_src + ".ambient) * " + dist_atten + ";\n"; | ||
| 516 | 564 | ||
| 517 | // Compute secondary fragment color (specular lighting) function | 565 | // Compute secondary fragment color (specular lighting) function |
| 518 | out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; | 566 | out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + |
| 567 | clamp_highlights + " * " + dist_atten + ";\n"; | ||
| 519 | } | 568 | } |
| 520 | 569 | ||
| 521 | // Sum final lighting result | 570 | // Sum final lighting result |
| @@ -598,9 +647,9 @@ vec4 secondary_fragment_color = vec4(0.0); | |||
| 598 | out += "!"; | 647 | out += "!"; |
| 599 | // x2,y2 have +1 added to cover the entire pixel area | 648 | // x2,y2 have +1 added to cover the entire pixel area |
| 600 | out += "(gl_FragCoord.x >= scissor_x1 * framebuffer_scale.x && " | 649 | out += "(gl_FragCoord.x >= scissor_x1 * framebuffer_scale.x && " |
| 601 | "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && " | 650 | "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && " |
| 602 | "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && " | 651 | "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && " |
| 603 | "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n"; | 652 | "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n"; |
| 604 | } | 653 | } |
| 605 | 654 | ||
| 606 | out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; | 655 | out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; |
| @@ -638,9 +687,11 @@ vec4 secondary_fragment_color = vec4(0.0); | |||
| 638 | out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; | 687 | out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; |
| 639 | out += "float fog_f = fog_index - fog_i;\n"; | 688 | out += "float fog_f = fog_index - fog_i;\n"; |
| 640 | out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n"; | 689 | out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n"; |
| 641 | out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> 19);\n"; // Extract signed difference | 690 | out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> " |
| 691 | "19);\n"; // Extract signed difference | ||
| 642 | out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n"; | 692 | out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n"; |
| 643 | out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / 2047.0;\n"; | 693 | out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / " |
| 694 | "2047.0;\n"; | ||
| 644 | out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; | 695 | out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; |
| 645 | 696 | ||
| 646 | // Blend the fog | 697 | // Blend the fog |
| @@ -658,14 +709,20 @@ vec4 secondary_fragment_color = vec4(0.0); | |||
| 658 | std::string GenerateVertexShader() { | 709 | std::string GenerateVertexShader() { |
| 659 | std::string out = "#version 330 core\n"; | 710 | std::string out = "#version 330 core\n"; |
| 660 | 711 | ||
| 661 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; | 712 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + |
| 662 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; | 713 | ") in vec4 vert_position;\n"; |
| 663 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; | 714 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; |
| 664 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; | 715 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + |
| 665 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; | 716 | ") in vec2 vert_texcoord0;\n"; |
| 666 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n"; | 717 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + |
| 667 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; | 718 | ") in vec2 vert_texcoord1;\n"; |
| 668 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; | 719 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + |
| 720 | ") in vec2 vert_texcoord2;\n"; | ||
| 721 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + | ||
| 722 | ") in float vert_texcoord0_w;\n"; | ||
| 723 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + | ||
| 724 | ") in vec4 vert_normquat;\n"; | ||
| 725 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; | ||
| 669 | 726 | ||
| 670 | out += R"( | 727 | out += R"( |
| 671 | out vec4 primary_color; | 728 | out vec4 primary_color; |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index dded3db46..fe07aa6eb 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -3,9 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <vector> | 5 | #include <vector> |
| 6 | |||
| 7 | #include <glad/glad.h> | 6 | #include <glad/glad.h> |
| 8 | |||
| 9 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 10 | #include "video_core/renderer_opengl/gl_shader_util.h" | 8 | #include "video_core/renderer_opengl/gl_shader_util.h" |
| 11 | 9 | ||
| @@ -56,7 +54,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { | |||
| 56 | if (result) { | 54 | if (result) { |
| 57 | LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); | 55 | LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); |
| 58 | } else { | 56 | } else { |
| 59 | LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", &fragment_shader_error[0]); | 57 | LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", |
| 58 | &fragment_shader_error[0]); | ||
| 60 | } | 59 | } |
| 61 | } | 60 | } |
| 62 | 61 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 13ee986b9..ed84cadea 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -3,10 +3,8 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <glad/glad.h> | 5 | #include <glad/glad.h> |
| 6 | |||
| 7 | #include "common/common_funcs.h" | 6 | #include "common/common_funcs.h" |
| 8 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 9 | |||
| 10 | #include "video_core/renderer_opengl/gl_state.h" | 8 | #include "video_core/renderer_opengl/gl_state.h" |
| 11 | 9 | ||
| 12 | OpenGLState OpenGLState::cur_state; | 10 | OpenGLState OpenGLState::cur_state; |
| @@ -106,11 +104,11 @@ void OpenGLState::Apply() const { | |||
| 106 | 104 | ||
| 107 | // Color mask | 105 | // Color mask |
| 108 | if (color_mask.red_enabled != cur_state.color_mask.red_enabled || | 106 | if (color_mask.red_enabled != cur_state.color_mask.red_enabled || |
| 109 | color_mask.green_enabled != cur_state.color_mask.green_enabled || | 107 | color_mask.green_enabled != cur_state.color_mask.green_enabled || |
| 110 | color_mask.blue_enabled != cur_state.color_mask.blue_enabled || | 108 | color_mask.blue_enabled != cur_state.color_mask.blue_enabled || |
| 111 | color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) { | 109 | color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) { |
| 112 | glColorMask(color_mask.red_enabled, color_mask.green_enabled, | 110 | glColorMask(color_mask.red_enabled, color_mask.green_enabled, color_mask.blue_enabled, |
| 113 | color_mask.blue_enabled, color_mask.alpha_enabled); | 111 | color_mask.alpha_enabled); |
| 114 | } | 112 | } |
| 115 | 113 | ||
| 116 | // Stencil test | 114 | // Stencil test |
| @@ -123,15 +121,16 @@ void OpenGLState::Apply() const { | |||
| 123 | } | 121 | } |
| 124 | 122 | ||
| 125 | if (stencil.test_func != cur_state.stencil.test_func || | 123 | if (stencil.test_func != cur_state.stencil.test_func || |
| 126 | stencil.test_ref != cur_state.stencil.test_ref || | 124 | stencil.test_ref != cur_state.stencil.test_ref || |
| 127 | stencil.test_mask != cur_state.stencil.test_mask) { | 125 | stencil.test_mask != cur_state.stencil.test_mask) { |
| 128 | glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); | 126 | glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); |
| 129 | } | 127 | } |
| 130 | 128 | ||
| 131 | if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail || | 129 | if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail || |
| 132 | stencil.action_depth_pass != cur_state.stencil.action_depth_pass || | 130 | stencil.action_depth_pass != cur_state.stencil.action_depth_pass || |
| 133 | stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { | 131 | stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { |
| 134 | glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, stencil.action_depth_pass); | 132 | glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, |
| 133 | stencil.action_depth_pass); | ||
| 135 | } | 134 | } |
| 136 | 135 | ||
| 137 | // Stencil mask | 136 | // Stencil mask |
| @@ -154,23 +153,22 @@ void OpenGLState::Apply() const { | |||
| 154 | } | 153 | } |
| 155 | 154 | ||
| 156 | if (blend.color.red != cur_state.blend.color.red || | 155 | if (blend.color.red != cur_state.blend.color.red || |
| 157 | blend.color.green != cur_state.blend.color.green || | 156 | blend.color.green != cur_state.blend.color.green || |
| 158 | blend.color.blue != cur_state.blend.color.blue || | 157 | blend.color.blue != cur_state.blend.color.blue || |
| 159 | blend.color.alpha != cur_state.blend.color.alpha) { | 158 | blend.color.alpha != cur_state.blend.color.alpha) { |
| 160 | glBlendColor(blend.color.red, blend.color.green, | 159 | glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha); |
| 161 | blend.color.blue, blend.color.alpha); | ||
| 162 | } | 160 | } |
| 163 | 161 | ||
| 164 | if (blend.src_rgb_func != cur_state.blend.src_rgb_func || | 162 | if (blend.src_rgb_func != cur_state.blend.src_rgb_func || |
| 165 | blend.dst_rgb_func != cur_state.blend.dst_rgb_func || | 163 | blend.dst_rgb_func != cur_state.blend.dst_rgb_func || |
| 166 | blend.src_a_func != cur_state.blend.src_a_func || | 164 | blend.src_a_func != cur_state.blend.src_a_func || |
| 167 | blend.dst_a_func != cur_state.blend.dst_a_func) { | 165 | blend.dst_a_func != cur_state.blend.dst_a_func) { |
| 168 | glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, | 166 | glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, |
| 169 | blend.src_a_func, blend.dst_a_func); | 167 | blend.dst_a_func); |
| 170 | } | 168 | } |
| 171 | 169 | ||
| 172 | if (blend.rgb_equation != cur_state.blend.rgb_equation || | 170 | if (blend.rgb_equation != cur_state.blend.rgb_equation || |
| 173 | blend.a_equation != cur_state.blend.a_equation) { | 171 | blend.a_equation != cur_state.blend.a_equation) { |
| 174 | glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); | 172 | glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); |
| 175 | } | 173 | } |
| 176 | 174 | ||
| @@ -237,8 +235,11 @@ void OpenGLState::Apply() const { | |||
| 237 | GLenum OpenGLState::CheckFBStatus(GLenum target) { | 235 | GLenum OpenGLState::CheckFBStatus(GLenum target) { |
| 238 | GLenum fb_status = glCheckFramebufferStatus(target); | 236 | GLenum fb_status = glCheckFramebufferStatus(target); |
| 239 | if (fb_status != GL_FRAMEBUFFER_COMPLETE) { | 237 | if (fb_status != GL_FRAMEBUFFER_COMPLETE) { |
| 240 | const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK")); | 238 | const char* fb_description = |
| 241 | LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status); | 239 | (target == GL_READ_FRAMEBUFFER ? "READ" |
| 240 | : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK")); | ||
| 241 | LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, | ||
| 242 | fb_status); | ||
| 242 | } | 243 | } |
| 243 | 244 | ||
| 244 | return fb_status; | 245 | return fb_status; |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 13c71b0a6..01dead883 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -9,14 +9,14 @@ | |||
| 9 | class OpenGLState { | 9 | class OpenGLState { |
| 10 | public: | 10 | public: |
| 11 | struct { | 11 | struct { |
| 12 | bool enabled; // GL_CULL_FACE | 12 | bool enabled; // GL_CULL_FACE |
| 13 | GLenum mode; // GL_CULL_FACE_MODE | 13 | GLenum mode; // GL_CULL_FACE_MODE |
| 14 | GLenum front_face; // GL_FRONT_FACE | 14 | GLenum front_face; // GL_FRONT_FACE |
| 15 | } cull; | 15 | } cull; |
| 16 | 16 | ||
| 17 | struct { | 17 | struct { |
| 18 | bool test_enabled; // GL_DEPTH_TEST | 18 | bool test_enabled; // GL_DEPTH_TEST |
| 19 | GLenum test_func; // GL_DEPTH_FUNC | 19 | GLenum test_func; // GL_DEPTH_FUNC |
| 20 | GLboolean write_mask; // GL_DEPTH_WRITEMASK | 20 | GLboolean write_mask; // GL_DEPTH_WRITEMASK |
| 21 | } depth; | 21 | } depth; |
| 22 | 22 | ||
| @@ -28,24 +28,24 @@ public: | |||
| 28 | } color_mask; // GL_COLOR_WRITEMASK | 28 | } color_mask; // GL_COLOR_WRITEMASK |
| 29 | 29 | ||
| 30 | struct { | 30 | struct { |
| 31 | bool test_enabled; // GL_STENCIL_TEST | 31 | bool test_enabled; // GL_STENCIL_TEST |
| 32 | GLenum test_func; // GL_STENCIL_FUNC | 32 | GLenum test_func; // GL_STENCIL_FUNC |
| 33 | GLint test_ref; // GL_STENCIL_REF | 33 | GLint test_ref; // GL_STENCIL_REF |
| 34 | GLuint test_mask; // GL_STENCIL_VALUE_MASK | 34 | GLuint test_mask; // GL_STENCIL_VALUE_MASK |
| 35 | GLuint write_mask; // GL_STENCIL_WRITEMASK | 35 | GLuint write_mask; // GL_STENCIL_WRITEMASK |
| 36 | GLenum action_stencil_fail; // GL_STENCIL_FAIL | 36 | GLenum action_stencil_fail; // GL_STENCIL_FAIL |
| 37 | GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL | 37 | GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL |
| 38 | GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS | 38 | GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS |
| 39 | } stencil; | 39 | } stencil; |
| 40 | 40 | ||
| 41 | struct { | 41 | struct { |
| 42 | bool enabled; // GL_BLEND | 42 | bool enabled; // GL_BLEND |
| 43 | GLenum rgb_equation; // GL_BLEND_EQUATION_RGB | 43 | GLenum rgb_equation; // GL_BLEND_EQUATION_RGB |
| 44 | GLenum a_equation; // GL_BLEND_EQUATION_ALPHA | 44 | GLenum a_equation; // GL_BLEND_EQUATION_ALPHA |
| 45 | GLenum src_rgb_func; // GL_BLEND_SRC_RGB | 45 | GLenum src_rgb_func; // GL_BLEND_SRC_RGB |
| 46 | GLenum dst_rgb_func; // GL_BLEND_DST_RGB | 46 | GLenum dst_rgb_func; // GL_BLEND_DST_RGB |
| 47 | GLenum src_a_func; // GL_BLEND_SRC_ALPHA | 47 | GLenum src_a_func; // GL_BLEND_SRC_ALPHA |
| 48 | GLenum dst_a_func; // GL_BLEND_DST_ALPHA | 48 | GLenum dst_a_func; // GL_BLEND_DST_ALPHA |
| 49 | 49 | ||
| 50 | struct { | 50 | struct { |
| 51 | GLclampf red; | 51 | GLclampf red; |
| @@ -60,7 +60,7 @@ public: | |||
| 60 | // 3 texture units - one for each that is used in PICA fragment shader emulation | 60 | // 3 texture units - one for each that is used in PICA fragment shader emulation |
| 61 | struct { | 61 | struct { |
| 62 | GLuint texture_2d; // GL_TEXTURE_BINDING_2D | 62 | GLuint texture_2d; // GL_TEXTURE_BINDING_2D |
| 63 | GLuint sampler; // GL_SAMPLER_BINDING | 63 | GLuint sampler; // GL_SAMPLER_BINDING |
| 64 | } texture_units[3]; | 64 | } texture_units[3]; |
| 65 | 65 | ||
| 66 | struct { | 66 | struct { |
| @@ -74,10 +74,10 @@ public: | |||
| 74 | struct { | 74 | struct { |
| 75 | GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING | 75 | GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING |
| 76 | GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING | 76 | GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING |
| 77 | GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING | 77 | GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING |
| 78 | GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING | 78 | GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING |
| 79 | GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING | 79 | GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING |
| 80 | GLuint shader_program; // GL_CURRENT_PROGRAM | 80 | GLuint shader_program; // GL_CURRENT_PROGRAM |
| 81 | } draw; | 81 | } draw; |
| 82 | 82 | ||
| 83 | OpenGLState(); | 83 | OpenGLState(); |
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index d9b9c9cc2..cc49867c8 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h | |||
| @@ -6,15 +6,12 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | |||
| 10 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 11 | |||
| 12 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 13 | #include "common/bit_field.h" | 11 | #include "common/bit_field.h" |
| 14 | #include "common/common_funcs.h" | 12 | #include "common/common_funcs.h" |
| 15 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 16 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 17 | |||
| 18 | #include "video_core/pica.h" | 15 | #include "video_core/pica.h" |
| 19 | 16 | ||
| 20 | using GLvec2 = std::array<GLfloat, 2>; | 17 | using GLvec2 = std::array<GLfloat, 2>; |
| @@ -25,8 +22,8 @@ namespace PicaToGL { | |||
| 25 | 22 | ||
| 26 | inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { | 23 | inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { |
| 27 | static const GLenum filter_mode_table[] = { | 24 | static const GLenum filter_mode_table[] = { |
| 28 | GL_NEAREST, // TextureFilter::Nearest | 25 | GL_NEAREST, // TextureFilter::Nearest |
| 29 | GL_LINEAR // TextureFilter::Linear | 26 | GL_LINEAR, // TextureFilter::Linear |
| 30 | }; | 27 | }; |
| 31 | 28 | ||
| 32 | // Range check table for input | 29 | // Range check table for input |
| @@ -52,10 +49,10 @@ inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { | |||
| 52 | 49 | ||
| 53 | inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { | 50 | inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { |
| 54 | static const GLenum wrap_mode_table[] = { | 51 | static const GLenum wrap_mode_table[] = { |
| 55 | GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge | 52 | GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge |
| 56 | GL_CLAMP_TO_BORDER,// WrapMode::ClampToBorder | 53 | GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder |
| 57 | GL_REPEAT, // WrapMode::Repeat | 54 | GL_REPEAT, // WrapMode::Repeat |
| 58 | GL_MIRRORED_REPEAT // WrapMode::MirroredRepeat | 55 | GL_MIRRORED_REPEAT, // WrapMode::MirroredRepeat |
| 59 | }; | 56 | }; |
| 60 | 57 | ||
| 61 | // Range check table for input | 58 | // Range check table for input |
| @@ -131,22 +128,22 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { | |||
| 131 | 128 | ||
| 132 | inline GLenum LogicOp(Pica::Regs::LogicOp op) { | 129 | inline GLenum LogicOp(Pica::Regs::LogicOp op) { |
| 133 | static const GLenum logic_op_table[] = { | 130 | static const GLenum logic_op_table[] = { |
| 134 | GL_CLEAR, // Clear | 131 | GL_CLEAR, // Clear |
| 135 | GL_AND, // And | 132 | GL_AND, // And |
| 136 | GL_AND_REVERSE, // AndReverse | 133 | GL_AND_REVERSE, // AndReverse |
| 137 | GL_COPY, // Copy | 134 | GL_COPY, // Copy |
| 138 | GL_SET, // Set | 135 | GL_SET, // Set |
| 139 | GL_COPY_INVERTED, // CopyInverted | 136 | GL_COPY_INVERTED, // CopyInverted |
| 140 | GL_NOOP, // NoOp | 137 | GL_NOOP, // NoOp |
| 141 | GL_INVERT, // Invert | 138 | GL_INVERT, // Invert |
| 142 | GL_NAND, // Nand | 139 | GL_NAND, // Nand |
| 143 | GL_OR, // Or | 140 | GL_OR, // Or |
| 144 | GL_NOR, // Nor | 141 | GL_NOR, // Nor |
| 145 | GL_XOR, // Xor | 142 | GL_XOR, // Xor |
| 146 | GL_EQUIV, // Equiv | 143 | GL_EQUIV, // Equiv |
| 147 | GL_AND_INVERTED, // AndInverted | 144 | GL_AND_INVERTED, // AndInverted |
| 148 | GL_OR_REVERSE, // OrReverse | 145 | GL_OR_REVERSE, // OrReverse |
| 149 | GL_OR_INVERTED, // OrInverted | 146 | GL_OR_INVERTED, // OrInverted |
| 150 | }; | 147 | }; |
| 151 | 148 | ||
| 152 | // Range check table for input | 149 | // Range check table for input |
| @@ -185,14 +182,14 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { | |||
| 185 | 182 | ||
| 186 | inline GLenum StencilOp(Pica::Regs::StencilAction action) { | 183 | inline GLenum StencilOp(Pica::Regs::StencilAction action) { |
| 187 | static const GLenum stencil_op_table[] = { | 184 | static const GLenum stencil_op_table[] = { |
| 188 | GL_KEEP, // StencilAction::Keep | 185 | GL_KEEP, // StencilAction::Keep |
| 189 | GL_ZERO, // StencilAction::Zero | 186 | GL_ZERO, // StencilAction::Zero |
| 190 | GL_REPLACE, // StencilAction::Replace | 187 | GL_REPLACE, // StencilAction::Replace |
| 191 | GL_INCR, // StencilAction::Increment | 188 | GL_INCR, // StencilAction::Increment |
| 192 | GL_DECR, // StencilAction::Decrement | 189 | GL_DECR, // StencilAction::Decrement |
| 193 | GL_INVERT, // StencilAction::Invert | 190 | GL_INVERT, // StencilAction::Invert |
| 194 | GL_INCR_WRAP, // StencilAction::IncrementWrap | 191 | GL_INCR_WRAP, // StencilAction::IncrementWrap |
| 195 | GL_DECR_WRAP // StencilAction::DecrementWrap | 192 | GL_DECR_WRAP, // StencilAction::DecrementWrap |
| 196 | }; | 193 | }; |
| 197 | 194 | ||
| 198 | // Range check table for input | 195 | // Range check table for input |
| @@ -207,18 +204,16 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) { | |||
| 207 | } | 204 | } |
| 208 | 205 | ||
| 209 | inline GLvec4 ColorRGBA8(const u32 color) { | 206 | inline GLvec4 ColorRGBA8(const u32 color) { |
| 210 | return { { (color >> 0 & 0xFF) / 255.0f, | 207 | return {{ |
| 211 | (color >> 8 & 0xFF) / 255.0f, | 208 | (color >> 0 & 0xFF) / 255.0f, (color >> 8 & 0xFF) / 255.0f, (color >> 16 & 0xFF) / 255.0f, |
| 212 | (color >> 16 & 0xFF) / 255.0f, | 209 | (color >> 24 & 0xFF) / 255.0f, |
| 213 | (color >> 24 & 0xFF) / 255.0f | 210 | }}; |
| 214 | } }; | ||
| 215 | } | 211 | } |
| 216 | 212 | ||
| 217 | inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) { | 213 | inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) { |
| 218 | return { { color.r / 255.0f, | 214 | return {{ |
| 219 | color.g / 255.0f, | 215 | color.r / 255.0f, color.g / 255.0f, color.b / 255.0f, |
| 220 | color.b / 255.0f | 216 | }}; |
| 221 | } }; | ||
| 222 | } | 217 | } |
| 223 | 218 | ||
| 224 | } // namespace | 219 | } // namespace |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 8410e0a64..03a588364 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -6,23 +6,19 @@ | |||
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <cstdlib> | 7 | #include <cstdlib> |
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | |||
| 10 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 11 | |||
| 12 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 13 | #include "common/bit_field.h" | 11 | #include "common/bit_field.h" |
| 14 | #include "common/emu_window.h" | 12 | #include "common/emu_window.h" |
| 15 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 16 | #include "common/profiler_reporting.h" | 14 | #include "common/profiler_reporting.h" |
| 17 | #include "common/synchronized_wrapper.h" | 15 | #include "common/synchronized_wrapper.h" |
| 18 | |||
| 19 | #include "core/hw/gpu.h" | 16 | #include "core/hw/gpu.h" |
| 20 | #include "core/hw/hw.h" | 17 | #include "core/hw/hw.h" |
| 21 | #include "core/hw/lcd.h" | 18 | #include "core/hw/lcd.h" |
| 22 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 23 | #include "core/settings.h" | 20 | #include "core/settings.h" |
| 24 | #include "core/tracer/recorder.h" | 21 | #include "core/tracer/recorder.h" |
| 25 | |||
| 26 | #include "video_core/debug_utils/debug_utils.h" | 22 | #include "video_core/debug_utils/debug_utils.h" |
| 27 | #include "video_core/rasterizer_interface.h" | 23 | #include "video_core/rasterizer_interface.h" |
| 28 | #include "video_core/renderer_opengl/renderer_opengl.h" | 24 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| @@ -87,24 +83,25 @@ struct ScreenRectVertex { | |||
| 87 | * by a 3x2 matrix. | 83 | * by a 3x2 matrix. |
| 88 | */ | 84 | */ |
| 89 | static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) { | 85 | static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) { |
| 90 | std::array<GLfloat, 3 * 2> matrix; | 86 | std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order |
| 91 | 87 | ||
| 88 | // clang-format off | ||
| 92 | matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; | 89 | matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; |
| 93 | matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; | 90 | matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; |
| 94 | // Last matrix row is implicitly assumed to be [0, 0, 1]. | 91 | // Last matrix row is implicitly assumed to be [0, 0, 1]. |
| 92 | // clang-format on | ||
| 95 | 93 | ||
| 96 | return matrix; | 94 | return matrix; |
| 97 | } | 95 | } |
| 98 | 96 | ||
| 99 | /// RendererOpenGL constructor | 97 | /// RendererOpenGL constructor |
| 100 | RendererOpenGL::RendererOpenGL() { | 98 | RendererOpenGL::RendererOpenGL() { |
| 101 | resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth); | 99 | resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth); |
| 102 | resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight; | 100 | resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight; |
| 103 | } | 101 | } |
| 104 | 102 | ||
| 105 | /// RendererOpenGL destructor | 103 | /// RendererOpenGL destructor |
| 106 | RendererOpenGL::~RendererOpenGL() { | 104 | RendererOpenGL::~RendererOpenGL() {} |
| 107 | } | ||
| 108 | 105 | ||
| 109 | /// Swap buffers (render frame) | 106 | /// Swap buffers (render frame) |
| 110 | void RendererOpenGL::SwapBuffers() { | 107 | void RendererOpenGL::SwapBuffers() { |
| @@ -116,13 +113,15 @@ void RendererOpenGL::SwapBuffers() { | |||
| 116 | const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; | 113 | const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; |
| 117 | 114 | ||
| 118 | // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 | 115 | // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 |
| 119 | u32 lcd_color_addr = (i == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); | 116 | u32 lcd_color_addr = |
| 117 | (i == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); | ||
| 120 | lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; | 118 | lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; |
| 121 | LCD::Regs::ColorFill color_fill = {0}; | 119 | LCD::Regs::ColorFill color_fill = {0}; |
| 122 | LCD::Read(color_fill.raw, lcd_color_addr); | 120 | LCD::Read(color_fill.raw, lcd_color_addr); |
| 123 | 121 | ||
| 124 | if (color_fill.is_enabled) { | 122 | if (color_fill.is_enabled) { |
| 125 | LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture); | 123 | LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, |
| 124 | screen_infos[i].texture); | ||
| 126 | 125 | ||
| 127 | // Resize the texture in case the framebuffer size has changed | 126 | // Resize the texture in case the framebuffer size has changed |
| 128 | screen_infos[i].texture.width = 1; | 127 | screen_infos[i].texture.width = 1; |
| @@ -172,15 +171,14 @@ void RendererOpenGL::SwapBuffers() { | |||
| 172 | * Loads framebuffer from emulated memory into the active OpenGL texture. | 171 | * Loads framebuffer from emulated memory into the active OpenGL texture. |
| 173 | */ | 172 | */ |
| 174 | void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, | 173 | void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, |
| 175 | ScreenInfo& screen_info) { | 174 | ScreenInfo& screen_info) { |
| 176 | 175 | ||
| 177 | const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? | 176 | const PAddr framebuffer_addr = |
| 178 | framebuffer.address_left1 : framebuffer.address_left2; | 177 | framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2; |
| 179 | 178 | ||
| 180 | LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x", | 179 | LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x", |
| 181 | framebuffer.stride * framebuffer.height, | 180 | framebuffer.stride * framebuffer.height, framebuffer_addr, (int)framebuffer.width, |
| 182 | framebuffer_addr, (int)framebuffer.width, | 181 | (int)framebuffer.height, (int)framebuffer.format); |
| 183 | (int)framebuffer.height, (int)framebuffer.format); | ||
| 184 | 182 | ||
| 185 | int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); | 183 | int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); |
| 186 | size_t pixel_stride = framebuffer.stride / bpp; | 184 | size_t pixel_stride = framebuffer.stride / bpp; |
| @@ -192,7 +190,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram | |||
| 192 | // only allows rows to have a memory alignement of 4. | 190 | // only allows rows to have a memory alignement of 4. |
| 193 | ASSERT(pixel_stride % 4 == 0); | 191 | ASSERT(pixel_stride % 4 == 0); |
| 194 | 192 | ||
| 195 | if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info)) { | 193 | if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, |
| 194 | static_cast<u32>(pixel_stride), screen_info)) { | ||
| 196 | // Reset the screen info's display texture to its own permanent texture | 195 | // Reset the screen info's display texture to its own permanent texture |
| 197 | screen_info.display_texture = screen_info.texture.resource.handle; | 196 | screen_info.display_texture = screen_info.texture.resource.handle; |
| 198 | screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); | 197 | screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); |
| @@ -208,12 +207,13 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram | |||
| 208 | glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); | 207 | glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); |
| 209 | 208 | ||
| 210 | // Update existing texture | 209 | // Update existing texture |
| 211 | // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they | 210 | // TODO: Test what happens on hardware when you change the framebuffer dimensions so that |
| 212 | // differ from the LCD resolution. | 211 | // they differ from the LCD resolution. |
| 213 | // TODO: Applications could theoretically crash Citra here by specifying too large | 212 | // TODO: Applications could theoretically crash Citra here by specifying too large |
| 214 | // framebuffer sizes. We should make sure that this cannot happen. | 213 | // framebuffer sizes. We should make sure that this cannot happen. |
| 215 | glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, | 214 | glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, |
| 216 | screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data); | 215 | screen_info.texture.gl_format, screen_info.texture.gl_type, |
| 216 | framebuffer_data); | ||
| 217 | 217 | ||
| 218 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | 218 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); |
| 219 | 219 | ||
| @@ -223,9 +223,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram | |||
| 223 | } | 223 | } |
| 224 | 224 | ||
| 225 | /** | 225 | /** |
| 226 | * Fills active OpenGL texture with the given RGB color. | 226 | * Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can |
| 227 | * Since the color is solid, the texture can be 1x1 but will stretch across whatever it's rendered on. | 227 | * be 1x1 but will stretch across whatever it's rendered on. |
| 228 | * This has the added benefit of being *really fast*. | ||
| 229 | */ | 228 | */ |
| 230 | void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, | 229 | void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, |
| 231 | const TextureInfo& texture) { | 230 | const TextureInfo& texture) { |
| @@ -233,7 +232,7 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color | |||
| 233 | state.Apply(); | 232 | state.Apply(); |
| 234 | 233 | ||
| 235 | glActiveTexture(GL_TEXTURE0); | 234 | glActiveTexture(GL_TEXTURE0); |
| 236 | u8 framebuffer_data[3] = { color_r, color_g, color_b }; | 235 | u8 framebuffer_data[3] = {color_r, color_g, color_b}; |
| 237 | 236 | ||
| 238 | // Update existing texture | 237 | // Update existing texture |
| 239 | glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); | 238 | glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); |
| @@ -246,7 +245,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color | |||
| 246 | * Initializes the OpenGL state and creates persistent objects. | 245 | * Initializes the OpenGL state and creates persistent objects. |
| 247 | */ | 246 | */ |
| 248 | void RendererOpenGL::InitOpenGLObjects() { | 247 | void RendererOpenGL::InitOpenGLObjects() { |
| 249 | glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); | 248 | glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, |
| 249 | 0.0f); | ||
| 250 | 250 | ||
| 251 | // Link shaders and get variable locations | 251 | // Link shaders and get variable locations |
| 252 | shader.Create(vertex_shader, fragment_shader); | 252 | shader.Create(vertex_shader, fragment_shader); |
| @@ -270,8 +270,10 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 270 | 270 | ||
| 271 | // Attach vertex data to VAO | 271 | // Attach vertex data to VAO |
| 272 | glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); | 272 | glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); |
| 273 | glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, position)); | 273 | glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), |
| 274 | glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, tex_coord)); | 274 | (GLvoid*)offsetof(ScreenRectVertex, position)); |
| 275 | glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), | ||
| 276 | (GLvoid*)offsetof(ScreenRectVertex, tex_coord)); | ||
| 275 | glEnableVertexAttribArray(attrib_position); | 277 | glEnableVertexAttribArray(attrib_position); |
| 276 | glEnableVertexAttribArray(attrib_tex_coord); | 278 | glEnableVertexAttribArray(attrib_tex_coord); |
| 277 | 279 | ||
| @@ -352,23 +354,25 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 352 | 354 | ||
| 353 | glActiveTexture(GL_TEXTURE0); | 355 | glActiveTexture(GL_TEXTURE0); |
| 354 | glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, | 356 | glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, |
| 355 | texture.gl_format, texture.gl_type, nullptr); | 357 | texture.gl_format, texture.gl_type, nullptr); |
| 356 | 358 | ||
| 357 | state.texture_units[0].texture_2d = 0; | 359 | state.texture_units[0].texture_2d = 0; |
| 358 | state.Apply(); | 360 | state.Apply(); |
| 359 | } | 361 | } |
| 360 | 362 | ||
| 361 | /** | 363 | /** |
| 362 | * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. | 364 | * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD |
| 365 | * rotation. | ||
| 363 | */ | 366 | */ |
| 364 | void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) { | 367 | void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, |
| 368 | float w, float h) { | ||
| 365 | auto& texcoords = screen_info.display_texcoords; | 369 | auto& texcoords = screen_info.display_texcoords; |
| 366 | 370 | ||
| 367 | std::array<ScreenRectVertex, 4> vertices = {{ | 371 | std::array<ScreenRectVertex, 4> vertices = {{ |
| 368 | ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), | 372 | ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), |
| 369 | ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right), | 373 | ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right), |
| 370 | ScreenRectVertex(x, y+h, texcoords.top, texcoords.left), | 374 | ScreenRectVertex(x, y + h, texcoords.top, texcoords.left), |
| 371 | ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right), | 375 | ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.right), |
| 372 | }}; | 376 | }}; |
| 373 | 377 | ||
| 374 | state.texture_units[0].texture_2d = screen_info.display_texture; | 378 | state.texture_units[0].texture_2d = screen_info.display_texture; |
| @@ -391,25 +395,26 @@ void RendererOpenGL::DrawScreens() { | |||
| 391 | glClear(GL_COLOR_BUFFER_BIT); | 395 | glClear(GL_COLOR_BUFFER_BIT); |
| 392 | 396 | ||
| 393 | // Set projection matrix | 397 | // Set projection matrix |
| 394 | std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, | 398 | std::array<GLfloat, 3 * 2> ortho_matrix = |
| 395 | (float)layout.height); | 399 | MakeOrthographicMatrix((float)layout.width, (float)layout.height); |
| 396 | glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data()); | 400 | glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data()); |
| 397 | 401 | ||
| 398 | // Bind texture in Texture Unit 0 | 402 | // Bind texture in Texture Unit 0 |
| 399 | glActiveTexture(GL_TEXTURE0); | 403 | glActiveTexture(GL_TEXTURE0); |
| 400 | glUniform1i(uniform_color_texture, 0); | 404 | glUniform1i(uniform_color_texture, 0); |
| 401 | 405 | ||
| 402 | DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top, | 406 | DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, |
| 403 | (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); | 407 | (float)layout.top_screen.top, (float)layout.top_screen.GetWidth(), |
| 404 | DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, | 408 | (float)layout.top_screen.GetHeight()); |
| 405 | (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); | 409 | DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left, |
| 410 | (float)layout.bottom_screen.top, (float)layout.bottom_screen.GetWidth(), | ||
| 411 | (float)layout.bottom_screen.GetHeight()); | ||
| 406 | 412 | ||
| 407 | m_current_frame++; | 413 | m_current_frame++; |
| 408 | } | 414 | } |
| 409 | 415 | ||
| 410 | /// Updates the framerate | 416 | /// Updates the framerate |
| 411 | void RendererOpenGL::UpdateFramerate() { | 417 | void RendererOpenGL::UpdateFramerate() {} |
| 412 | } | ||
| 413 | 418 | ||
| 414 | /** | 419 | /** |
| 415 | * Set the emulator window to use for renderer | 420 | * Set the emulator window to use for renderer |
| @@ -420,14 +425,16 @@ void RendererOpenGL::SetWindow(EmuWindow* window) { | |||
| 420 | } | 425 | } |
| 421 | 426 | ||
| 422 | static const char* GetSource(GLenum source) { | 427 | static const char* GetSource(GLenum source) { |
| 423 | #define RET(s) case GL_DEBUG_SOURCE_##s: return #s | 428 | #define RET(s) \ |
| 429 | case GL_DEBUG_SOURCE_##s: \ | ||
| 430 | return #s | ||
| 424 | switch (source) { | 431 | switch (source) { |
| 425 | RET(API); | 432 | RET(API); |
| 426 | RET(WINDOW_SYSTEM); | 433 | RET(WINDOW_SYSTEM); |
| 427 | RET(SHADER_COMPILER); | 434 | RET(SHADER_COMPILER); |
| 428 | RET(THIRD_PARTY); | 435 | RET(THIRD_PARTY); |
| 429 | RET(APPLICATION); | 436 | RET(APPLICATION); |
| 430 | RET(OTHER); | 437 | RET(OTHER); |
| 431 | default: | 438 | default: |
| 432 | UNREACHABLE(); | 439 | UNREACHABLE(); |
| 433 | } | 440 | } |
| @@ -435,23 +442,25 @@ static const char* GetSource(GLenum source) { | |||
| 435 | } | 442 | } |
| 436 | 443 | ||
| 437 | static const char* GetType(GLenum type) { | 444 | static const char* GetType(GLenum type) { |
| 438 | #define RET(t) case GL_DEBUG_TYPE_##t: return #t | 445 | #define RET(t) \ |
| 446 | case GL_DEBUG_TYPE_##t: \ | ||
| 447 | return #t | ||
| 439 | switch (type) { | 448 | switch (type) { |
| 440 | RET(ERROR); | 449 | RET(ERROR); |
| 441 | RET(DEPRECATED_BEHAVIOR); | 450 | RET(DEPRECATED_BEHAVIOR); |
| 442 | RET(UNDEFINED_BEHAVIOR); | 451 | RET(UNDEFINED_BEHAVIOR); |
| 443 | RET(PORTABILITY); | 452 | RET(PORTABILITY); |
| 444 | RET(PERFORMANCE); | 453 | RET(PERFORMANCE); |
| 445 | RET(OTHER); | 454 | RET(OTHER); |
| 446 | RET(MARKER); | 455 | RET(MARKER); |
| 447 | default: | 456 | default: |
| 448 | UNREACHABLE(); | 457 | UNREACHABLE(); |
| 449 | } | 458 | } |
| 450 | #undef RET | 459 | #undef RET |
| 451 | } | 460 | } |
| 452 | 461 | ||
| 453 | static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, | 462 | static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, |
| 454 | const GLchar* message, const void* user_param) { | 463 | GLsizei length, const GLchar* message, const void* user_param) { |
| 455 | Log::Level level; | 464 | Log::Level level; |
| 456 | switch (severity) { | 465 | switch (severity) { |
| 457 | case GL_DEBUG_SEVERITY_HIGH: | 466 | case GL_DEBUG_SEVERITY_HIGH: |
| @@ -465,8 +474,8 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum | |||
| 465 | level = Log::Level::Debug; | 474 | level = Log::Level::Debug; |
| 466 | break; | 475 | break; |
| 467 | } | 476 | } |
| 468 | LOG_GENERIC(Log::Class::Render_OpenGL, level, "%s %s %d: %s", | 477 | LOG_GENERIC(Log::Class::Render_OpenGL, level, "%s %s %d: %s", GetSource(source), GetType(type), |
| 469 | GetSource(source), GetType(type), id, message); | 478 | id, message); |
| 470 | } | 479 | } |
| 471 | 480 | ||
| 472 | /// Initialize the renderer | 481 | /// Initialize the renderer |
| @@ -493,5 +502,4 @@ bool RendererOpenGL::Init() { | |||
| 493 | } | 502 | } |
| 494 | 503 | ||
| 495 | /// Shutdown the renderer | 504 | /// Shutdown the renderer |
| 496 | void RendererOpenGL::ShutDown() { | 505 | void RendererOpenGL::ShutDown() {} |
| 497 | } | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 00e1044ab..87c556cff 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -5,14 +5,10 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | |||
| 9 | #include <glad/glad.h> | 8 | #include <glad/glad.h> |
| 10 | |||
| 11 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 12 | #include "common/math_util.h" | 10 | #include "common/math_util.h" |
| 13 | |||
| 14 | #include "core/hw/gpu.h" | 11 | #include "core/hw/gpu.h" |
| 15 | |||
| 16 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 18 | #include "video_core/renderer_opengl/gl_state.h" | 14 | #include "video_core/renderer_opengl/gl_state.h" |
| @@ -38,7 +34,6 @@ struct ScreenInfo { | |||
| 38 | 34 | ||
| 39 | class RendererOpenGL : public RendererBase { | 35 | class RendererOpenGL : public RendererBase { |
| 40 | public: | 36 | public: |
| 41 | |||
| 42 | RendererOpenGL(); | 37 | RendererOpenGL(); |
| 43 | ~RendererOpenGL() override; | 38 | ~RendererOpenGL() override; |
| 44 | 39 | ||
| @@ -67,15 +62,14 @@ private: | |||
| 67 | 62 | ||
| 68 | // Loads framebuffer from emulated memory into the display information structure | 63 | // Loads framebuffer from emulated memory into the display information structure |
| 69 | void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, | 64 | void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, |
| 70 | ScreenInfo& screen_info); | 65 | ScreenInfo& screen_info); |
| 71 | // Fills active OpenGL texture with the given RGB color. | 66 | // Fills active OpenGL texture with the given RGB color. |
| 72 | void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, | 67 | void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); |
| 73 | const TextureInfo& texture); | ||
| 74 | 68 | ||
| 75 | EmuWindow* render_window; ///< Handle to render window | 69 | EmuWindow* render_window; ///< Handle to render window |
| 76 | 70 | ||
| 77 | int resolution_width; ///< Current resolution width | 71 | int resolution_width; ///< Current resolution width |
| 78 | int resolution_height; ///< Current resolution height | 72 | int resolution_height; ///< Current resolution height |
| 79 | 73 | ||
| 80 | OpenGLState state; | 74 | OpenGLState state; |
| 81 | 75 | ||
| @@ -83,10 +77,14 @@ private: | |||
| 83 | OGLVertexArray vertex_array; | 77 | OGLVertexArray vertex_array; |
| 84 | OGLBuffer vertex_buffer; | 78 | OGLBuffer vertex_buffer; |
| 85 | OGLShader shader; | 79 | OGLShader shader; |
| 86 | std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively | 80 | |
| 81 | /// Display information for top and bottom screens respectively | ||
| 82 | std::array<ScreenInfo, 2> screen_infos; | ||
| 83 | |||
| 87 | // Shader uniform location indices | 84 | // Shader uniform location indices |
| 88 | GLuint uniform_modelview_matrix; | 85 | GLuint uniform_modelview_matrix; |
| 89 | GLuint uniform_color_texture; | 86 | GLuint uniform_color_texture; |
| 87 | |||
| 90 | // Shader attribute input indices | 88 | // Shader attribute input indices |
| 91 | GLuint attrib_position; | 89 | GLuint attrib_position; |
| 92 | GLuint attrib_tex_coord; | 90 | GLuint attrib_tex_coord; |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index f565e2c91..272f3ffe1 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -7,23 +7,18 @@ | |||
| 7 | #include <cstring> | 7 | #include <cstring> |
| 8 | #include <unordered_map> | 8 | #include <unordered_map> |
| 9 | #include <utility> | 9 | #include <utility> |
| 10 | |||
| 11 | #include <boost/range/algorithm/fill.hpp> | 10 | #include <boost/range/algorithm/fill.hpp> |
| 12 | |||
| 13 | #include "common/bit_field.h" | 11 | #include "common/bit_field.h" |
| 14 | #include "common/hash.h" | 12 | #include "common/hash.h" |
| 15 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 16 | #include "common/microprofile.h" | 14 | #include "common/microprofile.h" |
| 17 | |||
| 18 | #include "video_core/pica.h" | 15 | #include "video_core/pica.h" |
| 19 | #include "video_core/pica_state.h" | 16 | #include "video_core/pica_state.h" |
| 20 | #include "video_core/shader/shader.h" | 17 | #include "video_core/shader/shader.h" |
| 21 | #include "video_core/shader/shader_interpreter.h" | 18 | #include "video_core/shader/shader_interpreter.h" |
| 22 | |||
| 23 | #ifdef ARCHITECTURE_x86_64 | 19 | #ifdef ARCHITECTURE_x86_64 |
| 24 | #include "video_core/shader/shader_jit_x64.h" | 20 | #include "video_core/shader/shader_jit_x64.h" |
| 25 | #endif // ARCHITECTURE_x86_64 | 21 | #endif // ARCHITECTURE_x86_64 |
| 26 | |||
| 27 | #include "video_core/video_core.h" | 22 | #include "video_core/video_core.h" |
| 28 | 23 | ||
| 29 | namespace Pica { | 24 | namespace Pica { |
| @@ -46,10 +41,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { | |||
| 46 | 41 | ||
| 47 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; | 42 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; |
| 48 | 43 | ||
| 49 | u32 semantics[4] = { | 44 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, |
| 50 | output_register_map.map_x, output_register_map.map_y, | 45 | output_register_map.map_z, output_register_map.map_w}; |
| 51 | output_register_map.map_z, output_register_map.map_w | ||
| 52 | }; | ||
| 53 | 46 | ||
| 54 | for (unsigned comp = 0; comp < 4; ++comp) { | 47 | for (unsigned comp = 0; comp < 4; ++comp) { |
| 55 | float24* out = ((float24*)&ret) + semantics[comp]; | 48 | float24* out = ((float24*)&ret) + semantics[comp]; |
| @@ -65,19 +58,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { | |||
| 65 | index++; | 58 | index++; |
| 66 | } | 59 | } |
| 67 | 60 | ||
| 68 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation | 61 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing |
| 62 | // interpolation | ||
| 69 | for (unsigned i = 0; i < 4; ++i) { | 63 | for (unsigned i = 0; i < 4; ++i) { |
| 70 | ret.color[i] = float24::FromFloat32( | 64 | ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); |
| 71 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | ||
| 72 | } | 65 | } |
| 73 | 66 | ||
| 74 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " | 67 | LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " |
| 75 | "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", | 68 | "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", |
| 76 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | 69 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), |
| 77 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | 70 | ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), |
| 78 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | 71 | ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(), |
| 79 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), | 72 | ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |
| 80 | ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); | 73 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(), |
| 74 | ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); | ||
| 81 | 75 | ||
| 82 | return ret; | 76 | return ret; |
| 83 | } | 77 | } |
| @@ -96,8 +90,9 @@ void ClearCache() { | |||
| 96 | void ShaderSetup::Setup() { | 90 | void ShaderSetup::Setup() { |
| 97 | #ifdef ARCHITECTURE_x86_64 | 91 | #ifdef ARCHITECTURE_x86_64 |
| 98 | if (VideoCore::g_shader_jit_enabled) { | 92 | if (VideoCore::g_shader_jit_enabled) { |
| 99 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 93 | u64 cache_key = |
| 100 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); | 94 | Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ |
| 95 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)); | ||
| 101 | 96 | ||
| 102 | auto iter = shader_map.find(cache_key); | 97 | auto iter = shader_map.find(cache_key); |
| 103 | if (iter != shader_map.end()) { | 98 | if (iter != shader_map.end()) { |
| @@ -127,7 +122,7 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num | |||
| 127 | const auto& attribute_register_map = config.input_register_map; | 122 | const auto& attribute_register_map = config.input_register_map; |
| 128 | 123 | ||
| 129 | for (unsigned i = 0; i < num_attributes; i++) | 124 | for (unsigned i = 0; i < num_attributes; i++) |
| 130 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | 125 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; |
| 131 | 126 | ||
| 132 | state.conditional_code[0] = false; | 127 | state.conditional_code[0] = false; |
| 133 | state.conditional_code[1] = false; | 128 | state.conditional_code[1] = false; |
| @@ -140,10 +135,11 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num | |||
| 140 | #else | 135 | #else |
| 141 | RunInterpreter(setup, state, config.main_offset); | 136 | RunInterpreter(setup, state, config.main_offset); |
| 142 | #endif // ARCHITECTURE_x86_64 | 137 | #endif // ARCHITECTURE_x86_64 |
| 143 | |||
| 144 | } | 138 | } |
| 145 | 139 | ||
| 146 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { | 140 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, |
| 141 | const Regs::ShaderConfig& config, | ||
| 142 | const ShaderSetup& setup) { | ||
| 147 | UnitState<true> state; | 143 | UnitState<true> state; |
| 148 | 144 | ||
| 149 | state.debug.max_offset = 0; | 145 | state.debug.max_offset = 0; |
| @@ -155,7 +151,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ | |||
| 155 | boost::fill(state.registers.input, &dummy_register); | 151 | boost::fill(state.registers.input, &dummy_register); |
| 156 | 152 | ||
| 157 | for (unsigned i = 0; i < num_attributes; i++) | 153 | for (unsigned i = 0; i < num_attributes; i++) |
| 158 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | 154 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; |
| 159 | 155 | ||
| 160 | state.conditional_code[0] = false; | 156 | state.conditional_code[0] = false; |
| 161 | state.conditional_code[1] = false; | 157 | state.conditional_code[1] = false; |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index fee16df62..8858d67f8 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -9,16 +9,12 @@ | |||
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <type_traits> | 10 | #include <type_traits> |
| 11 | #include <vector> | 11 | #include <vector> |
| 12 | |||
| 13 | #include <boost/container/static_vector.hpp> | 12 | #include <boost/container/static_vector.hpp> |
| 14 | |||
| 15 | #include <nihstro/shader_bytecode.h> | 13 | #include <nihstro/shader_bytecode.h> |
| 16 | |||
| 17 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 18 | #include "common/common_funcs.h" | 15 | #include "common/common_funcs.h" |
| 19 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 20 | #include "common/vector_math.h" | 17 | #include "common/vector_math.h" |
| 21 | |||
| 22 | #include "video_core/pica.h" | 18 | #include "video_core/pica.h" |
| 23 | #include "video_core/pica_types.h" | 19 | #include "video_core/pica_types.h" |
| 24 | 20 | ||
| @@ -94,46 +90,46 @@ struct OutputRegisters { | |||
| 94 | static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); | 90 | static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); |
| 95 | 91 | ||
| 96 | // Helper structure used to keep track of data useful for inspection of shader emulation | 92 | // Helper structure used to keep track of data useful for inspection of shader emulation |
| 97 | template<bool full_debugging> | 93 | template <bool full_debugging> |
| 98 | struct DebugData; | 94 | struct DebugData; |
| 99 | 95 | ||
| 100 | template<> | 96 | template <> |
| 101 | struct DebugData<false> { | 97 | struct DebugData<false> { |
| 102 | // TODO: Hide these behind and interface and move them to DebugData<true> | 98 | // TODO: Hide these behind and interface and move them to DebugData<true> |
| 103 | u32 max_offset; // maximum program counter ever reached | 99 | u32 max_offset; // maximum program counter ever reached |
| 104 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | 100 | u32 max_opdesc_id; // maximum swizzle pattern index ever used |
| 105 | }; | 101 | }; |
| 106 | 102 | ||
| 107 | template<> | 103 | template <> |
| 108 | struct DebugData<true> { | 104 | struct DebugData<true> { |
| 109 | // Records store the input and output operands of a particular instruction. | 105 | // Records store the input and output operands of a particular instruction. |
| 110 | struct Record { | 106 | struct Record { |
| 111 | enum Type { | 107 | enum Type { |
| 112 | // Floating point arithmetic operands | 108 | // Floating point arithmetic operands |
| 113 | SRC1 = 0x1, | 109 | SRC1 = 0x1, |
| 114 | SRC2 = 0x2, | 110 | SRC2 = 0x2, |
| 115 | SRC3 = 0x4, | 111 | SRC3 = 0x4, |
| 116 | 112 | ||
| 117 | // Initial and final output operand value | 113 | // Initial and final output operand value |
| 118 | DEST_IN = 0x8, | 114 | DEST_IN = 0x8, |
| 119 | DEST_OUT = 0x10, | 115 | DEST_OUT = 0x10, |
| 120 | 116 | ||
| 121 | // Current and next instruction offset (in words) | 117 | // Current and next instruction offset (in words) |
| 122 | CUR_INSTR = 0x20, | 118 | CUR_INSTR = 0x20, |
| 123 | NEXT_INSTR = 0x40, | 119 | NEXT_INSTR = 0x40, |
| 124 | 120 | ||
| 125 | // Output address register value | 121 | // Output address register value |
| 126 | ADDR_REG_OUT = 0x80, | 122 | ADDR_REG_OUT = 0x80, |
| 127 | 123 | ||
| 128 | // Result of a comparison instruction | 124 | // Result of a comparison instruction |
| 129 | CMP_RESULT = 0x100, | 125 | CMP_RESULT = 0x100, |
| 130 | 126 | ||
| 131 | // Input values for conditional flow control instructions | 127 | // Input values for conditional flow control instructions |
| 132 | COND_BOOL_IN = 0x200, | 128 | COND_BOOL_IN = 0x200, |
| 133 | COND_CMP_IN = 0x400, | 129 | COND_CMP_IN = 0x400, |
| 134 | 130 | ||
| 135 | // Input values for a loop | 131 | // Input values for a loop |
| 136 | LOOP_INT_IN = 0x800, | 132 | LOOP_INT_IN = 0x800, |
| 137 | }; | 133 | }; |
| 138 | 134 | ||
| 139 | Math::Vec4<float24> src1; | 135 | Math::Vec4<float24> src1; |
| @@ -156,7 +152,7 @@ struct DebugData<true> { | |||
| 156 | unsigned mask = 0; | 152 | unsigned mask = 0; |
| 157 | }; | 153 | }; |
| 158 | 154 | ||
| 159 | u32 max_offset; // maximum program counter ever reached | 155 | u32 max_offset; // maximum program counter ever reached |
| 160 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | 156 | u32 max_opdesc_id; // maximum swizzle pattern index ever used |
| 161 | 157 | ||
| 162 | // List of records for each executed shader instruction | 158 | // List of records for each executed shader instruction |
| @@ -167,10 +163,10 @@ struct DebugData<true> { | |||
| 167 | using DebugDataRecord = DebugData<true>::Record; | 163 | using DebugDataRecord = DebugData<true>::Record; |
| 168 | 164 | ||
| 169 | // Helper function to set a DebugData<true>::Record field based on the template enum parameter. | 165 | // Helper function to set a DebugData<true>::Record field based on the template enum parameter. |
| 170 | template<DebugDataRecord::Type type, typename ValueType> | 166 | template <DebugDataRecord::Type type, typename ValueType> |
| 171 | inline void SetField(DebugDataRecord& record, ValueType value); | 167 | inline void SetField(DebugDataRecord& record, ValueType value); |
| 172 | 168 | ||
| 173 | template<> | 169 | template <> |
| 174 | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | 170 | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { |
| 175 | record.src1.x = value[0]; | 171 | record.src1.x = value[0]; |
| 176 | record.src1.y = value[1]; | 172 | record.src1.y = value[1]; |
| @@ -178,7 +174,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va | |||
| 178 | record.src1.w = value[3]; | 174 | record.src1.w = value[3]; |
| 179 | } | 175 | } |
| 180 | 176 | ||
| 181 | template<> | 177 | template <> |
| 182 | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | 178 | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { |
| 183 | record.src2.x = value[0]; | 179 | record.src2.x = value[0]; |
| 184 | record.src2.y = value[1]; | 180 | record.src2.y = value[1]; |
| @@ -186,7 +182,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va | |||
| 186 | record.src2.w = value[3]; | 182 | record.src2.w = value[3]; |
| 187 | } | 183 | } |
| 188 | 184 | ||
| 189 | template<> | 185 | template <> |
| 190 | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | 186 | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { |
| 191 | record.src3.x = value[0]; | 187 | record.src3.x = value[0]; |
| 192 | record.src3.y = value[1]; | 188 | record.src3.y = value[1]; |
| @@ -194,7 +190,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va | |||
| 194 | record.src3.w = value[3]; | 190 | record.src3.w = value[3]; |
| 195 | } | 191 | } |
| 196 | 192 | ||
| 197 | template<> | 193 | template <> |
| 198 | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | 194 | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { |
| 199 | record.dest_in.x = value[0]; | 195 | record.dest_in.x = value[0]; |
| 200 | record.dest_in.y = value[1]; | 196 | record.dest_in.y = value[1]; |
| @@ -202,7 +198,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* | |||
| 202 | record.dest_in.w = value[3]; | 198 | record.dest_in.w = value[3]; |
| 203 | } | 199 | } |
| 204 | 200 | ||
| 205 | template<> | 201 | template <> |
| 206 | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | 202 | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { |
| 207 | record.dest_out.x = value[0]; | 203 | record.dest_out.x = value[0]; |
| 208 | record.dest_out.y = value[1]; | 204 | record.dest_out.y = value[1]; |
| @@ -210,67 +206,66 @@ inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24 | |||
| 210 | record.dest_out.w = value[3]; | 206 | record.dest_out.w = value[3]; |
| 211 | } | 207 | } |
| 212 | 208 | ||
| 213 | template<> | 209 | template <> |
| 214 | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { | 210 | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { |
| 215 | record.address_registers[0] = value[0]; | 211 | record.address_registers[0] = value[0]; |
| 216 | record.address_registers[1] = value[1]; | 212 | record.address_registers[1] = value[1]; |
| 217 | } | 213 | } |
| 218 | 214 | ||
| 219 | template<> | 215 | template <> |
| 220 | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { | 216 | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { |
| 221 | record.conditional_code[0] = value[0]; | 217 | record.conditional_code[0] = value[0]; |
| 222 | record.conditional_code[1] = value[1]; | 218 | record.conditional_code[1] = value[1]; |
| 223 | } | 219 | } |
| 224 | 220 | ||
| 225 | template<> | 221 | template <> |
| 226 | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { | 222 | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { |
| 227 | record.cond_bool = value; | 223 | record.cond_bool = value; |
| 228 | } | 224 | } |
| 229 | 225 | ||
| 230 | template<> | 226 | template <> |
| 231 | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { | 227 | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { |
| 232 | record.cond_cmp[0] = value[0]; | 228 | record.cond_cmp[0] = value[0]; |
| 233 | record.cond_cmp[1] = value[1]; | 229 | record.cond_cmp[1] = value[1]; |
| 234 | } | 230 | } |
| 235 | 231 | ||
| 236 | template<> | 232 | template <> |
| 237 | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { | 233 | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { |
| 238 | record.loop_int = value; | 234 | record.loop_int = value; |
| 239 | } | 235 | } |
| 240 | 236 | ||
| 241 | template<> | 237 | template <> |
| 242 | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { | 238 | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { |
| 243 | record.instruction_offset = value; | 239 | record.instruction_offset = value; |
| 244 | } | 240 | } |
| 245 | 241 | ||
| 246 | template<> | 242 | template <> |
| 247 | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { | 243 | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { |
| 248 | record.next_instruction = value; | 244 | record.next_instruction = value; |
| 249 | } | 245 | } |
| 250 | 246 | ||
| 251 | // Helper function to set debug information on the current shader iteration. | 247 | // Helper function to set debug information on the current shader iteration. |
| 252 | template<DebugDataRecord::Type type, typename ValueType> | 248 | template <DebugDataRecord::Type type, typename ValueType> |
| 253 | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { | 249 | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { |
| 254 | // Debugging disabled => nothing to do | 250 | // Debugging disabled => nothing to do |
| 255 | } | 251 | } |
| 256 | 252 | ||
| 257 | template<DebugDataRecord::Type type, typename ValueType> | 253 | template <DebugDataRecord::Type type, typename ValueType> |
| 258 | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { | 254 | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { |
| 259 | if (offset >= debug_data.records.size()) | 255 | if (offset >= debug_data.records.size()) |
| 260 | debug_data.records.resize(offset + 1); | 256 | debug_data.records.resize(offset + 1); |
| 261 | 257 | ||
| 262 | SetField<type, ValueType>(debug_data.records[offset], value); | 258 | SetField<type, ValueType>(debug_data.records[offset], value); |
| 263 | debug_data.records[offset].mask |= type; | 259 | debug_data.records[offset].mask |= type; |
| 264 | } | 260 | } |
| 265 | 261 | ||
| 266 | |||
| 267 | /** | 262 | /** |
| 268 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 263 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 269 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 264 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| 270 | * single shader unit that processes all shaders serially. Putting the state information in a struct | 265 | * single shader unit that processes all shaders serially. Putting the state information in a struct |
| 271 | * here will make it easier for us to parallelize the shader processing later. | 266 | * here will make it easier for us to parallelize the shader processing later. |
| 272 | */ | 267 | */ |
| 273 | template<bool Debug> | 268 | template <bool Debug> |
| 274 | struct UnitState { | 269 | struct UnitState { |
| 275 | struct Registers { | 270 | struct Registers { |
| 276 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 271 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| @@ -293,10 +288,12 @@ struct UnitState { | |||
| 293 | static size_t InputOffset(const SourceRegister& reg) { | 288 | static size_t InputOffset(const SourceRegister& reg) { |
| 294 | switch (reg.GetRegisterType()) { | 289 | switch (reg.GetRegisterType()) { |
| 295 | case RegisterType::Input: | 290 | case RegisterType::Input: |
| 296 | return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 291 | return offsetof(UnitState, registers.input) + |
| 292 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | ||
| 297 | 293 | ||
| 298 | case RegisterType::Temporary: | 294 | case RegisterType::Temporary: |
| 299 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 295 | return offsetof(UnitState, registers.temporary) + |
| 296 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | ||
| 300 | 297 | ||
| 301 | default: | 298 | default: |
| 302 | UNREACHABLE(); | 299 | UNREACHABLE(); |
| @@ -307,10 +304,12 @@ struct UnitState { | |||
| 307 | static size_t OutputOffset(const DestRegister& reg) { | 304 | static size_t OutputOffset(const DestRegister& reg) { |
| 308 | switch (reg.GetRegisterType()) { | 305 | switch (reg.GetRegisterType()) { |
| 309 | case RegisterType::Output: | 306 | case RegisterType::Output: |
| 310 | return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 307 | return offsetof(UnitState, output_registers.value) + |
| 308 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | ||
| 311 | 309 | ||
| 312 | case RegisterType::Temporary: | 310 | case RegisterType::Temporary: |
| 313 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 311 | return offsetof(UnitState, registers.temporary) + |
| 312 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | ||
| 314 | 313 | ||
| 315 | default: | 314 | default: |
| 316 | UNREACHABLE(); | 315 | UNREACHABLE(); |
| @@ -336,13 +335,13 @@ struct ShaderSetup { | |||
| 336 | static size_t UniformOffset(RegisterType type, unsigned index) { | 335 | static size_t UniformOffset(RegisterType type, unsigned index) { |
| 337 | switch (type) { | 336 | switch (type) { |
| 338 | case RegisterType::FloatUniform: | 337 | case RegisterType::FloatUniform: |
| 339 | return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); | 338 | return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); |
| 340 | 339 | ||
| 341 | case RegisterType::BoolUniform: | 340 | case RegisterType::BoolUniform: |
| 342 | return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); | 341 | return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); |
| 343 | 342 | ||
| 344 | case RegisterType::IntUniform: | 343 | case RegisterType::IntUniform: |
| 345 | return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); | 344 | return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); |
| 346 | 345 | ||
| 347 | default: | 346 | default: |
| 348 | UNREACHABLE(); | 347 | UNREACHABLE(); |
| @@ -354,8 +353,8 @@ struct ShaderSetup { | |||
| 354 | std::array<u32, 1024> swizzle_data; | 353 | std::array<u32, 1024> swizzle_data; |
| 355 | 354 | ||
| 356 | /** | 355 | /** |
| 357 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per | 356 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once |
| 358 | * vertex, which would happen within the `Run` function). | 357 | * per vertex, which would happen within the `Run` function). |
| 359 | */ | 358 | */ |
| 360 | void Setup(); | 359 | void Setup(); |
| 361 | 360 | ||
| @@ -375,8 +374,8 @@ struct ShaderSetup { | |||
| 375 | * @param setup Setup object for the shader pipeline | 374 | * @param setup Setup object for the shader pipeline |
| 376 | * @return Debug information for this shader with regards to the given vertex | 375 | * @return Debug information for this shader with regards to the given vertex |
| 377 | */ | 376 | */ |
| 378 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); | 377 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, |
| 379 | 378 | const Regs::ShaderConfig& config, const ShaderSetup& setup); | |
| 380 | }; | 379 | }; |
| 381 | 380 | ||
| 382 | } // namespace Shader | 381 | } // namespace Shader |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index f6c86a759..501d00b6b 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -6,14 +6,11 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cmath> | 7 | #include <cmath> |
| 8 | #include <numeric> | 8 | #include <numeric> |
| 9 | |||
| 10 | #include <nihstro/shader_bytecode.h> | 9 | #include <nihstro/shader_bytecode.h> |
| 11 | |||
| 12 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 13 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 14 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 15 | #include "common/vector_math.h" | 13 | #include "common/vector_math.h" |
| 16 | |||
| 17 | #include "video_core/pica_state.h" | 14 | #include "video_core/pica_state.h" |
| 18 | #include "video_core/pica_types.h" | 15 | #include "video_core/pica_types.h" |
| 19 | #include "video_core/shader/shader.h" | 16 | #include "video_core/shader/shader.h" |
| @@ -40,7 +37,7 @@ struct CallStackElement { | |||
| 40 | u32 loop_address; // The address where we'll return to after each loop iteration | 37 | u32 loop_address; // The address where we'll return to after each loop iteration |
| 41 | }; | 38 | }; |
| 42 | 39 | ||
| 43 | template<bool Debug> | 40 | template <bool Debug> |
| 44 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { | 41 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { |
| 45 | // TODO: Is there a maximal size for this? | 42 | // TODO: Is there a maximal size for this? |
| 46 | boost::container::static_vector<CallStackElement, 16> call_stack; | 43 | boost::container::static_vector<CallStackElement, 16> call_stack; |
| @@ -74,14 +71,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 74 | } | 71 | } |
| 75 | } | 72 | } |
| 76 | 73 | ||
| 77 | const Instruction instr = { program_code[program_counter] }; | 74 | const Instruction instr = {program_code[program_counter]}; |
| 78 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | 75 | const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]}; |
| 79 | 76 | ||
| 80 | auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, | 77 | auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, |
| 81 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 78 | u32 num_instructions, u32 return_offset, |
| 82 | program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 79 | u8 repeat_count, u8 loop_increment) { |
| 80 | // -1 to make sure when incrementing the PC we end up at the correct offset | ||
| 81 | program_counter = offset - 1; | ||
| 83 | ASSERT(call_stack.size() < call_stack.capacity()); | 82 | ASSERT(call_stack.size() < call_stack.capacity()); |
| 84 | call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 83 | call_stack.push_back( |
| 84 | {offset + num_instructions, return_offset, repeat_count, loop_increment, offset}); | ||
| 85 | }; | 85 | }; |
| 86 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); | 86 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); |
| 87 | if (iteration > 0) | 87 | if (iteration > 0) |
| @@ -106,24 +106,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 106 | }; | 106 | }; |
| 107 | 107 | ||
| 108 | switch (instr.opcode.Value().GetInfo().type) { | 108 | switch (instr.opcode.Value().GetInfo().type) { |
| 109 | case OpCode::Type::Arithmetic: | 109 | case OpCode::Type::Arithmetic: { |
| 110 | { | 110 | const bool is_inverted = |
| 111 | const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); | 111 | (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); |
| 112 | 112 | ||
| 113 | const int address_offset = (instr.common.address_register_index == 0) | 113 | const int address_offset = |
| 114 | ? 0 : state.address_registers[instr.common.address_register_index - 1]; | 114 | (instr.common.address_register_index == 0) |
| 115 | ? 0 | ||
| 116 | : state.address_registers[instr.common.address_register_index - 1]; | ||
| 115 | 117 | ||
| 116 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); | 118 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + |
| 117 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); | 119 | (!is_inverted * address_offset)); |
| 120 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + | ||
| 121 | (is_inverted * address_offset)); | ||
| 118 | 122 | ||
| 119 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | 123 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); |
| 120 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | 124 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); |
| 121 | 125 | ||
| 122 | float24 src1[4] = { | 126 | float24 src1[4] = { |
| 123 | src1_[(int)swizzle.GetSelectorSrc1(0)], | 127 | src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], |
| 124 | src1_[(int)swizzle.GetSelectorSrc1(1)], | 128 | src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], |
| 125 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 126 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 127 | }; | 129 | }; |
| 128 | if (negate_src1) { | 130 | if (negate_src1) { |
| 129 | src1[0] = src1[0] * float24::FromFloat32(-1); | 131 | src1[0] = src1[0] * float24::FromFloat32(-1); |
| @@ -132,10 +134,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 132 | src1[3] = src1[3] * float24::FromFloat32(-1); | 134 | src1[3] = src1[3] * float24::FromFloat32(-1); |
| 133 | } | 135 | } |
| 134 | float24 src2[4] = { | 136 | float24 src2[4] = { |
| 135 | src2_[(int)swizzle.GetSelectorSrc2(0)], | 137 | src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], |
| 136 | src2_[(int)swizzle.GetSelectorSrc2(1)], | 138 | src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], |
| 137 | src2_[(int)swizzle.GetSelectorSrc2(2)], | ||
| 138 | src2_[(int)swizzle.GetSelectorSrc2(3)], | ||
| 139 | }; | 139 | }; |
| 140 | if (negate_src2) { | 140 | if (negate_src2) { |
| 141 | src2[0] = src2[0] * float24::FromFloat32(-1); | 141 | src2[0] = src2[0] * float24::FromFloat32(-1); |
| @@ -144,15 +144,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 144 | src2[3] = src2[3] * float24::FromFloat32(-1); | 144 | src2[3] = src2[3] * float24::FromFloat32(-1); |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] | 147 | float24* dest = |
| 148 | : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | 148 | (instr.common.dest.Value() < 0x10) |
| 149 | : dummy_vec4_float24; | 149 | ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] |
| 150 | : (instr.common.dest.Value() < 0x20) | ||
| 151 | ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | ||
| 152 | : dummy_vec4_float24; | ||
| 150 | 153 | ||
| 151 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | 154 | state.debug.max_opdesc_id = |
| 155 | std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id); | ||
| 152 | 156 | ||
| 153 | switch (instr.opcode.Value().EffectiveOpCode()) { | 157 | switch (instr.opcode.Value().EffectiveOpCode()) { |
| 154 | case OpCode::Id::ADD: | 158 | case OpCode::Id::ADD: { |
| 155 | { | ||
| 156 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 159 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 157 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 160 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| 158 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 161 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| @@ -166,8 +169,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 166 | break; | 169 | break; |
| 167 | } | 170 | } |
| 168 | 171 | ||
| 169 | case OpCode::Id::MUL: | 172 | case OpCode::Id::MUL: { |
| 170 | { | ||
| 171 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 173 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 172 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 174 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| 173 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 175 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| @@ -228,8 +230,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 228 | case OpCode::Id::DP3: | 230 | case OpCode::Id::DP3: |
| 229 | case OpCode::Id::DP4: | 231 | case OpCode::Id::DP4: |
| 230 | case OpCode::Id::DPH: | 232 | case OpCode::Id::DPH: |
| 231 | case OpCode::Id::DPHI: | 233 | case OpCode::Id::DPHI: { |
| 232 | { | ||
| 233 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 234 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 234 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 235 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| 235 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 236 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| @@ -239,7 +240,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 239 | src1[3] = float24::FromFloat32(1.0f); | 240 | src1[3] = float24::FromFloat32(1.0f); |
| 240 | 241 | ||
| 241 | int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; | 242 | int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; |
| 242 | float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f)); | 243 | float24 dot = std::inner_product(src1, src1 + num_components, src2, |
| 244 | float24::FromFloat32(0.f)); | ||
| 243 | 245 | ||
| 244 | for (int i = 0; i < 4; ++i) { | 246 | for (int i = 0; i < 4; ++i) { |
| 245 | if (!swizzle.DestComponentEnabled(i)) | 247 | if (!swizzle.DestComponentEnabled(i)) |
| @@ -252,8 +254,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 252 | } | 254 | } |
| 253 | 255 | ||
| 254 | // Reciprocal | 256 | // Reciprocal |
| 255 | case OpCode::Id::RCP: | 257 | case OpCode::Id::RCP: { |
| 256 | { | ||
| 257 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 258 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 258 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 259 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 259 | float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); | 260 | float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); |
| @@ -268,8 +269,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 268 | } | 269 | } |
| 269 | 270 | ||
| 270 | // Reciprocal Square Root | 271 | // Reciprocal Square Root |
| 271 | case OpCode::Id::RSQ: | 272 | case OpCode::Id::RSQ: { |
| 272 | { | ||
| 273 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 273 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 274 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 274 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 275 | float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); | 275 | float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); |
| @@ -283,8 +283,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 283 | break; | 283 | break; |
| 284 | } | 284 | } |
| 285 | 285 | ||
| 286 | case OpCode::Id::MOVA: | 286 | case OpCode::Id::MOVA: { |
| 287 | { | ||
| 288 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 287 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 289 | for (int i = 0; i < 2; ++i) { | 288 | for (int i = 0; i < 2; ++i) { |
| 290 | if (!swizzle.DestComponentEnabled(i)) | 289 | if (!swizzle.DestComponentEnabled(i)) |
| @@ -293,12 +292,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 293 | // TODO: Figure out how the rounding is done on hardware | 292 | // TODO: Figure out how the rounding is done on hardware |
| 294 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | 293 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); |
| 295 | } | 294 | } |
| 296 | Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); | 295 | Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, |
| 296 | state.address_registers); | ||
| 297 | break; | 297 | break; |
| 298 | } | 298 | } |
| 299 | 299 | ||
| 300 | case OpCode::Id::MOV: | 300 | case OpCode::Id::MOV: { |
| 301 | { | ||
| 302 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 301 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 303 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 302 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 304 | for (int i = 0; i < 4; ++i) { | 303 | for (int i = 0; i < 4; ++i) { |
| @@ -320,7 +319,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 320 | if (!swizzle.DestComponentEnabled(i)) | 319 | if (!swizzle.DestComponentEnabled(i)) |
| 321 | continue; | 320 | continue; |
| 322 | 321 | ||
| 323 | dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | 322 | dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) |
| 323 | : float24::FromFloat32(0.0f); | ||
| 324 | } | 324 | } |
| 325 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 325 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |
| 326 | break; | 326 | break; |
| @@ -334,7 +334,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 334 | if (!swizzle.DestComponentEnabled(i)) | 334 | if (!swizzle.DestComponentEnabled(i)) |
| 335 | continue; | 335 | continue; |
| 336 | 336 | ||
| 337 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | 337 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) |
| 338 | : float24::FromFloat32(0.0f); | ||
| 338 | } | 339 | } |
| 339 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 340 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |
| 340 | break; | 341 | break; |
| @@ -349,40 +350,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 349 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); | 350 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); |
| 350 | 351 | ||
| 351 | switch (op) { | 352 | switch (op) { |
| 352 | case Instruction::Common::CompareOpType::Equal: | 353 | case Instruction::Common::CompareOpType::Equal: |
| 353 | state.conditional_code[i] = (src1[i] == src2[i]); | 354 | state.conditional_code[i] = (src1[i] == src2[i]); |
| 354 | break; | 355 | break; |
| 355 | 356 | ||
| 356 | case Instruction::Common::CompareOpType::NotEqual: | 357 | case Instruction::Common::CompareOpType::NotEqual: |
| 357 | state.conditional_code[i] = (src1[i] != src2[i]); | 358 | state.conditional_code[i] = (src1[i] != src2[i]); |
| 358 | break; | 359 | break; |
| 359 | 360 | ||
| 360 | case Instruction::Common::CompareOpType::LessThan: | 361 | case Instruction::Common::CompareOpType::LessThan: |
| 361 | state.conditional_code[i] = (src1[i] < src2[i]); | 362 | state.conditional_code[i] = (src1[i] < src2[i]); |
| 362 | break; | 363 | break; |
| 363 | 364 | ||
| 364 | case Instruction::Common::CompareOpType::LessEqual: | 365 | case Instruction::Common::CompareOpType::LessEqual: |
| 365 | state.conditional_code[i] = (src1[i] <= src2[i]); | 366 | state.conditional_code[i] = (src1[i] <= src2[i]); |
| 366 | break; | 367 | break; |
| 367 | 368 | ||
| 368 | case Instruction::Common::CompareOpType::GreaterThan: | 369 | case Instruction::Common::CompareOpType::GreaterThan: |
| 369 | state.conditional_code[i] = (src1[i] > src2[i]); | 370 | state.conditional_code[i] = (src1[i] > src2[i]); |
| 370 | break; | 371 | break; |
| 371 | 372 | ||
| 372 | case Instruction::Common::CompareOpType::GreaterEqual: | 373 | case Instruction::Common::CompareOpType::GreaterEqual: |
| 373 | state.conditional_code[i] = (src1[i] >= src2[i]); | 374 | state.conditional_code[i] = (src1[i] >= src2[i]); |
| 374 | break; | 375 | break; |
| 375 | 376 | ||
| 376 | default: | 377 | default: |
| 377 | LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); | 378 | LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); |
| 378 | break; | 379 | break; |
| 379 | } | 380 | } |
| 380 | } | 381 | } |
| 381 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); | 382 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); |
| 382 | break; | 383 | break; |
| 383 | 384 | ||
| 384 | case OpCode::Id::EX2: | 385 | case OpCode::Id::EX2: { |
| 385 | { | ||
| 386 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 386 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 387 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 387 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 388 | 388 | ||
| @@ -399,8 +399,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 399 | break; | 399 | break; |
| 400 | } | 400 | } |
| 401 | 401 | ||
| 402 | case OpCode::Id::LG2: | 402 | case OpCode::Id::LG2: { |
| 403 | { | ||
| 404 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 403 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 405 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 404 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 406 | 405 | ||
| @@ -419,7 +418,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 419 | 418 | ||
| 420 | default: | 419 | default: |
| 421 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | 420 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", |
| 422 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 421 | (int)instr.opcode.Value().EffectiveOpCode(), |
| 422 | instr.opcode.Value().GetInfo().name, instr.hex); | ||
| 423 | DEBUG_ASSERT(false); | 423 | DEBUG_ASSERT(false); |
| 424 | break; | 424 | break; |
| 425 | } | 425 | } |
| @@ -427,30 +427,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 427 | break; | 427 | break; |
| 428 | } | 428 | } |
| 429 | 429 | ||
| 430 | case OpCode::Type::MultiplyAdd: | 430 | case OpCode::Type::MultiplyAdd: { |
| 431 | { | ||
| 432 | if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || | 431 | if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || |
| 433 | (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { | 432 | (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { |
| 434 | const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]); | 433 | const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>( |
| 434 | &swizzle_data[instr.mad.operand_desc_id]); | ||
| 435 | 435 | ||
| 436 | bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); | 436 | bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); |
| 437 | 437 | ||
| 438 | const int address_offset = (instr.mad.address_register_index == 0) | 438 | const int address_offset = |
| 439 | ? 0 : state.address_registers[instr.mad.address_register_index - 1]; | 439 | (instr.mad.address_register_index == 0) |
| 440 | ? 0 | ||
| 441 | : state.address_registers[instr.mad.address_register_index - 1]; | ||
| 440 | 442 | ||
| 441 | const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); | 443 | const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); |
| 442 | const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset)); | 444 | const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + |
| 443 | const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset)); | 445 | (!is_inverted * address_offset)); |
| 446 | const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + | ||
| 447 | (is_inverted * address_offset)); | ||
| 444 | 448 | ||
| 445 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | 449 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); |
| 446 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | 450 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); |
| 447 | const bool negate_src3 = ((bool)swizzle.negate_src3 != false); | 451 | const bool negate_src3 = ((bool)swizzle.negate_src3 != false); |
| 448 | 452 | ||
| 449 | float24 src1[4] = { | 453 | float24 src1[4] = { |
| 450 | src1_[(int)swizzle.GetSelectorSrc1(0)], | 454 | src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], |
| 451 | src1_[(int)swizzle.GetSelectorSrc1(1)], | 455 | src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], |
| 452 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 453 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 454 | }; | 456 | }; |
| 455 | if (negate_src1) { | 457 | if (negate_src1) { |
| 456 | src1[0] = src1[0] * float24::FromFloat32(-1); | 458 | src1[0] = src1[0] * float24::FromFloat32(-1); |
| @@ -459,10 +461,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 459 | src1[3] = src1[3] * float24::FromFloat32(-1); | 461 | src1[3] = src1[3] * float24::FromFloat32(-1); |
| 460 | } | 462 | } |
| 461 | float24 src2[4] = { | 463 | float24 src2[4] = { |
| 462 | src2_[(int)swizzle.GetSelectorSrc2(0)], | 464 | src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], |
| 463 | src2_[(int)swizzle.GetSelectorSrc2(1)], | 465 | src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], |
| 464 | src2_[(int)swizzle.GetSelectorSrc2(2)], | ||
| 465 | src2_[(int)swizzle.GetSelectorSrc2(3)], | ||
| 466 | }; | 466 | }; |
| 467 | if (negate_src2) { | 467 | if (negate_src2) { |
| 468 | src2[0] = src2[0] * float24::FromFloat32(-1); | 468 | src2[0] = src2[0] * float24::FromFloat32(-1); |
| @@ -471,10 +471,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 471 | src2[3] = src2[3] * float24::FromFloat32(-1); | 471 | src2[3] = src2[3] * float24::FromFloat32(-1); |
| 472 | } | 472 | } |
| 473 | float24 src3[4] = { | 473 | float24 src3[4] = { |
| 474 | src3_[(int)swizzle.GetSelectorSrc3(0)], | 474 | src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)], |
| 475 | src3_[(int)swizzle.GetSelectorSrc3(1)], | 475 | src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)], |
| 476 | src3_[(int)swizzle.GetSelectorSrc3(2)], | ||
| 477 | src3_[(int)swizzle.GetSelectorSrc3(3)], | ||
| 478 | }; | 476 | }; |
| 479 | if (negate_src3) { | 477 | if (negate_src3) { |
| 480 | src3[0] = src3[0] * float24::FromFloat32(-1); | 478 | src3[0] = src3[0] * float24::FromFloat32(-1); |
| @@ -483,9 +481,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 483 | src3[3] = src3[3] * float24::FromFloat32(-1); | 481 | src3[3] = src3[3] * float24::FromFloat32(-1); |
| 484 | } | 482 | } |
| 485 | 483 | ||
| 486 | float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] | 484 | float24* dest = |
| 487 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | 485 | (instr.mad.dest.Value() < 0x10) |
| 488 | : dummy_vec4_float24; | 486 | ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] |
| 487 | : (instr.mad.dest.Value() < 0x20) | ||
| 488 | ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | ||
| 489 | : dummy_vec4_float24; | ||
| 489 | 490 | ||
| 490 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 491 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 491 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 492 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| @@ -500,16 +501,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 500 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 501 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |
| 501 | } else { | 502 | } else { |
| 502 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", | 503 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", |
| 503 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 504 | (int)instr.opcode.Value().EffectiveOpCode(), |
| 505 | instr.opcode.Value().GetInfo().name, instr.hex); | ||
| 504 | } | 506 | } |
| 505 | break; | 507 | break; |
| 506 | } | 508 | } |
| 507 | 509 | ||
| 508 | default: | 510 | default: { |
| 509 | { | 511 | static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, |
| 510 | static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { | 512 | Instruction::FlowControlType flow_control) { |
| 511 | bool results[2] = { refx == state.conditional_code[0], | 513 | bool results[2] = {refx == state.conditional_code[0], |
| 512 | refy == state.conditional_code[1] }; | 514 | refy == state.conditional_code[1]}; |
| 513 | 515 | ||
| 514 | switch (flow_control.op) { | 516 | switch (flow_control.op) { |
| 515 | case flow_control.Or: | 517 | case flow_control.Or: |
| @@ -533,44 +535,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 533 | break; | 535 | break; |
| 534 | 536 | ||
| 535 | case OpCode::Id::JMPC: | 537 | case OpCode::Id::JMPC: |
| 536 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 538 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, |
| 537 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 539 | state.conditional_code); |
| 540 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, | ||
| 541 | instr.flow_control)) { | ||
| 538 | program_counter = instr.flow_control.dest_offset - 1; | 542 | program_counter = instr.flow_control.dest_offset - 1; |
| 539 | } | 543 | } |
| 540 | break; | 544 | break; |
| 541 | 545 | ||
| 542 | case OpCode::Id::JMPU: | 546 | case OpCode::Id::JMPU: |
| 543 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 547 | Record<DebugDataRecord::COND_BOOL_IN>( |
| 548 | state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 544 | 549 | ||
| 545 | if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { | 550 | if (uniforms.b[instr.flow_control.bool_uniform_id] == |
| 551 | !(instr.flow_control.num_instructions & 1)) { | ||
| 546 | program_counter = instr.flow_control.dest_offset - 1; | 552 | program_counter = instr.flow_control.dest_offset - 1; |
| 547 | } | 553 | } |
| 548 | break; | 554 | break; |
| 549 | 555 | ||
| 550 | case OpCode::Id::CALL: | 556 | case OpCode::Id::CALL: |
| 551 | call(state, | 557 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 552 | instr.flow_control.dest_offset, | ||
| 553 | instr.flow_control.num_instructions, | ||
| 554 | program_counter + 1, 0, 0); | 558 | program_counter + 1, 0, 0); |
| 555 | break; | 559 | break; |
| 556 | 560 | ||
| 557 | case OpCode::Id::CALLU: | 561 | case OpCode::Id::CALLU: |
| 558 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 562 | Record<DebugDataRecord::COND_BOOL_IN>( |
| 563 | state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 559 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 564 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 560 | call(state, | 565 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 561 | instr.flow_control.dest_offset, | 566 | program_counter + 1, 0, 0); |
| 562 | instr.flow_control.num_instructions, | ||
| 563 | program_counter + 1, 0, 0); | ||
| 564 | } | 567 | } |
| 565 | break; | 568 | break; |
| 566 | 569 | ||
| 567 | case OpCode::Id::CALLC: | 570 | case OpCode::Id::CALLC: |
| 568 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 571 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, |
| 569 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 572 | state.conditional_code); |
| 570 | call(state, | 573 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, |
| 571 | instr.flow_control.dest_offset, | 574 | instr.flow_control)) { |
| 572 | instr.flow_control.num_instructions, | 575 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 573 | program_counter + 1, 0, 0); | 576 | program_counter + 1, 0, 0); |
| 574 | } | 577 | } |
| 575 | break; | 578 | break; |
| 576 | 579 | ||
| @@ -578,43 +581,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 578 | break; | 581 | break; |
| 579 | 582 | ||
| 580 | case OpCode::Id::IFU: | 583 | case OpCode::Id::IFU: |
| 581 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 584 | Record<DebugDataRecord::COND_BOOL_IN>( |
| 585 | state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 582 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 586 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 583 | call(state, | 587 | call(state, program_counter + 1, |
| 584 | program_counter + 1, | ||
| 585 | instr.flow_control.dest_offset - program_counter - 1, | 588 | instr.flow_control.dest_offset - program_counter - 1, |
| 586 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 589 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 590 | 0); | ||
| 587 | } else { | 591 | } else { |
| 588 | call(state, | 592 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 589 | instr.flow_control.dest_offset, | 593 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 590 | instr.flow_control.num_instructions, | 594 | 0); |
| 591 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | ||
| 592 | } | 595 | } |
| 593 | 596 | ||
| 594 | break; | 597 | break; |
| 595 | 598 | ||
| 596 | case OpCode::Id::IFC: | 599 | case OpCode::Id::IFC: { |
| 597 | { | ||
| 598 | // TODO: Do we need to consider swizzlers here? | 600 | // TODO: Do we need to consider swizzlers here? |
| 599 | 601 | ||
| 600 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 602 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, |
| 601 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 603 | state.conditional_code); |
| 602 | call(state, | 604 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, |
| 603 | program_counter + 1, | 605 | instr.flow_control)) { |
| 606 | call(state, program_counter + 1, | ||
| 604 | instr.flow_control.dest_offset - program_counter - 1, | 607 | instr.flow_control.dest_offset - program_counter - 1, |
| 605 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 608 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 609 | 0); | ||
| 606 | } else { | 610 | } else { |
| 607 | call(state, | 611 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 608 | instr.flow_control.dest_offset, | 612 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 609 | instr.flow_control.num_instructions, | 613 | 0); |
| 610 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | ||
| 611 | } | 614 | } |
| 612 | 615 | ||
| 613 | break; | 616 | break; |
| 614 | } | 617 | } |
| 615 | 618 | ||
| 616 | case OpCode::Id::LOOP: | 619 | case OpCode::Id::LOOP: { |
| 617 | { | ||
| 618 | Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, | 620 | Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, |
| 619 | uniforms.i[instr.flow_control.int_uniform_id].y, | 621 | uniforms.i[instr.flow_control.int_uniform_id].y, |
| 620 | uniforms.i[instr.flow_control.int_uniform_id].z, | 622 | uniforms.i[instr.flow_control.int_uniform_id].z, |
| @@ -622,18 +624,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 622 | state.address_registers[2] = loop_param.y; | 624 | state.address_registers[2] = loop_param.y; |
| 623 | 625 | ||
| 624 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | 626 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); |
| 625 | call(state, | 627 | call(state, program_counter + 1, |
| 626 | program_counter + 1, | ||
| 627 | instr.flow_control.dest_offset - program_counter + 1, | 628 | instr.flow_control.dest_offset - program_counter + 1, |
| 628 | instr.flow_control.dest_offset + 1, | 629 | instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); |
| 629 | loop_param.x, | ||
| 630 | loop_param.z); | ||
| 631 | break; | 630 | break; |
| 632 | } | 631 | } |
| 633 | 632 | ||
| 634 | default: | 633 | default: |
| 635 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | 634 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |
| 636 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 635 | (int)instr.opcode.Value().EffectiveOpCode(), |
| 636 | instr.opcode.Value().GetInfo().name, instr.hex); | ||
| 637 | break; | 637 | break; |
| 638 | } | 638 | } |
| 639 | 639 | ||
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index bb3ce1c6e..48ede0a2e 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -8,9 +8,10 @@ namespace Pica { | |||
| 8 | 8 | ||
| 9 | namespace Shader { | 9 | namespace Shader { |
| 10 | 10 | ||
| 11 | template <bool Debug> struct UnitState; | 11 | template <bool Debug> |
| 12 | struct UnitState; | ||
| 12 | 13 | ||
| 13 | template<bool Debug> | 14 | template <bool Debug> |
| 14 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); | 15 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); |
| 15 | 16 | ||
| 16 | } // namespace | 17 | } // namespace |
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 43e7e6b4c..211c703ab 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -5,20 +5,16 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cmath> | 6 | #include <cmath> |
| 7 | #include <cstdint> | 7 | #include <cstdint> |
| 8 | #include <xmmintrin.h> | ||
| 9 | |||
| 10 | #include <nihstro/shader_bytecode.h> | 8 | #include <nihstro/shader_bytecode.h> |
| 11 | 9 | #include <xmmintrin.h> | |
| 12 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 13 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 14 | #include "common/vector_math.h" | 12 | #include "common/vector_math.h" |
| 15 | #include "common/x64/abi.h" | 13 | #include "common/x64/abi.h" |
| 16 | #include "common/x64/cpu_detect.h" | 14 | #include "common/x64/cpu_detect.h" |
| 17 | #include "common/x64/emitter.h" | 15 | #include "common/x64/emitter.h" |
| 18 | |||
| 19 | #include "shader.h" | 16 | #include "shader.h" |
| 20 | #include "shader_jit_x64.h" | 17 | #include "shader_jit_x64.h" |
| 21 | |||
| 22 | #include "video_core/pica_state.h" | 18 | #include "video_core/pica_state.h" |
| 23 | #include "video_core/pica_types.h" | 19 | #include "video_core/pica_types.h" |
| 24 | 20 | ||
| @@ -31,70 +27,70 @@ using namespace Gen; | |||
| 31 | typedef void (JitShader::*JitFunction)(Instruction instr); | 27 | typedef void (JitShader::*JitFunction)(Instruction instr); |
| 32 | 28 | ||
| 33 | const JitFunction instr_table[64] = { | 29 | const JitFunction instr_table[64] = { |
| 34 | &JitShader::Compile_ADD, // add | 30 | &JitShader::Compile_ADD, // add |
| 35 | &JitShader::Compile_DP3, // dp3 | 31 | &JitShader::Compile_DP3, // dp3 |
| 36 | &JitShader::Compile_DP4, // dp4 | 32 | &JitShader::Compile_DP4, // dp4 |
| 37 | &JitShader::Compile_DPH, // dph | 33 | &JitShader::Compile_DPH, // dph |
| 38 | nullptr, // unknown | 34 | nullptr, // unknown |
| 39 | &JitShader::Compile_EX2, // ex2 | 35 | &JitShader::Compile_EX2, // ex2 |
| 40 | &JitShader::Compile_LG2, // lg2 | 36 | &JitShader::Compile_LG2, // lg2 |
| 41 | nullptr, // unknown | 37 | nullptr, // unknown |
| 42 | &JitShader::Compile_MUL, // mul | 38 | &JitShader::Compile_MUL, // mul |
| 43 | &JitShader::Compile_SGE, // sge | 39 | &JitShader::Compile_SGE, // sge |
| 44 | &JitShader::Compile_SLT, // slt | 40 | &JitShader::Compile_SLT, // slt |
| 45 | &JitShader::Compile_FLR, // flr | 41 | &JitShader::Compile_FLR, // flr |
| 46 | &JitShader::Compile_MAX, // max | 42 | &JitShader::Compile_MAX, // max |
| 47 | &JitShader::Compile_MIN, // min | 43 | &JitShader::Compile_MIN, // min |
| 48 | &JitShader::Compile_RCP, // rcp | 44 | &JitShader::Compile_RCP, // rcp |
| 49 | &JitShader::Compile_RSQ, // rsq | 45 | &JitShader::Compile_RSQ, // rsq |
| 50 | nullptr, // unknown | 46 | nullptr, // unknown |
| 51 | nullptr, // unknown | 47 | nullptr, // unknown |
| 52 | &JitShader::Compile_MOVA, // mova | 48 | &JitShader::Compile_MOVA, // mova |
| 53 | &JitShader::Compile_MOV, // mov | 49 | &JitShader::Compile_MOV, // mov |
| 54 | nullptr, // unknown | 50 | nullptr, // unknown |
| 55 | nullptr, // unknown | 51 | nullptr, // unknown |
| 56 | nullptr, // unknown | 52 | nullptr, // unknown |
| 57 | nullptr, // unknown | 53 | nullptr, // unknown |
| 58 | &JitShader::Compile_DPH, // dphi | 54 | &JitShader::Compile_DPH, // dphi |
| 59 | nullptr, // unknown | 55 | nullptr, // unknown |
| 60 | &JitShader::Compile_SGE, // sgei | 56 | &JitShader::Compile_SGE, // sgei |
| 61 | &JitShader::Compile_SLT, // slti | 57 | &JitShader::Compile_SLT, // slti |
| 62 | nullptr, // unknown | 58 | nullptr, // unknown |
| 63 | nullptr, // unknown | 59 | nullptr, // unknown |
| 64 | nullptr, // unknown | 60 | nullptr, // unknown |
| 65 | nullptr, // unknown | 61 | nullptr, // unknown |
| 66 | nullptr, // unknown | 62 | nullptr, // unknown |
| 67 | &JitShader::Compile_NOP, // nop | 63 | &JitShader::Compile_NOP, // nop |
| 68 | &JitShader::Compile_END, // end | 64 | &JitShader::Compile_END, // end |
| 69 | nullptr, // break | 65 | nullptr, // break |
| 70 | &JitShader::Compile_CALL, // call | 66 | &JitShader::Compile_CALL, // call |
| 71 | &JitShader::Compile_CALLC, // callc | 67 | &JitShader::Compile_CALLC, // callc |
| 72 | &JitShader::Compile_CALLU, // callu | 68 | &JitShader::Compile_CALLU, // callu |
| 73 | &JitShader::Compile_IF, // ifu | 69 | &JitShader::Compile_IF, // ifu |
| 74 | &JitShader::Compile_IF, // ifc | 70 | &JitShader::Compile_IF, // ifc |
| 75 | &JitShader::Compile_LOOP, // loop | 71 | &JitShader::Compile_LOOP, // loop |
| 76 | nullptr, // emit | 72 | nullptr, // emit |
| 77 | nullptr, // sete | 73 | nullptr, // sete |
| 78 | &JitShader::Compile_JMP, // jmpc | 74 | &JitShader::Compile_JMP, // jmpc |
| 79 | &JitShader::Compile_JMP, // jmpu | 75 | &JitShader::Compile_JMP, // jmpu |
| 80 | &JitShader::Compile_CMP, // cmp | 76 | &JitShader::Compile_CMP, // cmp |
| 81 | &JitShader::Compile_CMP, // cmp | 77 | &JitShader::Compile_CMP, // cmp |
| 82 | &JitShader::Compile_MAD, // madi | 78 | &JitShader::Compile_MAD, // madi |
| 83 | &JitShader::Compile_MAD, // madi | 79 | &JitShader::Compile_MAD, // madi |
| 84 | &JitShader::Compile_MAD, // madi | 80 | &JitShader::Compile_MAD, // madi |
| 85 | &JitShader::Compile_MAD, // madi | 81 | &JitShader::Compile_MAD, // madi |
| 86 | &JitShader::Compile_MAD, // madi | 82 | &JitShader::Compile_MAD, // madi |
| 87 | &JitShader::Compile_MAD, // madi | 83 | &JitShader::Compile_MAD, // madi |
| 88 | &JitShader::Compile_MAD, // madi | 84 | &JitShader::Compile_MAD, // madi |
| 89 | &JitShader::Compile_MAD, // madi | 85 | &JitShader::Compile_MAD, // madi |
| 90 | &JitShader::Compile_MAD, // mad | 86 | &JitShader::Compile_MAD, // mad |
| 91 | &JitShader::Compile_MAD, // mad | 87 | &JitShader::Compile_MAD, // mad |
| 92 | &JitShader::Compile_MAD, // mad | 88 | &JitShader::Compile_MAD, // mad |
| 93 | &JitShader::Compile_MAD, // mad | 89 | &JitShader::Compile_MAD, // mad |
| 94 | &JitShader::Compile_MAD, // mad | 90 | &JitShader::Compile_MAD, // mad |
| 95 | &JitShader::Compile_MAD, // mad | 91 | &JitShader::Compile_MAD, // mad |
| 96 | &JitShader::Compile_MAD, // mad | 92 | &JitShader::Compile_MAD, // mad |
| 97 | &JitShader::Compile_MAD, // mad | 93 | &JitShader::Compile_MAD, // mad |
| 98 | }; | 94 | }; |
| 99 | 95 | ||
| 100 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can | 96 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can |
| @@ -136,9 +132,9 @@ static const X64Reg NEGBIT = XMM15; | |||
| 136 | // State registers that must not be modified by external functions calls | 132 | // State registers that must not be modified by external functions calls |
| 137 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed | 133 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed |
| 138 | static const BitSet32 persistent_regs = { | 134 | static const BitSet32 persistent_regs = { |
| 139 | SETUP, STATE, // Pointers to register blocks | 135 | SETUP, STATE, // Pointers to register blocks |
| 140 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers | 136 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers |
| 141 | ONE+16, NEGBIT+16, // Constants | 137 | ONE + 16, NEGBIT + 16, // Constants |
| 142 | }; | 138 | }; |
| 143 | 139 | ||
| 144 | /// Raw constant for the source register selector that indicates no swizzling is performed | 140 | /// Raw constant for the source register selector that indicates no swizzling is performed |
| @@ -152,7 +148,7 @@ static const u8 NO_DEST_REG_MASK = 0xf; | |||
| 152 | * @return Instruction at the specified offset | 148 | * @return Instruction at the specified offset |
| 153 | */ | 149 | */ |
| 154 | static Instruction GetVertexShaderInstruction(size_t offset) { | 150 | static Instruction GetVertexShaderInstruction(size_t offset) { |
| 155 | return { g_state.vs.program_code[offset] }; | 151 | return {g_state.vs.program_code[offset]}; |
| 156 | } | 152 | } |
| 157 | 153 | ||
| 158 | static void LogCritical(const char* msg) { | 154 | static void LogCritical(const char* msg) { |
| @@ -172,7 +168,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) { | |||
| 172 | * @param src_reg SourceRegister object corresponding to the source register to load | 168 | * @param src_reg SourceRegister object corresponding to the source register to load |
| 173 | * @param dest Destination XMM register to store the loaded, swizzled source register | 169 | * @param dest Destination XMM register to store the loaded, swizzled source register |
| 174 | */ | 170 | */ |
| 175 | void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { | 171 | void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, |
| 172 | X64Reg dest) { | ||
| 176 | X64Reg src_ptr; | 173 | X64Reg src_ptr; |
| 177 | size_t src_offset; | 174 | size_t src_offset; |
| 178 | 175 | ||
| @@ -189,7 +186,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
| 189 | 186 | ||
| 190 | unsigned operand_desc_id; | 187 | unsigned operand_desc_id; |
| 191 | 188 | ||
| 192 | const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); | 189 | const bool is_inverted = |
| 190 | (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); | ||
| 193 | 191 | ||
| 194 | unsigned address_register_index; | 192 | unsigned address_register_index; |
| 195 | unsigned offset_src; | 193 | unsigned offset_src; |
| @@ -225,7 +223,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
| 225 | MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); | 223 | MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); |
| 226 | } | 224 | } |
| 227 | 225 | ||
| 228 | SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; | 226 | SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; |
| 229 | 227 | ||
| 230 | // Generate instructions for source register swizzling as needed | 228 | // Generate instructions for source register swizzling as needed |
| 231 | u8 sel = swiz.GetRawSelector(src_num); | 229 | u8 sel = swiz.GetRawSelector(src_num); |
| @@ -238,13 +236,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
| 238 | } | 236 | } |
| 239 | 237 | ||
| 240 | // If the source register should be negated, flip the negative bit using XOR | 238 | // If the source register should be negated, flip the negative bit using XOR |
| 241 | const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 }; | 239 | const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3}; |
| 242 | if (negate[src_num - 1]) { | 240 | if (negate[src_num - 1]) { |
| 243 | XORPS(dest, R(NEGBIT)); | 241 | XORPS(dest, R(NEGBIT)); |
| 244 | } | 242 | } |
| 245 | } | 243 | } |
| 246 | 244 | ||
| 247 | void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | 245 | void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) { |
| 248 | DestRegister dest; | 246 | DestRegister dest; |
| 249 | unsigned operand_desc_id; | 247 | unsigned operand_desc_id; |
| 250 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || | 248 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || |
| @@ -256,10 +254,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 256 | dest = instr.common.dest.Value(); | 254 | dest = instr.common.dest.Value(); |
| 257 | } | 255 | } |
| 258 | 256 | ||
| 259 | SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; | 257 | SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; |
| 260 | 258 | ||
| 261 | int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); | 259 | int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); |
| 262 | ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type"); | 260 | ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), |
| 261 | "Destinaton offset too large for int type"); | ||
| 263 | 262 | ||
| 264 | // If all components are enabled, write the result to the destination register | 263 | // If all components are enabled, write the result to the destination register |
| 265 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | 264 | if (swiz.dest_mask == NO_DEST_REG_MASK) { |
| @@ -267,18 +266,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 267 | MOVAPS(MDisp(STATE, dest_offset_disp), src); | 266 | MOVAPS(MDisp(STATE, dest_offset_disp), src); |
| 268 | 267 | ||
| 269 | } else { | 268 | } else { |
| 270 | // Not all components are enabled, so mask the result when storing to the destination register... | 269 | // Not all components are enabled, so mask the result when storing to the destination |
| 270 | // register... | ||
| 271 | MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); | 271 | MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); |
| 272 | 272 | ||
| 273 | if (Common::GetCPUCaps().sse4_1) { | 273 | if (Common::GetCPUCaps().sse4_1) { |
| 274 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | 274 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | |
| 275 | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||
| 275 | BLENDPS(SCRATCH, R(src), mask); | 276 | BLENDPS(SCRATCH, R(src), mask); |
| 276 | } else { | 277 | } else { |
| 277 | MOVAPS(SCRATCH2, R(src)); | 278 | MOVAPS(SCRATCH2, R(src)); |
| 278 | UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination | 279 | UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination |
| 279 | UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination | 280 | UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination |
| 280 | 281 | ||
| 281 | // Compute selector to selectively copy source components to destination for SHUFPS instruction | 282 | // Compute selector to selectively copy source components to destination for SHUFPS |
| 283 | // instruction | ||
| 282 | u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | | 284 | u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | |
| 283 | ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | | 285 | ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | |
| 284 | ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | | 286 | ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | |
| @@ -336,7 +338,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { | |||
| 336 | } | 338 | } |
| 337 | 339 | ||
| 338 | void JitShader::Compile_UniformCondition(Instruction instr) { | 340 | void JitShader::Compile_UniformCondition(Instruction instr) { |
| 339 | int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); | 341 | int offset = |
| 342 | ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); | ||
| 340 | CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); | 343 | CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); |
| 341 | } | 344 | } |
| 342 | 345 | ||
| @@ -512,7 +515,7 @@ void JitShader::Compile_MIN(Instruction instr) { | |||
| 512 | } | 515 | } |
| 513 | 516 | ||
| 514 | void JitShader::Compile_MOVA(Instruction instr) { | 517 | void JitShader::Compile_MOVA(Instruction instr) { |
| 515 | SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; | 518 | SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]}; |
| 516 | 519 | ||
| 517 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { | 520 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { |
| 518 | return; // NoOp | 521 | return; // NoOp |
| @@ -583,8 +586,7 @@ void JitShader::Compile_RSQ(Instruction instr) { | |||
| 583 | Compile_DestEnable(instr, SRC1); | 586 | Compile_DestEnable(instr, SRC1); |
| 584 | } | 587 | } |
| 585 | 588 | ||
| 586 | void JitShader::Compile_NOP(Instruction instr) { | 589 | void JitShader::Compile_NOP(Instruction instr) {} |
| 587 | } | ||
| 588 | 590 | ||
| 589 | void JitShader::Compile_END(Instruction instr) { | 591 | void JitShader::Compile_END(Instruction instr) { |
| 590 | ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | 592 | ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| @@ -597,7 +599,7 @@ void JitShader::Compile_CALL(Instruction instr) { | |||
| 597 | 599 | ||
| 598 | // Call the subroutine | 600 | // Call the subroutine |
| 599 | FixupBranch b = CALL(); | 601 | FixupBranch b = CALL(); |
| 600 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | 602 | fixup_branches.push_back({b, instr.flow_control.dest_offset}); |
| 601 | 603 | ||
| 602 | // Skip over the return offset that's on the stack | 604 | // Skip over the return offset that's on the stack |
| 603 | ADD(64, R(RSP), Imm32(8)); | 605 | ADD(64, R(RSP), Imm32(8)); |
| @@ -628,7 +630,7 @@ void JitShader::Compile_CMP(Instruction instr) { | |||
| 628 | // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to | 630 | // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to |
| 629 | // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here | 631 | // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here |
| 630 | // because they don't match when used with NaNs. | 632 | // because they don't match when used with NaNs. |
| 631 | static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; | 633 | static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE}; |
| 632 | 634 | ||
| 633 | bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); | 635 | bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); |
| 634 | Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; | 636 | Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; |
| @@ -678,7 +680,8 @@ void JitShader::Compile_MAD(Instruction instr) { | |||
| 678 | } | 680 | } |
| 679 | 681 | ||
| 680 | void JitShader::Compile_IF(Instruction instr) { | 682 | void JitShader::Compile_IF(Instruction instr) { |
| 681 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); | 683 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, |
| 684 | "Backwards if-statements not supported"); | ||
| 682 | 685 | ||
| 683 | // Evaluate the "IF" condition | 686 | // Evaluate the "IF" condition |
| 684 | if (instr.opcode.Value() == OpCode::Id::IFU) { | 687 | if (instr.opcode.Value() == OpCode::Id::IFU) { |
| @@ -709,29 +712,31 @@ void JitShader::Compile_IF(Instruction instr) { | |||
| 709 | } | 712 | } |
| 710 | 713 | ||
| 711 | void JitShader::Compile_LOOP(Instruction instr) { | 714 | void JitShader::Compile_LOOP(Instruction instr) { |
| 712 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); | 715 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, |
| 716 | "Backwards loops not supported"); | ||
| 713 | Compile_Assert(!looping, "Nested loops not supported"); | 717 | Compile_Assert(!looping, "Nested loops not supported"); |
| 714 | 718 | ||
| 715 | looping = true; | 719 | looping = true; |
| 716 | 720 | ||
| 717 | int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | 721 | int offset = |
| 722 | ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | ||
| 718 | MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); | 723 | MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); |
| 719 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); | 724 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); |
| 720 | SHR(32, R(LOOPCOUNT_REG), Imm8(8)); | 725 | SHR(32, R(LOOPCOUNT_REG), Imm8(8)); |
| 721 | AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start | 726 | AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start |
| 722 | MOV(32, R(LOOPINC), R(LOOPCOUNT)); | 727 | MOV(32, R(LOOPINC), R(LOOPCOUNT)); |
| 723 | SHR(32, R(LOOPINC), Imm8(16)); | 728 | SHR(32, R(LOOPINC), Imm8(16)); |
| 724 | MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer | 729 | MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer |
| 725 | MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count | 730 | MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count |
| 726 | ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 | 731 | ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 |
| 727 | 732 | ||
| 728 | auto loop_start = GetCodePtr(); | 733 | auto loop_start = GetCodePtr(); |
| 729 | 734 | ||
| 730 | Compile_Block(instr.flow_control.dest_offset + 1); | 735 | Compile_Block(instr.flow_control.dest_offset + 1); |
| 731 | 736 | ||
| 732 | ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component | 737 | ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component |
| 733 | SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 | 738 | SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 |
| 734 | J_CC(CC_NZ, loop_start); // Loop if not equal | 739 | J_CC(CC_NZ, loop_start); // Loop if not equal |
| 735 | 740 | ||
| 736 | looping = false; | 741 | looping = false; |
| 737 | } | 742 | } |
| @@ -744,11 +749,11 @@ void JitShader::Compile_JMP(Instruction instr) { | |||
| 744 | else | 749 | else |
| 745 | UNREACHABLE(); | 750 | UNREACHABLE(); |
| 746 | 751 | ||
| 747 | bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && | 752 | bool inverted_condition = |
| 748 | (instr.flow_control.num_instructions & 1); | 753 | (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1); |
| 749 | 754 | ||
| 750 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); | 755 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); |
| 751 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | 756 | fixup_branches.push_back({b, instr.flow_control.dest_offset}); |
| 752 | } | 757 | } |
| 753 | 758 | ||
| 754 | void JitShader::Compile_Block(unsigned end) { | 759 | void JitShader::Compile_Block(unsigned end) { |
| @@ -773,7 +778,8 @@ void JitShader::Compile_NextInstr() { | |||
| 773 | Compile_Return(); | 778 | Compile_Return(); |
| 774 | } | 779 | } |
| 775 | 780 | ||
| 776 | ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); | 781 | ASSERT_MSG(code_ptr[program_counter] == nullptr, |
| 782 | "Tried to compile already compiled shader location!"); | ||
| 777 | code_ptr[program_counter] = GetCodePtr(); | 783 | code_ptr[program_counter] = GetCodePtr(); |
| 778 | 784 | ||
| 779 | Instruction instr = GetVertexShaderInstruction(program_counter++); | 785 | Instruction instr = GetVertexShaderInstruction(program_counter++); |
| @@ -787,7 +793,7 @@ void JitShader::Compile_NextInstr() { | |||
| 787 | } else { | 793 | } else { |
| 788 | // Unhandled instruction | 794 | // Unhandled instruction |
| 789 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", | 795 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", |
| 790 | instr.opcode.Value().EffectiveOpCode(), instr.hex); | 796 | instr.opcode.Value().EffectiveOpCode(), instr.hex); |
| 791 | } | 797 | } |
| 792 | } | 798 | } |
| 793 | 799 | ||
| @@ -801,7 +807,8 @@ void JitShader::FindReturnOffsets() { | |||
| 801 | case OpCode::Id::CALL: | 807 | case OpCode::Id::CALL: |
| 802 | case OpCode::Id::CALLC: | 808 | case OpCode::Id::CALLC: |
| 803 | case OpCode::Id::CALLU: | 809 | case OpCode::Id::CALLU: |
| 804 | return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); | 810 | return_offsets.push_back(instr.flow_control.dest_offset + |
| 811 | instr.flow_control.num_instructions); | ||
| 805 | break; | 812 | break; |
| 806 | default: | 813 | default: |
| 807 | break; | 814 | break; |
| @@ -835,12 +842,12 @@ void JitShader::Compile() { | |||
| 835 | XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG)); | 842 | XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG)); |
| 836 | 843 | ||
| 837 | // Used to set a register to one | 844 | // Used to set a register to one |
| 838 | static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; | 845 | static const __m128 one = {1.f, 1.f, 1.f, 1.f}; |
| 839 | MOV(PTRBITS, R(RAX), ImmPtr(&one)); | 846 | MOV(PTRBITS, R(RAX), ImmPtr(&one)); |
| 840 | MOVAPS(ONE, MatR(RAX)); | 847 | MOVAPS(ONE, MatR(RAX)); |
| 841 | 848 | ||
| 842 | // Used to negate registers | 849 | // Used to negate registers |
| 843 | static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; | 850 | static const __m128 neg = {-0.f, -0.f, -0.f, -0.f}; |
| 844 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); | 851 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); |
| 845 | MOVAPS(NEGBIT, MatR(RAX)); | 852 | MOVAPS(NEGBIT, MatR(RAX)); |
| 846 | 853 | ||
| @@ -850,7 +857,8 @@ void JitShader::Compile() { | |||
| 850 | // Compile entire program | 857 | // Compile entire program |
| 851 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | 858 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); |
| 852 | 859 | ||
| 853 | // Set the target for any incomplete branches now that the entire shader program has been emitted | 860 | // Set the target for any incomplete branches now that the entire shader program has been |
| 861 | // emitted | ||
| 854 | for (const auto& branch : fixup_branches) { | 862 | for (const auto& branch : fixup_branches) { |
| 855 | SetJumpTarget(branch.first, code_ptr[branch.second]); | 863 | SetJumpTarget(branch.first, code_ptr[branch.second]); |
| 856 | } | 864 | } |
| @@ -861,7 +869,8 @@ void JitShader::Compile() { | |||
| 861 | fixup_branches.clear(); | 869 | fixup_branches.clear(); |
| 862 | fixup_branches.shrink_to_fit(); | 870 | fixup_branches.shrink_to_fit(); |
| 863 | 871 | ||
| 864 | uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); | 872 | uintptr_t size = |
| 873 | reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); | ||
| 865 | ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); | 874 | ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); |
| 866 | 875 | ||
| 867 | LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); | 876 | LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5468459d4..98de5ecef 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -8,13 +8,10 @@ | |||
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <utility> | 9 | #include <utility> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | |||
| 12 | #include <nihstro/shader_bytecode.h> | 11 | #include <nihstro/shader_bytecode.h> |
| 13 | |||
| 14 | #include "common/bit_set.h" | 12 | #include "common/bit_set.h" |
| 15 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 16 | #include "common/x64/emitter.h" | 14 | #include "common/x64/emitter.h" |
| 17 | |||
| 18 | #include "video_core/shader/shader.h" | 15 | #include "video_core/shader/shader.h" |
| 19 | 16 | ||
| 20 | using nihstro::Instruction; | 17 | using nihstro::Instruction; |
| @@ -70,11 +67,11 @@ public: | |||
| 70 | void Compile_MAD(Instruction instr); | 67 | void Compile_MAD(Instruction instr); |
| 71 | 68 | ||
| 72 | private: | 69 | private: |
| 73 | |||
| 74 | void Compile_Block(unsigned end); | 70 | void Compile_Block(unsigned end); |
| 75 | void Compile_NextInstr(); | 71 | void Compile_NextInstr(); |
| 76 | 72 | ||
| 77 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); | 73 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, |
| 74 | Gen::X64Reg dest); | ||
| 78 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); | 75 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); |
| 79 | 76 | ||
| 80 | /** | 77 | /** |
| @@ -111,8 +108,8 @@ private: | |||
| 111 | /// Offsets in code where a return needs to be inserted | 108 | /// Offsets in code where a return needs to be inserted |
| 112 | std::vector<unsigned> return_offsets; | 109 | std::vector<unsigned> return_offsets; |
| 113 | 110 | ||
| 114 | unsigned program_counter = 0; ///< Offset of the next instruction to decode | 111 | unsigned program_counter = 0; ///< Offset of the next instruction to decode |
| 115 | bool looping = false; ///< True if compiling a loop, used to check for nested loops | 112 | bool looping = false; ///< True if compiling a loop, used to check for nested loops |
| 116 | 113 | ||
| 117 | /// Branches that need to be fixed up once the entire shader program is compiled | 114 | /// Branches that need to be fixed up once the entire shader program is compiled |
| 118 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; | 115 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; |
diff --git a/src/video_core/swrasterizer.cpp b/src/video_core/swrasterizer.cpp index 03df15b01..9cd21f72b 100644 --- a/src/video_core/swrasterizer.cpp +++ b/src/video_core/swrasterizer.cpp | |||
| @@ -8,9 +8,8 @@ | |||
| 8 | namespace VideoCore { | 8 | namespace VideoCore { |
| 9 | 9 | ||
| 10 | void SWRasterizer::AddTriangle(const Pica::Shader::OutputVertex& v0, | 10 | void SWRasterizer::AddTriangle(const Pica::Shader::OutputVertex& v0, |
| 11 | const Pica::Shader::OutputVertex& v1, | 11 | const Pica::Shader::OutputVertex& v1, |
| 12 | const Pica::Shader::OutputVertex& v2) { | 12 | const Pica::Shader::OutputVertex& v2) { |
| 13 | Pica::Clipper::ProcessTriangle(v0, v1, v2); | 13 | Pica::Clipper::ProcessTriangle(v0, v1, v2); |
| 14 | } | 14 | } |
| 15 | |||
| 16 | } | 15 | } |
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h index 0a028b774..6d42d7409 100644 --- a/src/video_core/swrasterizer.h +++ b/src/video_core/swrasterizer.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | |||
| 9 | #include "video_core/rasterizer_interface.h" | 8 | #include "video_core/rasterizer_interface.h" |
| 10 | 9 | ||
| 11 | namespace Pica { | 10 | namespace Pica { |
| @@ -17,14 +16,12 @@ struct OutputVertex; | |||
| 17 | namespace VideoCore { | 16 | namespace VideoCore { |
| 18 | 17 | ||
| 19 | class SWRasterizer : public RasterizerInterface { | 18 | class SWRasterizer : public RasterizerInterface { |
| 20 | void AddTriangle(const Pica::Shader::OutputVertex& v0, | 19 | void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, |
| 21 | const Pica::Shader::OutputVertex& v1, | 20 | const Pica::Shader::OutputVertex& v2) override; |
| 22 | const Pica::Shader::OutputVertex& v2) override; | ||
| 23 | void DrawTriangles() override {} | 21 | void DrawTriangles() override {} |
| 24 | void NotifyPicaRegisterChanged(u32 id) override {} | 22 | void NotifyPicaRegisterChanged(u32 id) override {} |
| 25 | void FlushAll() override {} | 23 | void FlushAll() override {} |
| 26 | void FlushRegion(PAddr addr, u32 size) override {} | 24 | void FlushRegion(PAddr addr, u32 size) override {} |
| 27 | void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} | 25 | void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} |
| 28 | }; | 26 | }; |
| 29 | |||
| 30 | } | 27 | } |
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index e40f0f1ee..2b8ef7018 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp | |||
| @@ -1,16 +1,12 @@ | |||
| 1 | #include <memory> | 1 | #include <memory> |
| 2 | |||
| 3 | #include <boost/range/algorithm/fill.hpp> | 2 | #include <boost/range/algorithm/fill.hpp> |
| 4 | |||
| 5 | #include "common/alignment.h" | 3 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 4 | #include "common/assert.h" |
| 7 | #include "common/bit_field.h" | 5 | #include "common/bit_field.h" |
| 8 | #include "common/common_types.h" | 6 | #include "common/common_types.h" |
| 9 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 10 | #include "common/vector_math.h" | 8 | #include "common/vector_math.h" |
| 11 | |||
| 12 | #include "core/memory.h" | 9 | #include "core/memory.h" |
| 13 | |||
| 14 | #include "video_core/debug_utils/debug_utils.h" | 10 | #include "video_core/debug_utils/debug_utils.h" |
| 15 | #include "video_core/pica.h" | 11 | #include "video_core/pica.h" |
| 16 | #include "video_core/pica_state.h" | 12 | #include "video_core/pica_state.h" |
| @@ -41,24 +37,32 @@ void VertexLoader::Setup(const Pica::Regs& regs) { | |||
| 41 | // TODO: What happens if a loader overwrites a previous one's data? | 37 | // TODO: What happens if a loader overwrites a previous one's data? |
| 42 | for (unsigned component = 0; component < loader_config.component_count; ++component) { | 38 | for (unsigned component = 0; component < loader_config.component_count; ++component) { |
| 43 | if (component >= 12) { | 39 | if (component >= 12) { |
| 44 | LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); | 40 | LOG_ERROR(HW_GPU, |
| 41 | "Overflow in the vertex attribute loader %u trying to load component %u", | ||
| 42 | loader, component); | ||
| 45 | continue; | 43 | continue; |
| 46 | } | 44 | } |
| 47 | 45 | ||
| 48 | u32 attribute_index = loader_config.GetComponent(component); | 46 | u32 attribute_index = loader_config.GetComponent(component); |
| 49 | if (attribute_index < 12) { | 47 | if (attribute_index < 12) { |
| 50 | offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); | 48 | offset = Common::AlignUp(offset, |
| 49 | attribute_config.GetElementSizeInBytes(attribute_index)); | ||
| 51 | vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; | 50 | vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; |
| 52 | vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); | 51 | vertex_attribute_strides[attribute_index] = |
| 53 | vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); | 52 | static_cast<u32>(loader_config.byte_count); |
| 54 | vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | 53 | vertex_attribute_formats[attribute_index] = |
| 54 | attribute_config.GetFormat(attribute_index); | ||
| 55 | vertex_attribute_elements[attribute_index] = | ||
| 56 | attribute_config.GetNumElements(attribute_index); | ||
| 55 | offset += attribute_config.GetStride(attribute_index); | 57 | offset += attribute_config.GetStride(attribute_index); |
| 56 | } else if (attribute_index < 16) { | 58 | } else if (attribute_index < 16) { |
| 57 | // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively | 59 | // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, |
| 60 | // respectively | ||
| 58 | offset = Common::AlignUp(offset, 4); | 61 | offset = Common::AlignUp(offset, 4); |
| 59 | offset += (attribute_index - 11) * 4; | 62 | offset += (attribute_index - 11) * 4; |
| 60 | } else { | 63 | } else { |
| 61 | UNREACHABLE(); // This is truly unreachable due to the number of bits for each component | 64 | UNREACHABLE(); // This is truly unreachable due to the number of bits for each |
| 65 | // component | ||
| 62 | } | 66 | } |
| 63 | } | 67 | } |
| 64 | } | 68 | } |
| @@ -66,48 +70,55 @@ void VertexLoader::Setup(const Pica::Regs& regs) { | |||
| 66 | is_setup = true; | 70 | is_setup = true; |
| 67 | } | 71 | } |
| 68 | 72 | ||
| 69 | void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) { | 73 | void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, |
| 74 | DebugUtils::MemoryAccessTracker& memory_accesses) { | ||
| 70 | ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); | 75 | ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); |
| 71 | 76 | ||
| 72 | for (int i = 0; i < num_total_attributes; ++i) { | 77 | for (int i = 0; i < num_total_attributes; ++i) { |
| 73 | if (vertex_attribute_elements[i] != 0) { | 78 | if (vertex_attribute_elements[i] != 0) { |
| 74 | // Load per-vertex data from the loader arrays | 79 | // Load per-vertex data from the loader arrays |
| 75 | u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; | 80 | u32 source_addr = |
| 81 | base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; | ||
| 76 | 82 | ||
| 77 | if (g_debug_context && Pica::g_debug_context->recorder) { | 83 | if (g_debug_context && Pica::g_debug_context->recorder) { |
| 78 | memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * ( | 84 | memory_accesses.AddAccess( |
| 79 | (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 | 85 | source_addr, |
| 80 | : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1)); | 86 | vertex_attribute_elements[i] * |
| 87 | ((vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) | ||
| 88 | ? 4 | ||
| 89 | : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) | ||
| 90 | ? 2 | ||
| 91 | : 1)); | ||
| 81 | } | 92 | } |
| 82 | 93 | ||
| 83 | switch (vertex_attribute_formats[i]) { | 94 | switch (vertex_attribute_formats[i]) { |
| 84 | case Regs::VertexAttributeFormat::BYTE: | 95 | case Regs::VertexAttributeFormat::BYTE: { |
| 85 | { | 96 | const s8* srcdata = |
| 86 | const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); | 97 | reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); |
| 87 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | 98 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |
| 88 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | 99 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); |
| 89 | } | 100 | } |
| 90 | break; | 101 | break; |
| 91 | } | 102 | } |
| 92 | case Regs::VertexAttributeFormat::UBYTE: | 103 | case Regs::VertexAttributeFormat::UBYTE: { |
| 93 | { | 104 | const u8* srcdata = |
| 94 | const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); | 105 | reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); |
| 95 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | 106 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |
| 96 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | 107 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); |
| 97 | } | 108 | } |
| 98 | break; | 109 | break; |
| 99 | } | 110 | } |
| 100 | case Regs::VertexAttributeFormat::SHORT: | 111 | case Regs::VertexAttributeFormat::SHORT: { |
| 101 | { | 112 | const s16* srcdata = |
| 102 | const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); | 113 | reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); |
| 103 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | 114 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |
| 104 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | 115 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); |
| 105 | } | 116 | } |
| 106 | break; | 117 | break; |
| 107 | } | 118 | } |
| 108 | case Regs::VertexAttributeFormat::FLOAT: | 119 | case Regs::VertexAttributeFormat::FLOAT: { |
| 109 | { | 120 | const float* srcdata = |
| 110 | const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); | 121 | reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); |
| 111 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | 122 | for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |
| 112 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | 123 | input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); |
| 113 | } | 124 | } |
| @@ -119,22 +130,23 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I | |||
| 119 | // is *not* carried over from the default attribute settings even if they're | 130 | // is *not* carried over from the default attribute settings even if they're |
| 120 | // enabled for this attribute. | 131 | // enabled for this attribute. |
| 121 | for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { | 132 | for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { |
| 122 | input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | 133 | input.attr[i][comp] = |
| 134 | comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | ||
| 123 | } | 135 | } |
| 124 | 136 | ||
| 125 | LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", | 137 | LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from " |
| 126 | vertex_attribute_elements[i], i, vertex, index, | 138 | "0x%08x + 0x%08x + 0x%04x: %f %f %f %f", |
| 127 | base_address, | 139 | vertex_attribute_elements[i], i, vertex, index, base_address, |
| 128 | vertex_attribute_sources[i], | 140 | vertex_attribute_sources[i], vertex_attribute_strides[i] * vertex, |
| 129 | vertex_attribute_strides[i] * vertex, | 141 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), |
| 130 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | 142 | input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); |
| 131 | } else if (vertex_attribute_is_default[i]) { | 143 | } else if (vertex_attribute_is_default[i]) { |
| 132 | // Load the default attribute if we're configured to do so | 144 | // Load the default attribute if we're configured to do so |
| 133 | input.attr[i] = g_state.vs_default_attributes[i]; | 145 | input.attr[i] = g_state.vs_default_attributes[i]; |
| 134 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | 146 | LOG_TRACE(HW_GPU, |
| 135 | i, vertex, index, | 147 | "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i, |
| 136 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | 148 | vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), |
| 137 | input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | 149 | input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); |
| 138 | } else { | 150 | } else { |
| 139 | // TODO(yuriks): In this case, no data gets loaded and the vertex | 151 | // TODO(yuriks): In this case, no data gets loaded and the vertex |
| 140 | // remains with the last value it had. This isn't currently maintained | 152 | // remains with the last value it had. This isn't currently maintained |
| @@ -143,4 +155,4 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I | |||
| 143 | } | 155 | } |
| 144 | } | 156 | } |
| 145 | 157 | ||
| 146 | } // namespace Pica | 158 | } // namespace Pica |
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h index ac162c254..9f2098bb2 100644 --- a/src/video_core/vertex_loader.h +++ b/src/video_core/vertex_loader.h | |||
| @@ -1,7 +1,6 @@ | |||
| 1 | #pragma once | 1 | #pragma once |
| 2 | 2 | ||
| 3 | #include <array> | 3 | #include <array> |
| 4 | |||
| 5 | #include "common/common_types.h" | 4 | #include "common/common_types.h" |
| 6 | #include "video_core/pica.h" | 5 | #include "video_core/pica.h" |
| 7 | 6 | ||
| @@ -23,9 +22,12 @@ public: | |||
| 23 | } | 22 | } |
| 24 | 23 | ||
| 25 | void Setup(const Pica::Regs& regs); | 24 | void Setup(const Pica::Regs& regs); |
| 26 | void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses); | 25 | void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, |
| 26 | DebugUtils::MemoryAccessTracker& memory_accesses); | ||
| 27 | 27 | ||
| 28 | int GetNumTotalAttributes() const { return num_total_attributes; } | 28 | int GetNumTotalAttributes() const { |
| 29 | return num_total_attributes; | ||
| 30 | } | ||
| 29 | 31 | ||
| 30 | private: | 32 | private: |
| 31 | std::array<u32, 16> vertex_attribute_sources; | 33 | std::array<u32, 16> vertex_attribute_sources; |
| @@ -37,4 +39,4 @@ private: | |||
| 37 | bool is_setup = false; | 39 | bool is_setup = false; |
| 38 | }; | 40 | }; |
| 39 | 41 | ||
| 40 | } // namespace Pica | 42 | } // namespace Pica |
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index bd6e5eb6b..83e33dfc2 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -3,21 +3,19 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <memory> | 5 | #include <memory> |
| 6 | |||
| 7 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 8 | |||
| 9 | #include "video_core/pica.h" | 7 | #include "video_core/pica.h" |
| 10 | #include "video_core/renderer_base.h" | 8 | #include "video_core/renderer_base.h" |
| 11 | #include "video_core/video_core.h" | ||
| 12 | #include "video_core/renderer_opengl/renderer_opengl.h" | 9 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 10 | #include "video_core/video_core.h" | ||
| 13 | 11 | ||
| 14 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 12 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| 15 | // Video Core namespace | 13 | // Video Core namespace |
| 16 | 14 | ||
| 17 | namespace VideoCore { | 15 | namespace VideoCore { |
| 18 | 16 | ||
| 19 | EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window | 17 | EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window |
| 20 | std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin | 18 | std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin |
| 21 | 19 | ||
| 22 | std::atomic<bool> g_hw_renderer_enabled; | 20 | std::atomic<bool> g_hw_renderer_enabled; |
| 23 | std::atomic<bool> g_shader_jit_enabled; | 21 | std::atomic<bool> g_shader_jit_enabled; |
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 30267489e..e2d725ab1 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h | |||
| @@ -22,18 +22,19 @@ namespace VideoCore { | |||
| 22 | // framebuffers in video memory are stored in column-major order and rendered sideways, causing | 22 | // framebuffers in video memory are stored in column-major order and rendered sideways, causing |
| 23 | // the widths and heights of the framebuffers read by the LCD to be switched compared to the | 23 | // the widths and heights of the framebuffers read by the LCD to be switched compared to the |
| 24 | // heights and widths of the screens listed here. | 24 | // heights and widths of the screens listed here. |
| 25 | static const int kScreenTopWidth = 400; ///< 3DS top screen width | 25 | static const int kScreenTopWidth = 400; ///< 3DS top screen width |
| 26 | static const int kScreenTopHeight = 240; ///< 3DS top screen height | 26 | static const int kScreenTopHeight = 240; ///< 3DS top screen height |
| 27 | static const int kScreenBottomWidth = 320; ///< 3DS bottom screen width | 27 | static const int kScreenBottomWidth = 320; ///< 3DS bottom screen width |
| 28 | static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height | 28 | static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height |
| 29 | 29 | ||
| 30 | // Video core renderer | 30 | // Video core renderer |
| 31 | // --------------------- | 31 | // --------------------- |
| 32 | 32 | ||
| 33 | extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin | 33 | extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin |
| 34 | extern EmuWindow* g_emu_window; ///< Emu window | 34 | extern EmuWindow* g_emu_window; ///< Emu window |
| 35 | 35 | ||
| 36 | // TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) | 36 | // TODO: Wrap these in a user settings struct along with any other graphics settings (often set from |
| 37 | // qt ui) | ||
| 37 | extern std::atomic<bool> g_hw_renderer_enabled; | 38 | extern std::atomic<bool> g_hw_renderer_enabled; |
| 38 | extern std::atomic<bool> g_shader_jit_enabled; | 39 | extern std::atomic<bool> g_shader_jit_enabled; |
| 39 | extern std::atomic<bool> g_scaled_resolution_enabled; | 40 | extern std::atomic<bool> g_scaled_resolution_enabled; |