summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/clipper.cpp10
-rw-r--r--src/video_core/command_processor.cpp34
-rw-r--r--src/video_core/pica.h401
-rw-r--r--src/video_core/pica_types.h146
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp325
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h147
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp219
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h2
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_state.h4
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h12
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp4
-rw-r--r--src/video_core/shader/shader.cpp6
-rw-r--r--src/video_core/shader/shader.h8
15 files changed, 1167 insertions, 160 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c3d7294d5..4b5d298f3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -33,6 +33,7 @@ set(HEADERS
33 command_processor.h 33 command_processor.h
34 gpu_debugger.h 34 gpu_debugger.h
35 pica.h 35 pica.h
36 pica_types.h
36 primitive_assembly.h 37 primitive_assembly.h
37 rasterizer.h 38 rasterizer.h
38 rasterizer_interface.h 39 rasterizer_interface.h
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 5d609da06..a385589d2 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -59,15 +59,17 @@ static void InitScreenCoordinates(OutputVertex& vtx)
59 } viewport; 59 } viewport;
60 60
61 const auto& regs = g_state.regs; 61 const auto& regs = g_state.regs;
62 viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x); 62 viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x);
63 viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y); 63 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
64 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); 64 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
65 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); 65 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
66 viewport.zscale = float24::FromRawFloat24(regs.viewport_depth_range); 66 viewport.zscale = float24::FromRaw(regs.viewport_depth_range);
67 viewport.offset_z = float24::FromRawFloat24(regs.viewport_depth_far_plane); 67 viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane);
68 68
69 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; 69 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
70 vtx.color *= inv_w; 70 vtx.color *= inv_w;
71 vtx.view *= inv_w;
72 vtx.quat *= inv_w;
71 vtx.tc0 *= inv_w; 73 vtx.tc0 *= inv_w;
72 vtx.tc1 *= inv_w; 74 vtx.tc1 *= inv_w;
73 vtx.tc2 *= inv_w; 75 vtx.tc2 *= inv_w;
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 59c75042c..5dfedfe31 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -98,10 +98,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
98 Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; 98 Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index];
99 99
100 // NOTE: The destination component order indeed is "backwards" 100 // NOTE: The destination component order indeed is "backwards"
101 attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); 101 attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
102 attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); 102 attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
103 attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); 103 attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF));
104 attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); 104 attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
105 105
106 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, 106 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
107 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), 107 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
@@ -418,10 +418,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
418 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); 418 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
419 } else { 419 } else {
420 // TODO: Untested 420 // TODO: Untested
421 uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8); 421 uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
422 uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); 422 uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
423 uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); 423 uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
424 uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF); 424 uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
425 } 425 }
426 426
427 LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, 427 LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
@@ -464,6 +464,24 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
464 break; 464 break;
465 } 465 }
466 466
467 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8):
468 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
469 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
470 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
471 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
472 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
473 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
474 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
475 {
476 auto& lut_config = regs.lighting.lut_config;
477
478 ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!");
479
480 g_state.lighting.luts[lut_config.type][lut_config.index].raw = value;
481 lut_config.index = lut_config.index + 1;
482 break;
483 }
484
467 default: 485 default:
468 break; 486 break;
469 } 487 }
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 2f1b2dec4..9077b1725 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -16,6 +16,8 @@
16#include "common/vector_math.h" 16#include "common/vector_math.h"
17#include "common/logging/log.h" 17#include "common/logging/log.h"
18 18
19#include "pica_types.h"
20
19namespace Pica { 21namespace Pica {
20 22
21// Returns index corresponding to the Regs member labeled by field_name 23// Returns index corresponding to the Regs member labeled by field_name
@@ -239,7 +241,8 @@ struct Regs {
239 TextureConfig texture0; 241 TextureConfig texture0;
240 INSERT_PADDING_WORDS(0x8); 242 INSERT_PADDING_WORDS(0x8);
241 BitField<0, 4, TextureFormat> texture0_format; 243 BitField<0, 4, TextureFormat> texture0_format;
242 INSERT_PADDING_WORDS(0x2); 244 BitField<0, 1, u32> fragment_lighting_enable;
245 INSERT_PADDING_WORDS(0x1);
243 TextureConfig texture1; 246 TextureConfig texture1;
244 BitField<0, 4, TextureFormat> texture1_format; 247 BitField<0, 4, TextureFormat> texture1_format;
245 INSERT_PADDING_WORDS(0x2); 248 INSERT_PADDING_WORDS(0x2);
@@ -641,7 +644,268 @@ struct Regs {
641 } 644 }
642 } 645 }
643 646
644 INSERT_PADDING_WORDS(0xe0); 647 INSERT_PADDING_WORDS(0x20);
648
649 enum class LightingSampler {
650 Distribution0 = 0,
651 Distribution1 = 1,
652 Fresnel = 3,
653 ReflectBlue = 4,
654 ReflectGreen = 5,
655 ReflectRed = 6,
656 SpotlightAttenuation = 8,
657 DistanceAttenuation = 16,
658 };
659
660 /**
661 * Pica fragment lighting supports using different LUTs for each lighting component:
662 * Reflectance R, G, and B channels, distribution function for specular components 0 and 1,
663 * fresnel factor, and spotlight attenuation. Furthermore, which LUTs are used for each channel
664 * (or whether a channel is enabled at all) is specified by various pre-defined lighting
665 * configurations. With configurations that require more LUTs, more cycles are required on HW to
666 * perform lighting computations.
667 */
668 enum class LightingConfig {
669 Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight
670 Config1 = 1, ///< Reflect Red, Fresnel, Spotlight
671 Config2 = 2, ///< Reflect Red, Distribution 0/1
672 Config3 = 3, ///< Distribution 0/1, Fresnel
673 Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight
674 Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight
675 Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight
676 Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight
677 ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration
678 };
679
680 /// Selects which lighting components are affected by fresnel
681 enum class LightingFresnelSelector {
682 None = 0, ///< Fresnel is disabled
683 PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel
684 SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel
685 Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel
686 };
687
688 /// Factor used to scale the output of a lighting LUT
689 enum class LightingScale {
690 Scale1 = 0, ///< Scale is 1x
691 Scale2 = 1, ///< Scale is 2x
692 Scale4 = 2, ///< Scale is 4x
693 Scale8 = 3, ///< Scale is 8x
694 Scale1_4 = 6, ///< Scale is 0.25x
695 Scale1_2 = 7, ///< Scale is 0.5x
696 };
697
698 enum class LightingLutInput {
699 NH = 0, // Cosine of the angle between the normal and half-angle vectors
700 VH = 1, // Cosine of the angle between the view and half-angle vectors
701 NV = 2, // Cosine of the angle between the normal and the view vector
702 LN = 3, // Cosine of the angle between the light and the normal vectors
703 };
704
705 enum class LightingBumpMode : u32 {
706 None = 0,
707 NormalMap = 1,
708 TangentMap = 2,
709 };
710
711 union LightColor {
712 BitField< 0, 10, u32> b;
713 BitField<10, 10, u32> g;
714 BitField<20, 10, u32> r;
715
716 Math::Vec3f ToVec3f() const {
717 // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component
718 return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
719 }
720 };
721
722 /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration
723 static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
724 switch (sampler) {
725 case LightingSampler::Distribution0:
726 return (config != LightingConfig::Config1);
727
728 case LightingSampler::Distribution1:
729 return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5);
730
731 case LightingSampler::Fresnel:
732 return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4);
733
734 case LightingSampler::ReflectRed:
735 return (config != LightingConfig::Config3);
736
737 case LightingSampler::ReflectGreen:
738 case LightingSampler::ReflectBlue:
739 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7);
740 }
741 return false;
742 }
743
744 struct {
745 struct LightSrc {
746 LightColor specular_0; // material.specular_0 * light.specular_0
747 LightColor specular_1; // material.specular_1 * light.specular_1
748 LightColor diffuse; // material.diffuse * light.diffuse
749 LightColor ambient; // material.ambient * light.ambient
750
751 struct {
752 // Encoded as 16-bit floating point
753 union {
754 BitField< 0, 16, u32> x;
755 BitField<16, 16, u32> y;
756 };
757 union {
758 BitField< 0, 16, u32> z;
759 };
760
761 INSERT_PADDING_WORDS(0x3);
762
763 union {
764 BitField<0, 1, u32> directional;
765 BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
766 };
767 };
768
769 BitField<0, 20, u32> dist_atten_bias;
770 BitField<0, 20, u32> dist_atten_scale;
771
772 INSERT_PADDING_WORDS(0x4);
773 };
774 static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words");
775
776 LightSrc light[8];
777 LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
778 INSERT_PADDING_WORDS(0x1);
779 BitField<0, 3, u32> num_lights; // Number of enabled lights - 1
780
781 union {
782 BitField< 2, 2, LightingFresnelSelector> fresnel_selector;
783 BitField< 4, 4, LightingConfig> config;
784 BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
785 BitField<27, 1, u32> clamp_highlights;
786 BitField<28, 2, LightingBumpMode> bump_mode;
787 BitField<30, 1, u32> disable_bump_renorm;
788 };
789
790 union {
791 BitField<16, 1, u32> disable_lut_d0;
792 BitField<17, 1, u32> disable_lut_d1;
793 BitField<19, 1, u32> disable_lut_fr;
794 BitField<20, 1, u32> disable_lut_rr;
795 BitField<21, 1, u32> disable_lut_rg;
796 BitField<22, 1, u32> disable_lut_rb;
797
798 // Each bit specifies whether distance attenuation should be applied for the
799 // corresponding light
800
801 BitField<24, 1, u32> disable_dist_atten_light_0;
802 BitField<25, 1, u32> disable_dist_atten_light_1;
803 BitField<26, 1, u32> disable_dist_atten_light_2;
804 BitField<27, 1, u32> disable_dist_atten_light_3;
805 BitField<28, 1, u32> disable_dist_atten_light_4;
806 BitField<29, 1, u32> disable_dist_atten_light_5;
807 BitField<30, 1, u32> disable_dist_atten_light_6;
808 BitField<31, 1, u32> disable_dist_atten_light_7;
809 };
810
811 bool IsDistAttenDisabled(unsigned index) const {
812 const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1,
813 disable_dist_atten_light_2, disable_dist_atten_light_3,
814 disable_dist_atten_light_4, disable_dist_atten_light_5,
815 disable_dist_atten_light_6, disable_dist_atten_light_7 };
816 return disable[index] != 0;
817 }
818
819 union {
820 BitField<0, 8, u32> index; ///< Index at which to set data in the LUT
821 BitField<8, 5, u32> type; ///< Type of LUT for which to set data
822 } lut_config;
823
824 BitField<0, 1, u32> disable;
825 INSERT_PADDING_WORDS(0x1);
826
827 // When data is written to any of these registers, it gets written to the lookup table of
828 // the selected type at the selected index, specified above in the `lut_config` register.
829 // With each write, `lut_config.index` is incremented. It does not matter which of these
830 // registers is written to, the behavior will be the same.
831 u32 lut_data[8];
832
833 // These are used to specify if absolute (abs) value should be used for each LUT index. When
834 // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in
835 // the range of (0.0, 1.0).
836 union {
837 BitField< 1, 1, u32> disable_d0;
838 BitField< 5, 1, u32> disable_d1;
839 BitField< 9, 1, u32> disable_sp;
840 BitField<13, 1, u32> disable_fr;
841 BitField<17, 1, u32> disable_rb;
842 BitField<21, 1, u32> disable_rg;
843 BitField<25, 1, u32> disable_rr;
844 } abs_lut_input;
845
846 union {
847 BitField< 0, 3, LightingLutInput> d0;
848 BitField< 4, 3, LightingLutInput> d1;
849 BitField< 8, 3, LightingLutInput> sp;
850 BitField<12, 3, LightingLutInput> fr;
851 BitField<16, 3, LightingLutInput> rb;
852 BitField<20, 3, LightingLutInput> rg;
853 BitField<24, 3, LightingLutInput> rr;
854 } lut_input;
855
856 union {
857 BitField< 0, 3, LightingScale> d0;
858 BitField< 4, 3, LightingScale> d1;
859 BitField< 8, 3, LightingScale> sp;
860 BitField<12, 3, LightingScale> fr;
861 BitField<16, 3, LightingScale> rb;
862 BitField<20, 3, LightingScale> rg;
863 BitField<24, 3, LightingScale> rr;
864
865 static float GetScale(LightingScale scale) {
866 switch (scale) {
867 case LightingScale::Scale1:
868 return 1.0f;
869 case LightingScale::Scale2:
870 return 2.0f;
871 case LightingScale::Scale4:
872 return 4.0f;
873 case LightingScale::Scale8:
874 return 8.0f;
875 case LightingScale::Scale1_4:
876 return 0.25f;
877 case LightingScale::Scale1_2:
878 return 0.5f;
879 }
880 return 0.0f;
881 }
882 } lut_scale;
883
884 INSERT_PADDING_WORDS(0x6);
885
886 union {
887 // There are 8 light enable "slots", corresponding to the total number of lights
888 // supported by Pica. For N enabled lights (specified by register 0x1c2, or 'num_lights'
889 // above), the first N slots below will be set to integers within the range of 0-7,
890 // corresponding to the actual light that is enabled for each slot.
891
892 BitField< 0, 3, u32> slot_0;
893 BitField< 4, 3, u32> slot_1;
894 BitField< 8, 3, u32> slot_2;
895 BitField<12, 3, u32> slot_3;
896 BitField<16, 3, u32> slot_4;
897 BitField<20, 3, u32> slot_5;
898 BitField<24, 3, u32> slot_6;
899 BitField<28, 3, u32> slot_7;
900
901 unsigned GetNum(unsigned index) const {
902 const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 };
903 return enable_slots[index];
904 }
905 } light_enable;
906 } lighting;
907
908 INSERT_PADDING_WORDS(0x26);
645 909
646 enum class VertexAttributeFormat : u64 { 910 enum class VertexAttributeFormat : u64 {
647 BYTE = 0, 911 BYTE = 0,
@@ -990,6 +1254,7 @@ ASSERT_REG_POSITION(viewport_corner, 0x68);
990ASSERT_REG_POSITION(texture0_enable, 0x80); 1254ASSERT_REG_POSITION(texture0_enable, 0x80);
991ASSERT_REG_POSITION(texture0, 0x81); 1255ASSERT_REG_POSITION(texture0, 0x81);
992ASSERT_REG_POSITION(texture0_format, 0x8e); 1256ASSERT_REG_POSITION(texture0_format, 0x8e);
1257ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f);
993ASSERT_REG_POSITION(texture1, 0x91); 1258ASSERT_REG_POSITION(texture1, 0x91);
994ASSERT_REG_POSITION(texture1_format, 0x96); 1259ASSERT_REG_POSITION(texture1_format, 0x96);
995ASSERT_REG_POSITION(texture2, 0x99); 1260ASSERT_REG_POSITION(texture2, 0x99);
@@ -1004,6 +1269,7 @@ ASSERT_REG_POSITION(tev_stage5, 0xf8);
1004ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); 1269ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
1005ASSERT_REG_POSITION(output_merger, 0x100); 1270ASSERT_REG_POSITION(output_merger, 0x100);
1006ASSERT_REG_POSITION(framebuffer, 0x110); 1271ASSERT_REG_POSITION(framebuffer, 0x110);
1272ASSERT_REG_POSITION(lighting, 0x140);
1007ASSERT_REG_POSITION(vertex_attributes, 0x200); 1273ASSERT_REG_POSITION(vertex_attributes, 0x200);
1008ASSERT_REG_POSITION(index_array, 0x227); 1274ASSERT_REG_POSITION(index_array, 0x227);
1009ASSERT_REG_POSITION(num_vertices, 0x228); 1275ASSERT_REG_POSITION(num_vertices, 0x228);
@@ -1026,118 +1292,6 @@ static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig st
1026static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); 1292static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
1027static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); 1293static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
1028 1294
1029struct float24 {
1030 static float24 FromFloat32(float val) {
1031 float24 ret;
1032 ret.value = val;
1033 return ret;
1034 }
1035
1036 // 16 bit mantissa, 7 bit exponent, 1 bit sign
1037 // TODO: No idea if this works as intended
1038 static float24 FromRawFloat24(u32 hex) {
1039 float24 ret;
1040 if ((hex & 0xFFFFFF) == 0) {
1041 ret.value = 0;
1042 } else {
1043 u32 mantissa = hex & 0xFFFF;
1044 u32 exponent = (hex >> 16) & 0x7F;
1045 u32 sign = hex >> 23;
1046 ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f));
1047 if (sign)
1048 ret.value = -ret.value;
1049 }
1050 return ret;
1051 }
1052
1053 static float24 Zero() {
1054 return FromFloat32(0.f);
1055 }
1056
1057 // Not recommended for anything but logging
1058 float ToFloat32() const {
1059 return value;
1060 }
1061
1062 float24 operator * (const float24& flt) const {
1063 if ((this->value == 0.f && !std::isnan(flt.value)) ||
1064 (flt.value == 0.f && !std::isnan(this->value)))
1065 // PICA gives 0 instead of NaN when multiplying by inf
1066 return Zero();
1067 return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
1068 }
1069
1070 float24 operator / (const float24& flt) const {
1071 return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
1072 }
1073
1074 float24 operator + (const float24& flt) const {
1075 return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
1076 }
1077
1078 float24 operator - (const float24& flt) const {
1079 return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
1080 }
1081
1082 float24& operator *= (const float24& flt) {
1083 if ((this->value == 0.f && !std::isnan(flt.value)) ||
1084 (flt.value == 0.f && !std::isnan(this->value)))
1085 // PICA gives 0 instead of NaN when multiplying by inf
1086 *this = Zero();
1087 else value *= flt.ToFloat32();
1088 return *this;
1089 }
1090
1091 float24& operator /= (const float24& flt) {
1092 value /= flt.ToFloat32();
1093 return *this;
1094 }
1095
1096 float24& operator += (const float24& flt) {
1097 value += flt.ToFloat32();
1098 return *this;
1099 }
1100
1101 float24& operator -= (const float24& flt) {
1102 value -= flt.ToFloat32();
1103 return *this;
1104 }
1105
1106 float24 operator - () const {
1107 return float24::FromFloat32(-ToFloat32());
1108 }
1109
1110 bool operator < (const float24& flt) const {
1111 return ToFloat32() < flt.ToFloat32();
1112 }
1113
1114 bool operator > (const float24& flt) const {
1115 return ToFloat32() > flt.ToFloat32();
1116 }
1117
1118 bool operator >= (const float24& flt) const {
1119 return ToFloat32() >= flt.ToFloat32();
1120 }
1121
1122 bool operator <= (const float24& flt) const {
1123 return ToFloat32() <= flt.ToFloat32();
1124 }
1125
1126 bool operator == (const float24& flt) const {
1127 return ToFloat32() == flt.ToFloat32();
1128 }
1129
1130 bool operator != (const float24& flt) const {
1131 return ToFloat32() != flt.ToFloat32();
1132 }
1133
1134private:
1135 // Stored as a regular float, merely for convenience
1136 // TODO: Perform proper arithmetic on this!
1137 float value;
1138};
1139static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float");
1140
1141/// Struct used to describe current Pica state 1295/// Struct used to describe current Pica state
1142struct State { 1296struct State {
1143 /// Pica registers 1297 /// Pica registers
@@ -1163,6 +1317,25 @@ struct State {
1163 ShaderSetup vs; 1317 ShaderSetup vs;
1164 ShaderSetup gs; 1318 ShaderSetup gs;
1165 1319
1320 struct {
1321 union LutEntry {
1322 // Used for raw access
1323 u32 raw;
1324
1325 // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
1326 BitField< 0, 12, u32> value;
1327
1328 // Used by HW for efficient interpolation, Citra does not use these
1329 BitField<12, 12, u32> difference;
1330
1331 float ToFloat() {
1332 return static_cast<float>(value) / 4095.f;
1333 }
1334 };
1335
1336 std::array<std::array<LutEntry, 256>, 24> luts;
1337 } lighting;
1338
1166 /// Current Pica command list 1339 /// Current Pica command list
1167 struct { 1340 struct {
1168 const u32* head_ptr; 1341 const u32* head_ptr;
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
new file mode 100644
index 000000000..ecf45654b
--- /dev/null
+++ b/src/video_core/pica_types.h
@@ -0,0 +1,146 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
#include <cmath>
#include <cstring>

#include "common/common_types.h"
10
11namespace Pica {
12
13/**
14 * Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision
15 * floating point.
16 *
17 * When decoding, format is as follows:
18 * - The first `M` bits are the mantissa
19 * - The next `E` bits are the exponent
20 * - The last bit is the sign bit
21 *
22 * @todo Verify on HW if this conversion is sufficiently accurate.
23 */
24template<unsigned M, unsigned E>
25struct Float {
26public:
27 static Float<M, E> FromFloat32(float val) {
28 Float<M, E> ret;
29 ret.value = val;
30 return ret;
31 }
32
33 static Float<M, E> FromRaw(u32 hex) {
34 Float<M, E> res;
35
36 const int width = M + E + 1;
37 const int bias = 128 - (1 << (E - 1));
38 const int exponent = (hex >> M) & ((1 << E) - 1);
39 const unsigned mantissa = hex & ((1 << M) - 1);
40
41 if (hex & ((1 << (width - 1)) - 1))
42 hex = ((hex >> (E + M)) << 31) | (mantissa << (23 - M)) | ((exponent + bias) << 23);
43 else
44 hex = ((hex >> (E + M)) << 31);
45
46 std::memcpy(&res.value, &hex, sizeof(float));
47
48 return res;
49 }
50
51 static Float<M, E> Zero() {
52 return FromFloat32(0.f);
53 }
54
55 // Not recommended for anything but logging
56 float ToFloat32() const {
57 return value;
58 }
59
60 Float<M, E> operator * (const Float<M, E>& flt) const {
61 if ((this->value == 0.f && !std::isnan(flt.value)) ||
62 (flt.value == 0.f && !std::isnan(this->value)))
63 // PICA gives 0 instead of NaN when multiplying by inf
64 return Zero();
65 return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32());
66 }
67
68 Float<M, E> operator / (const Float<M, E>& flt) const {
69 return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32());
70 }
71
72 Float<M, E> operator + (const Float<M, E>& flt) const {
73 return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32());
74 }
75
76 Float<M, E> operator - (const Float<M, E>& flt) const {
77 return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32());
78 }
79
80 Float<M, E>& operator *= (const Float<M, E>& flt) {
81 if ((this->value == 0.f && !std::isnan(flt.value)) ||
82 (flt.value == 0.f && !std::isnan(this->value)))
83 // PICA gives 0 instead of NaN when multiplying by inf
84 *this = Zero();
85 else value *= flt.ToFloat32();
86 return *this;
87 }
88
89 Float<M, E>& operator /= (const Float<M, E>& flt) {
90 value /= flt.ToFloat32();
91 return *this;
92 }
93
94 Float<M, E>& operator += (const Float<M, E>& flt) {
95 value += flt.ToFloat32();
96 return *this;
97 }
98
99 Float<M, E>& operator -= (const Float<M, E>& flt) {
100 value -= flt.ToFloat32();
101 return *this;
102 }
103
104 Float<M, E> operator - () const {
105 return Float<M, E>::FromFloat32(-ToFloat32());
106 }
107
108 bool operator < (const Float<M, E>& flt) const {
109 return ToFloat32() < flt.ToFloat32();
110 }
111
112 bool operator > (const Float<M, E>& flt) const {
113 return ToFloat32() > flt.ToFloat32();
114 }
115
116 bool operator >= (const Float<M, E>& flt) const {
117 return ToFloat32() >= flt.ToFloat32();
118 }
119
120 bool operator <= (const Float<M, E>& flt) const {
121 return ToFloat32() <= flt.ToFloat32();
122 }
123
124 bool operator == (const Float<M, E>& flt) const {
125 return ToFloat32() == flt.ToFloat32();
126 }
127
128 bool operator != (const Float<M, E>& flt) const {
129 return ToFloat32() != flt.ToFloat32();
130 }
131
132private:
133 static const unsigned MASK = (1 << (M + E + 1)) - 1;
134 static const unsigned MANTISSA_MASK = (1 << M) - 1;
135 static const unsigned EXPONENT_MASK = (1 << E) - 1;
136
137 // Stored as a regular float, merely for convenience
138 // TODO: Perform proper arithmetic on this!
139 float value;
140};
141
142using float24 = Float<16, 7>;
143using float20 = Float<12, 7>;
144using float16 = Float<10, 5>;
145
146} // namespace Pica
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6441e2586..b7d19bf94 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -75,6 +75,12 @@ void RasterizerOpenGL::InitObjects() {
75 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); 75 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); 76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
77 77
78 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
79 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
80
81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
83
78 SetShader(); 84 SetShader();
79 85
80 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation 86 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
@@ -120,6 +126,19 @@ void RasterizerOpenGL::InitObjects() {
120 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); 126 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0);
121 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); 127 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
122 128
129 for (size_t i = 0; i < lighting_lut.size(); ++i) {
130 lighting_lut[i].Create();
131 state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
132
133 glActiveTexture(GL_TEXTURE3 + i);
134 glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
135
136 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
137 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
138 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
139 }
140 state.Apply();
141
123 ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, 142 ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE,
124 "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); 143 "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER));
125} 144}
@@ -139,12 +158,34 @@ void RasterizerOpenGL::Reset() {
139 res_cache.InvalidateAll(); 158 res_cache.InvalidateAll();
140} 159}
141 160
161/**
162 * This is a helper function to resolve an issue with opposite quaternions being interpolated by
163 * OpenGL. See below for a detailed description of this issue (yuriks):
164 *
165 * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
166 * interpolate two quaternions that are opposite, instead of going from one rotation to another
167 * using the shortest path, you'll go around the longest path. You can test if two quaternions are
168 * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
169 * making Dot(-Q1, Q2) positive.
170 *
171 * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This
172 * should be correct for nearly all cases, however a more correct implementation (but less trivial
173 * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions
174 * manually using two Lerps, and doing this correction before each Lerp.
175 */
176static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) {
177 Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() };
178 Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() };
179
180 return (Math::Dot(a, b) < 0.f);
181}
182
142void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, 183void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
143 const Pica::Shader::OutputVertex& v1, 184 const Pica::Shader::OutputVertex& v1,
144 const Pica::Shader::OutputVertex& v2) { 185 const Pica::Shader::OutputVertex& v2) {
145 vertex_batch.emplace_back(v0); 186 vertex_batch.emplace_back(v0, false);
146 vertex_batch.emplace_back(v1); 187 vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
147 vertex_batch.emplace_back(v2); 188 vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
148} 189}
149 190
150void RasterizerOpenGL::DrawTriangles() { 191void RasterizerOpenGL::DrawTriangles() {
@@ -156,6 +197,13 @@ void RasterizerOpenGL::DrawTriangles() {
156 state.draw.shader_dirty = false; 197 state.draw.shader_dirty = false;
157 } 198 }
158 199
200 for (unsigned index = 0; index < lighting_lut.size(); index++) {
201 if (uniform_block_data.lut_dirty[index]) {
202 SyncLightingLUT(index);
203 uniform_block_data.lut_dirty[index] = false;
204 }
205 }
206
159 if (uniform_block_data.dirty) { 207 if (uniform_block_data.dirty) {
160 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 208 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
161 uniform_block_data.dirty = false; 209 uniform_block_data.dirty = false;
@@ -283,6 +331,165 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
283 case PICA_REG_INDEX(tev_combiner_buffer_color): 331 case PICA_REG_INDEX(tev_combiner_buffer_color):
284 SyncCombinerColor(); 332 SyncCombinerColor();
285 break; 333 break;
334
335 // Fragment lighting specular 0 color
336 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10):
337 SyncLightSpecular0(0);
338 break;
339 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_0, 0x140 + 1 * 0x10):
340 SyncLightSpecular0(1);
341 break;
342 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_0, 0x140 + 2 * 0x10):
343 SyncLightSpecular0(2);
344 break;
345 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_0, 0x140 + 3 * 0x10):
346 SyncLightSpecular0(3);
347 break;
348 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_0, 0x140 + 4 * 0x10):
349 SyncLightSpecular0(4);
350 break;
351 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_0, 0x140 + 5 * 0x10):
352 SyncLightSpecular0(5);
353 break;
354 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_0, 0x140 + 6 * 0x10):
355 SyncLightSpecular0(6);
356 break;
357 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_0, 0x140 + 7 * 0x10):
358 SyncLightSpecular0(7);
359 break;
360
361 // Fragment lighting specular 1 color
362 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_1, 0x141 + 0 * 0x10):
363 SyncLightSpecular1(0);
364 break;
365 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_1, 0x141 + 1 * 0x10):
366 SyncLightSpecular1(1);
367 break;
368 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_1, 0x141 + 2 * 0x10):
369 SyncLightSpecular1(2);
370 break;
371 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_1, 0x141 + 3 * 0x10):
372 SyncLightSpecular1(3);
373 break;
374 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_1, 0x141 + 4 * 0x10):
375 SyncLightSpecular1(4);
376 break;
377 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_1, 0x141 + 5 * 0x10):
378 SyncLightSpecular1(5);
379 break;
380 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_1, 0x141 + 6 * 0x10):
381 SyncLightSpecular1(6);
382 break;
383 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_1, 0x141 + 7 * 0x10):
384 SyncLightSpecular1(7);
385 break;
386
387 // Fragment lighting diffuse color
388 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10):
389 SyncLightDiffuse(0);
390 break;
391 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].diffuse, 0x142 + 1 * 0x10):
392 SyncLightDiffuse(1);
393 break;
394 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].diffuse, 0x142 + 2 * 0x10):
395 SyncLightDiffuse(2);
396 break;
397 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].diffuse, 0x142 + 3 * 0x10):
398 SyncLightDiffuse(3);
399 break;
400 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].diffuse, 0x142 + 4 * 0x10):
401 SyncLightDiffuse(4);
402 break;
403 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].diffuse, 0x142 + 5 * 0x10):
404 SyncLightDiffuse(5);
405 break;
406 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].diffuse, 0x142 + 6 * 0x10):
407 SyncLightDiffuse(6);
408 break;
409 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].diffuse, 0x142 + 7 * 0x10):
410 SyncLightDiffuse(7);
411 break;
412
413 // Fragment lighting ambient color
414 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].ambient, 0x143 + 0 * 0x10):
415 SyncLightAmbient(0);
416 break;
417 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].ambient, 0x143 + 1 * 0x10):
418 SyncLightAmbient(1);
419 break;
420 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].ambient, 0x143 + 2 * 0x10):
421 SyncLightAmbient(2);
422 break;
423 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].ambient, 0x143 + 3 * 0x10):
424 SyncLightAmbient(3);
425 break;
426 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].ambient, 0x143 + 4 * 0x10):
427 SyncLightAmbient(4);
428 break;
429 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].ambient, 0x143 + 5 * 0x10):
430 SyncLightAmbient(5);
431 break;
432 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].ambient, 0x143 + 6 * 0x10):
433 SyncLightAmbient(6);
434 break;
435 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].ambient, 0x143 + 7 * 0x10):
436 SyncLightAmbient(7);
437 break;
438
439 // Fragment lighting position
440 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10):
441 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10):
442 SyncLightPosition(0);
443 break;
444 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].x, 0x144 + 1 * 0x10):
445 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].z, 0x145 + 1 * 0x10):
446 SyncLightPosition(1);
447 break;
448 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].x, 0x144 + 2 * 0x10):
449 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].z, 0x145 + 2 * 0x10):
450 SyncLightPosition(2);
451 break;
452 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].x, 0x144 + 3 * 0x10):
453 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].z, 0x145 + 3 * 0x10):
454 SyncLightPosition(3);
455 break;
456 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].x, 0x144 + 4 * 0x10):
457 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].z, 0x145 + 4 * 0x10):
458 SyncLightPosition(4);
459 break;
460 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].x, 0x144 + 5 * 0x10):
461 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].z, 0x145 + 5 * 0x10):
462 SyncLightPosition(5);
463 break;
464 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].x, 0x144 + 6 * 0x10):
465 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].z, 0x145 + 6 * 0x10):
466 SyncLightPosition(6);
467 break;
468 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].x, 0x144 + 7 * 0x10):
469 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].z, 0x145 + 7 * 0x10):
470 SyncLightPosition(7);
471 break;
472
473 // Fragment lighting global ambient color (emission + ambient * ambient)
474 case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0):
475 SyncGlobalAmbient();
476 break;
477
478 // Fragment lighting lookup tables
479 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8):
480 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
481 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
482 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
483 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
484 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
485 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
486 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
487 {
488 auto& lut_config = regs.lighting.lut_config;
489 uniform_block_data.lut_dirty[lut_config.type / 4] = true;
490 break;
491 }
492
286 } 493 }
287} 494}
288 495
@@ -491,18 +698,39 @@ void RasterizerOpenGL::SetShader() {
491 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); 698 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
492 if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } 699 if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); }
493 700
701 // Set the texture samplers to correspond to different lookup table texture units
702 GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]");
703 if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); }
704 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]");
705 if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); }
706 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]");
707 if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); }
708 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]");
709 if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); }
710 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]");
711 if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); }
712 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
713 if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); }
714
494 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); 715 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
495 716
496 unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); 717 unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
497 glUniformBlockBinding(current_shader->shader.handle, block_index, 0); 718 glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
498 }
499 719
500 // Update uniforms 720 // Update uniforms
501 SyncAlphaTest(); 721 SyncAlphaTest();
502 SyncCombinerColor(); 722 SyncCombinerColor();
503 auto& tev_stages = Pica::g_state.regs.GetTevStages(); 723 auto& tev_stages = Pica::g_state.regs.GetTevStages();
504 for (int index = 0; index < tev_stages.size(); ++index) 724 for (int index = 0; index < tev_stages.size(); ++index)
505 SyncTevConstColor(index, tev_stages[index]); 725 SyncTevConstColor(index, tev_stages[index]);
726
727 SyncGlobalAmbient();
728 for (int light_index = 0; light_index < 8; light_index++) {
729 SyncLightDiffuse(light_index);
730 SyncLightAmbient(light_index);
731 SyncLightPosition(light_index);
732 }
733 }
506} 734}
507 735
508void RasterizerOpenGL::SyncFramebuffer() { 736void RasterizerOpenGL::SyncFramebuffer() {
@@ -604,8 +832,8 @@ void RasterizerOpenGL::SyncCullMode() {
604} 832}
605 833
606void RasterizerOpenGL::SyncDepthModifiers() { 834void RasterizerOpenGL::SyncDepthModifiers() {
607 float depth_scale = -Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_range).ToFloat32(); 835 float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
608 float depth_offset = Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; 836 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f;
609 837
610 // TODO: Implement scale modifier 838 // TODO: Implement scale modifier
611 uniform_block_data.data.depth_offset = depth_offset; 839 uniform_block_data.data.depth_offset = depth_offset;
@@ -683,12 +911,81 @@ void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevS
683 } 911 }
684} 912}
685 913
914void RasterizerOpenGL::SyncGlobalAmbient() {
915 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient);
916 if (color != uniform_block_data.data.lighting_global_ambient) {
917 uniform_block_data.data.lighting_global_ambient = color;
918 uniform_block_data.dirty = true;
919 }
920}
921
922void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
923 std::array<GLvec4, 256> new_data;
924
925 for (unsigned offset = 0; offset < new_data.size(); ++offset) {
926 new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat();
927 new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat();
928 new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat();
929 new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat();
930 }
931
932 if (new_data != lighting_lut_data[lut_index]) {
933 lighting_lut_data[lut_index] = new_data;
934 glActiveTexture(GL_TEXTURE3 + lut_index);
935 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data());
936 }
937}
938
939void RasterizerOpenGL::SyncLightSpecular0(int light_index) {
940 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
941 if (color != uniform_block_data.data.light_src[light_index].specular_0) {
942 uniform_block_data.data.light_src[light_index].specular_0 = color;
943 uniform_block_data.dirty = true;
944 }
945}
946
947void RasterizerOpenGL::SyncLightSpecular1(int light_index) {
948 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
949 if (color != uniform_block_data.data.light_src[light_index].specular_1) {
950 uniform_block_data.data.light_src[light_index].specular_1 = color;
951 uniform_block_data.dirty = true;
952 }
953}
954
955void RasterizerOpenGL::SyncLightDiffuse(int light_index) {
956 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
957 if (color != uniform_block_data.data.light_src[light_index].diffuse) {
958 uniform_block_data.data.light_src[light_index].diffuse = color;
959 uniform_block_data.dirty = true;
960 }
961}
962
963void RasterizerOpenGL::SyncLightAmbient(int light_index) {
964 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
965 if (color != uniform_block_data.data.light_src[light_index].ambient) {
966 uniform_block_data.data.light_src[light_index].ambient = color;
967 uniform_block_data.dirty = true;
968 }
969}
970
971void RasterizerOpenGL::SyncLightPosition(int light_index) {
972 GLvec3 position = {
973 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
974 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
975 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() };
976
977 if (position != uniform_block_data.data.light_src[light_index].position) {
978 uniform_block_data.data.light_src[light_index].position = position;
979 uniform_block_data.dirty = true;
980 }
981}
982
686void RasterizerOpenGL::SyncDrawState() { 983void RasterizerOpenGL::SyncDrawState() {
687 const auto& regs = Pica::g_state.regs; 984 const auto& regs = Pica::g_state.regs;
688 985
689 // Sync the viewport 986 // Sync the viewport
690 GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2; 987 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
691 GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2; 988 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
692 989
693 // OpenGL uses different y coordinates, so negate corner offset and flip origin 990 // OpenGL uses different y coordinates, so negate corner offset and flip origin
694 // TODO: Ensure viewport_corner.x should not be negated or origin flipped 991 // TODO: Ensure viewport_corner.x should not be negated or origin flipped
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 569beaa5c..fef5f5331 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -17,6 +17,7 @@
17#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
18#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 18#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
19#include "video_core/renderer_opengl/gl_state.h" 19#include "video_core/renderer_opengl/gl_state.h"
20#include "video_core/renderer_opengl/pica_to_gl.h"
20#include "video_core/shader/shader_interpreter.h" 21#include "video_core/shader/shader_interpreter.h"
21 22
22/** 23/**
@@ -71,6 +72,59 @@ struct PicaShaderConfig {
71 regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 72 regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
72 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; 73 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
73 74
75 // Fragment lighting
76
77 res.lighting.enable = !regs.lighting.disable;
78 res.lighting.src_num = regs.lighting.num_lights + 1;
79
80 for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) {
81 unsigned num = regs.lighting.light_enable.GetNum(light_index);
82 const auto& light = regs.lighting.light[num];
83 res.lighting.light[light_index].num = num;
84 res.lighting.light[light_index].directional = light.directional != 0;
85 res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
86 res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
87 res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
88 res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
89 }
90
91 res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
92 res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
93 res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
94 res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
95
96 res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
97 res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
98 res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
99 res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
100
101 res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
102 res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
103 res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
104 res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
105
106 res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
107 res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
108 res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
109 res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
110
111 res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
112 res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
113 res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
114 res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
115
116 res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
117 res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
118 res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
119 res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
120
121 res.lighting.config = regs.lighting.config;
122 res.lighting.fresnel_selector = regs.lighting.fresnel_selector;
123 res.lighting.bump_mode = regs.lighting.bump_mode;
124 res.lighting.bump_selector = regs.lighting.bump_selector;
125 res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
126 res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
127
74 return res; 128 return res;
75 } 129 }
76 130
@@ -86,9 +140,37 @@ struct PicaShaderConfig {
86 return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; 140 return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0;
87 }; 141 };
88 142
89 Pica::Regs::CompareFunc alpha_test_func; 143 Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never;
90 std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; 144 std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
91 u8 combiner_buffer_input; 145 u8 combiner_buffer_input = 0;
146
147 struct {
148 struct {
149 unsigned num = 0;
150 bool directional = false;
151 bool two_sided_diffuse = false;
152 bool dist_atten_enable = false;
153 GLfloat dist_atten_scale = 0.0f;
154 GLfloat dist_atten_bias = 0.0f;
155 } light[8];
156
157 bool enable = false;
158 unsigned src_num = 0;
159 Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
160 unsigned bump_selector = 0;
161 bool bump_renorm = false;
162 bool clamp_highlights = false;
163
164 Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
165 Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
166
167 struct {
168 bool enable = false;
169 bool abs_input = false;
170 Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH;
171 float scale = 1.0f;
172 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
173 } lighting;
92}; 174};
93 175
94namespace std { 176namespace std {
@@ -167,7 +249,7 @@ private:
167 249
168 /// Structure that the hardware rendered vertices are composed of 250 /// Structure that the hardware rendered vertices are composed of
169 struct HardwareVertex { 251 struct HardwareVertex {
170 HardwareVertex(const Pica::Shader::OutputVertex& v) { 252 HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
171 position[0] = v.pos.x.ToFloat32(); 253 position[0] = v.pos.x.ToFloat32();
172 position[1] = v.pos.y.ToFloat32(); 254 position[1] = v.pos.y.ToFloat32();
173 position[2] = v.pos.z.ToFloat32(); 255 position[2] = v.pos.z.ToFloat32();
@@ -182,6 +264,19 @@ private:
182 tex_coord1[1] = v.tc1.y.ToFloat32(); 264 tex_coord1[1] = v.tc1.y.ToFloat32();
183 tex_coord2[0] = v.tc2.x.ToFloat32(); 265 tex_coord2[0] = v.tc2.x.ToFloat32();
184 tex_coord2[1] = v.tc2.y.ToFloat32(); 266 tex_coord2[1] = v.tc2.y.ToFloat32();
267 normquat[0] = v.quat.x.ToFloat32();
268 normquat[1] = v.quat.y.ToFloat32();
269 normquat[2] = v.quat.z.ToFloat32();
270 normquat[3] = v.quat.w.ToFloat32();
271 view[0] = v.view.x.ToFloat32();
272 view[1] = v.view.y.ToFloat32();
273 view[2] = v.view.z.ToFloat32();
274
275 if (flip_quaternion) {
276 for (float& x : normquat) {
277 x = -x;
278 }
279 }
185 } 280 }
186 281
187 GLfloat position[4]; 282 GLfloat position[4];
@@ -189,20 +284,31 @@ private:
189 GLfloat tex_coord0[2]; 284 GLfloat tex_coord0[2];
190 GLfloat tex_coord1[2]; 285 GLfloat tex_coord1[2];
191 GLfloat tex_coord2[2]; 286 GLfloat tex_coord2[2];
287 GLfloat normquat[4];
288 GLfloat view[3];
289 };
290
291 struct LightSrc {
292 alignas(16) GLvec3 specular_0;
293 alignas(16) GLvec3 specular_1;
294 alignas(16) GLvec3 diffuse;
295 alignas(16) GLvec3 ambient;
296 alignas(16) GLvec3 position;
192 }; 297 };
193 298
194 /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned 299 /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned
195 struct UniformData { 300 struct UniformData {
196 // A vec4 color for each of the six tev stages 301 // A vec4 color for each of the six tev stages
197 std::array<GLfloat, 4> const_color[6]; 302 GLvec4 const_color[6];
198 std::array<GLfloat, 4> tev_combiner_buffer_color; 303 GLvec4 tev_combiner_buffer_color;
199 GLint alphatest_ref; 304 GLint alphatest_ref;
200 GLfloat depth_offset; 305 GLfloat depth_offset;
201 INSERT_PADDING_BYTES(8); 306 alignas(16) GLvec3 lighting_global_ambient;
307 LightSrc light_src[8];
202 }; 308 };
203 309
204 static_assert(sizeof(UniformData) == 0x80, "The size of the UniformData structure has changed, update the structure in the shader"); 310 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
205 static_assert(sizeof(UniformData) < 16000, "UniformData structure must be less than 16kb as per the OpenGL spec"); 311 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
206 312
207 /// Reconfigure the OpenGL color texture to use the given format and dimensions 313 /// Reconfigure the OpenGL color texture to use the given format and dimensions
208 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); 314 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
@@ -249,6 +355,27 @@ private:
249 /// Syncs the TEV combiner color buffer to match the PICA register 355 /// Syncs the TEV combiner color buffer to match the PICA register
250 void SyncCombinerColor(); 356 void SyncCombinerColor();
251 357
358 /// Syncs the lighting global ambient color to match the PICA register
359 void SyncGlobalAmbient();
360
361 /// Syncs the lighting lookup tables
362 void SyncLightingLUT(unsigned index);
363
364 /// Syncs the specified light's diffuse color to match the PICA register
365 void SyncLightDiffuse(int light_index);
366
367 /// Syncs the specified light's ambient color to match the PICA register
368 void SyncLightAmbient(int light_index);
369
370 /// Syncs the specified light's position to match the PICA register
371 void SyncLightPosition(int light_index);
372
373 /// Syncs the specified light's specular 0 color to match the PICA register
374 void SyncLightSpecular0(int light_index);
375
376 /// Syncs the specified light's specular 1 color to match the PICA register
377 void SyncLightSpecular1(int light_index);
378
252 /// Syncs the remaining OpenGL drawing state to match the current PICA state 379 /// Syncs the remaining OpenGL drawing state to match the current PICA state
253 void SyncDrawState(); 380 void SyncDrawState();
254 381
@@ -291,6 +418,7 @@ private:
291 418
292 struct { 419 struct {
293 UniformData data; 420 UniformData data;
421 bool lut_dirty[6];
294 bool dirty; 422 bool dirty;
295 } uniform_block_data; 423 } uniform_block_data;
296 424
@@ -298,4 +426,7 @@ private:
298 OGLBuffer vertex_buffer; 426 OGLBuffer vertex_buffer;
299 OGLBuffer uniform_buffer; 427 OGLBuffer uniform_buffer;
300 OGLFramebuffer framebuffer; 428 OGLFramebuffer framebuffer;
429
430 std::array<OGLTexture, 6> lighting_lut;
431 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
301}; 432};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 22022f7f4..ee4b54ab9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -32,12 +32,10 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
32 out += "primary_color"; 32 out += "primary_color";
33 break; 33 break;
34 case Source::PrimaryFragmentColor: 34 case Source::PrimaryFragmentColor:
35 // HACK: Until we implement fragment lighting, use primary_color 35 out += "primary_fragment_color";
36 out += "primary_color";
37 break; 36 break;
38 case Source::SecondaryFragmentColor: 37 case Source::SecondaryFragmentColor:
39 // HACK: Until we implement fragment lighting, use zero 38 out += "secondary_fragment_color";
40 out += "vec4(0.0)";
41 break; 39 break;
42 case Source::Texture0: 40 case Source::Texture0:
43 out += "texture(tex[0], texcoord[0])"; 41 out += "texture(tex[0], texcoord[0])";
@@ -320,26 +318,229 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
320 out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; 318 out += "next_combiner_buffer.a = last_tex_env_out.a;\n";
321} 319}
322 320
321/// Writes the code to emulate fragment lighting
322static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
323 // Define lighting globals
324 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
325 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
326 "vec3 light_vector = vec3(0.0);\n"
327 "vec3 refl_value = vec3(0.0);\n";
328
329 // Compute fragment normals
330 if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
331 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture
332 std::string bump_selector = std::to_string(config.lighting.bump_selector);
333 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n";
334
335 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result
336 if (config.lighting.bump_renorm) {
337 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
338 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
339 }
340 } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
341 // Bump mapping is enabled using a tangent map
342 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
343 UNIMPLEMENTED();
344 } else {
345 // No bump mapping - surface local normal is just a unit normal
346 out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
347 }
348
349 // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace
350 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
351
352 // Gets the index into the specified lookup table for specular lighting
353 auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) {
354 const std::string half_angle = "normalize(normalize(view) + light_vector)";
355 std::string index;
356 switch (input) {
357 case Regs::LightingLutInput::NH:
358 index = "dot(normal, " + half_angle + ")";
359 break;
360
361 case Regs::LightingLutInput::VH:
362 index = std::string("dot(normalize(view), " + half_angle + ")");
363 break;
364
365 case Regs::LightingLutInput::NV:
366 index = std::string("dot(normal, normalize(view))");
367 break;
368
369 case Regs::LightingLutInput::LN:
370 index = std::string("dot(light_vector, normal)");
371 break;
372
373 default:
374 LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input);
375 UNIMPLEMENTED();
376 break;
377 }
378
379 if (abs) {
380 // LUT index is in the range of (0.0, 1.0)
381 index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
382 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
383 } else {
384 // LUT index is in the range of (-1.0, 1.0)
385 index = "clamp(" + index + ", -1.0, 1.0)";
386 return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)";
387 }
388
389 return std::string();
390 };
391
392 // Gets the lighting lookup table value given the specified sampler and index
393 auto GetLutValue = [](Regs::LightingSampler sampler, std::string lut_index) {
394 return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " +
395 lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]");
396 };
397
398 // Write the code to emulate each enabled light
399 for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) {
400 const auto& light_config = config.lighting.light[light_index];
401 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]";
402
403 // Compute light vector (directional or positional)
404 if (light_config.directional)
405 out += "light_vector = normalize(" + light_src + ".position);\n";
406 else
407 out += "light_vector = normalize(" + light_src + ".position + view);\n";
408
409 // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided
410 std::string dot_product = light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)";
411
412 // If enabled, compute distance attenuation value
413 std::string dist_atten = "1.0";
414 if (light_config.dist_atten_enable) {
415 std::string scale = std::to_string(light_config.dist_atten_scale);
416 std::string bias = std::to_string(light_config.dist_atten_bias);
417 std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")";
418 index = "((clamp(" + index + ", 0.0, FLOAT_255)))";
419 const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num);
420 dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index);
421 }
422
423 // If enabled, clamp specular component if lighting result is negative
424 std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
425
426 // Specular 0 component
427 std::string d0_lut_value = "1.0";
428 if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
429 // Lookup specular "distribution 0" LUT value
430 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input);
431 d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
432 }
433 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
434
435 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
436 if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
437 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input);
438 std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
439 out += "refl_value.r = " + value + ";\n";
440 } else {
441 out += "refl_value.r = 1.0;\n";
442 }
443
444 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
445 if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
446 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input);
447 std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
448 out += "refl_value.g = " + value + ";\n";
449 } else {
450 out += "refl_value.g = refl_value.r;\n";
451 }
452
453 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
454 if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
455 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input);
456 std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
457 out += "refl_value.b = " + value + ";\n";
458 } else {
459 out += "refl_value.b = refl_value.r;\n";
460 }
461
462 // Specular 1 component
463 std::string d1_lut_value = "1.0";
464 if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
465 // Lookup specular "distribution 1" LUT value
466 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input);
467 d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
468 }
469 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
470
471 // Fresnel
472 if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
473 // Lookup fresnel LUT value
474 std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input);
475 std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
476
477 // Enabled for difffuse lighting alpha component
478 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
479 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
480 out += "diffuse_sum.a *= " + value + ";\n";
481
482 // Enabled for the specular lighting alpha component
483 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
484 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
485 out += "specular_sum.a *= " + value + ";\n";
486 }
487
488 // Compute primary fragment color (diffuse lighting) function
489 out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n";
490
491 // Compute secondary fragment color (specular lighting) function
492 out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n";
493 }
494
495 // Sum final lighting result
496 out += "diffuse_sum.rgb += lighting_global_ambient;\n";
497 out += "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n";
498 out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n";
499}
500
323std::string GenerateFragmentShader(const PicaShaderConfig& config) { 501std::string GenerateFragmentShader(const PicaShaderConfig& config) {
324 std::string out = R"( 502 std::string out = R"(
325#version 330 core 503#version 330 core
326#define NUM_TEV_STAGES 6 504#define NUM_TEV_STAGES 6
505#define NUM_LIGHTS 8
506#define LIGHTING_LUT_SIZE 256
507#define FLOAT_255 (255.0 / 256.0)
327 508
328in vec4 primary_color; 509in vec4 primary_color;
329in vec2 texcoord[3]; 510in vec2 texcoord[3];
511in vec4 normquat;
512in vec3 view;
330 513
331out vec4 color; 514out vec4 color;
332 515
516struct LightSrc {
517 vec3 specular_0;
518 vec3 specular_1;
519 vec3 diffuse;
520 vec3 ambient;
521 vec3 position;
522};
523
333layout (std140) uniform shader_data { 524layout (std140) uniform shader_data {
334 vec4 const_color[NUM_TEV_STAGES]; 525 vec4 const_color[NUM_TEV_STAGES];
335 vec4 tev_combiner_buffer_color; 526 vec4 tev_combiner_buffer_color;
336 int alphatest_ref; 527 int alphatest_ref;
337 float depth_offset; 528 float depth_offset;
529 vec3 lighting_global_ambient;
530 LightSrc light_src[NUM_LIGHTS];
338}; 531};
339 532
340uniform sampler2D tex[3]; 533uniform sampler2D tex[3];
534uniform sampler1D lut[6];
535
536// Rotate the vector v by the quaternion q
537vec3 quaternion_rotate(vec4 q, vec3 v) {
538 return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
539}
341 540
342void main() { 541void main() {
542vec4 primary_fragment_color = vec4(0.0);
543vec4 secondary_fragment_color = vec4(0.0);
343)"; 544)";
344 545
345 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test 546 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
@@ -348,6 +549,9 @@ void main() {
348 return out; 549 return out;
349 } 550 }
350 551
552 if (config.lighting.enable)
553 WriteLighting(out, config);
554
351 out += "vec4 combiner_buffer = vec4(0.0);\n"; 555 out += "vec4 combiner_buffer = vec4(0.0);\n";
352 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; 556 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
353 out += "vec4 last_tex_env_out = vec4(0.0);\n"; 557 out += "vec4 last_tex_env_out = vec4(0.0);\n";
@@ -369,21 +573,28 @@ void main() {
369 573
370std::string GenerateVertexShader() { 574std::string GenerateVertexShader() {
371 std::string out = "#version 330 core\n"; 575 std::string out = "#version 330 core\n";
576
372 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; 577 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n";
373 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; 578 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
374 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; 579 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
375 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; 580 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
376 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; 581 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
582 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n";
583 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
377 584
378 out += R"( 585 out += R"(
379out vec4 primary_color; 586out vec4 primary_color;
380out vec2 texcoord[3]; 587out vec2 texcoord[3];
588out vec4 normquat;
589out vec3 view;
381 590
382void main() { 591void main() {
383 primary_color = vert_color; 592 primary_color = vert_color;
384 texcoord[0] = vert_texcoord0; 593 texcoord[0] = vert_texcoord0;
385 texcoord[1] = vert_texcoord1; 594 texcoord[1] = vert_texcoord1;
386 texcoord[2] = vert_texcoord2; 595 texcoord[2] = vert_texcoord2;
596 normquat = vert_normquat;
597 view = vert_view;
387 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); 598 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
388} 599}
389)"; 600)";
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 046aae14f..097242f6f 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -14,6 +14,8 @@ enum Attributes {
14 ATTRIBUTE_TEXCOORD0, 14 ATTRIBUTE_TEXCOORD0,
15 ATTRIBUTE_TEXCOORD1, 15 ATTRIBUTE_TEXCOORD1,
16 ATTRIBUTE_TEXCOORD2, 16 ATTRIBUTE_TEXCOORD2,
17 ATTRIBUTE_NORMQUAT,
18 ATTRIBUTE_VIEW,
17}; 19};
18 20
19/** 21/**
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index a82372995..ab4b6c7b1 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -170,6 +170,14 @@ void OpenGLState::Apply() {
170 } 170 }
171 } 171 }
172 172
173 // Lighting LUTs
174 for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) {
175 if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) {
176 glActiveTexture(GL_TEXTURE3 + i);
177 glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d);
178 }
179 }
180
173 // Framebuffer 181 // Framebuffer
174 if (draw.framebuffer != cur_state.draw.framebuffer) { 182 if (draw.framebuffer != cur_state.draw.framebuffer) {
175 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); 183 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b8ab45bb8..e848058d7 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -62,6 +62,10 @@ public:
62 } texture_units[3]; 62 } texture_units[3];
63 63
64 struct { 64 struct {
65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D
66 } lighting_lut[6];
67
68 struct {
65 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 69 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
66 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 70 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
67 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 71 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 04c1d1a34..3d6c4e9e5 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -10,6 +10,9 @@
10 10
11#include "video_core/pica.h" 11#include "video_core/pica.h"
12 12
13using GLvec3 = std::array<GLfloat, 3>;
14using GLvec4 = std::array<GLfloat, 4>;
15
13namespace PicaToGL { 16namespace PicaToGL {
14 17
15inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { 18inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) {
@@ -175,7 +178,7 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) {
175 return stencil_op_table[(unsigned)action]; 178 return stencil_op_table[(unsigned)action];
176} 179}
177 180
178inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) { 181inline GLvec4 ColorRGBA8(const u32 color) {
179 return { { (color >> 0 & 0xFF) / 255.0f, 182 return { { (color >> 0 & 0xFF) / 255.0f,
180 (color >> 8 & 0xFF) / 255.0f, 183 (color >> 8 & 0xFF) / 255.0f,
181 (color >> 16 & 0xFF) / 255.0f, 184 (color >> 16 & 0xFF) / 255.0f,
@@ -183,4 +186,11 @@ inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) {
183 } }; 186 } };
184} 187}
185 188
189inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) {
190 return { { color.r / 255.0f,
191 color.g / 255.0f,
192 color.b / 255.0f
193 } };
194}
195
186} // namespace 196} // namespace
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a6a38f0af..ca3a6a6b4 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -81,8 +81,8 @@ struct ScreenRectVertex {
81 * The projection part of the matrix is trivial, hence these operations are represented 81 * The projection part of the matrix is trivial, hence these operations are represented
82 * by a 3x2 matrix. 82 * by a 3x2 matrix.
83 */ 83 */
84static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const float height) { 84static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
85 std::array<GLfloat, 3*2> matrix; 85 std::array<GLfloat, 3 * 2> matrix;
86 86
87 matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; 87 matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
88 matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; 88 matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 59f54236b..44c234ed8 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -134,11 +134,13 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
134 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); 134 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
135 } 135 }
136 136
137 LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", 137 LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
138 "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
138 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 139 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
139 ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), 140 ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
140 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), 141 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
141 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); 142 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
143 ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
142 144
143 return ret; 145 return ret;
144} 146}
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 1c6fa592c..f068cd93f 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -37,17 +37,19 @@ struct OutputVertex {
37 Math::Vec4<float24> color; 37 Math::Vec4<float24> color;
38 Math::Vec2<float24> tc0; 38 Math::Vec2<float24> tc0;
39 Math::Vec2<float24> tc1; 39 Math::Vec2<float24> tc1;
40 float24 pad[6]; 40 INSERT_PADDING_WORDS(2);
41 Math::Vec3<float24> view;
42 INSERT_PADDING_WORDS(1);
41 Math::Vec2<float24> tc2; 43 Math::Vec2<float24> tc2;
42 44
43 // Padding for optimal alignment 45 // Padding for optimal alignment
44 float24 pad2[4]; 46 INSERT_PADDING_WORDS(4);
45 47
46 // Attributes used to store intermediate results 48 // Attributes used to store intermediate results
47 49
48 // position after perspective divide 50 // position after perspective divide
49 Math::Vec3<float24> screenpos; 51 Math::Vec3<float24> screenpos;
50 float24 pad3; 52 INSERT_PADDING_WORDS(1);
51 53
52 // Linear interpolation 54 // Linear interpolation
53 // factor: 0=this, 1=vtx 55 // factor: 0=this, 1=vtx