diff options
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/command_processor.cpp | 70 | ||||
| -rw-r--r-- | src/video_core/pica.h | 78 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 76 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shaders.h | 24 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/pica_to_gl.h | 31 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 29 |
10 files changed, 262 insertions, 72 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 29ba6b769..b46fadd9f 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -56,7 +56,17 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 56 | // Trigger IRQ | 56 | // Trigger IRQ |
| 57 | case PICA_REG_INDEX(trigger_irq): | 57 | case PICA_REG_INDEX(trigger_irq): |
| 58 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); | 58 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); |
| 59 | return; | 59 | break; |
| 60 | |||
| 61 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): | ||
| 62 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): | ||
| 63 | { | ||
| 64 | unsigned index = id - PICA_REG_INDEX(command_buffer.trigger[0]); | ||
| 65 | u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); | ||
| 66 | g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; | ||
| 67 | g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); | ||
| 68 | break; | ||
| 69 | } | ||
| 60 | 70 | ||
| 61 | // It seems like these trigger vertex rendering | 71 | // It seems like these trigger vertex rendering |
| 62 | case PICA_REG_INDEX(trigger_draw): | 72 | case PICA_REG_INDEX(trigger_draw): |
| @@ -363,38 +373,34 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 363 | g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id)); | 373 | g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id)); |
| 364 | } | 374 | } |
| 365 | 375 | ||
| 366 | static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) { | ||
| 367 | const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]); | ||
| 368 | |||
| 369 | u32* read_pointer = (u32*)first_command_word; | ||
| 370 | |||
| 371 | const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) | | ||
| 372 | ((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) | | ||
| 373 | ((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) | | ||
| 374 | ((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u); | ||
| 375 | |||
| 376 | WritePicaReg(header.cmd_id, *read_pointer, write_mask); | ||
| 377 | read_pointer += 2; | ||
| 378 | |||
| 379 | for (unsigned int i = 1; i < 1+header.extra_data_length; ++i) { | ||
| 380 | u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); | ||
| 381 | WritePicaReg(cmd, *read_pointer, write_mask); | ||
| 382 | ++read_pointer; | ||
| 383 | } | ||
| 384 | |||
| 385 | // align read pointer to 8 bytes | ||
| 386 | if ((first_command_word - read_pointer) % 2) | ||
| 387 | ++read_pointer; | ||
| 388 | |||
| 389 | return read_pointer - first_command_word; | ||
| 390 | } | ||
| 391 | |||
| 392 | void ProcessCommandList(const u32* list, u32 size) { | 376 | void ProcessCommandList(const u32* list, u32 size) { |
| 393 | u32* read_pointer = (u32*)list; | 377 | g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = list; |
| 394 | u32 list_length = size / sizeof(u32); | 378 | g_state.cmd_list.length = size / sizeof(u32); |
| 395 | 379 | ||
| 396 | while (read_pointer < list + list_length) { | 380 | while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) { |
| 397 | read_pointer += ExecuteCommandBlock(read_pointer); | 381 | // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF |
| 382 | static const u32 expand_bits_to_bytes[] = { | ||
| 383 | 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, | ||
| 384 | 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, | ||
| 385 | 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, | ||
| 386 | 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff | ||
| 387 | }; | ||
| 388 | |||
| 389 | // Align read pointer to 8 bytes | ||
| 390 | if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0) | ||
| 391 | ++g_state.cmd_list.current_ptr; | ||
| 392 | |||
| 393 | u32 value = *g_state.cmd_list.current_ptr++; | ||
| 394 | const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; | ||
| 395 | const u32 write_mask = expand_bits_to_bytes[header.parameter_mask]; | ||
| 396 | u32 cmd = header.cmd_id; | ||
| 397 | |||
| 398 | WritePicaReg(cmd, value, write_mask); | ||
| 399 | |||
| 400 | for (unsigned i = 0; i < header.extra_data_length; ++i) { | ||
| 401 | u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); | ||
| 402 | WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, write_mask); | ||
| 403 | } | ||
| 398 | } | 404 | } |
| 399 | } | 405 | } |
| 400 | 406 | ||
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 6ebeb08f7..8ad47a928 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -162,6 +162,25 @@ struct Regs { | |||
| 162 | ETC1A4 = 13, // compressed | 162 | ETC1A4 = 13, // compressed |
| 163 | }; | 163 | }; |
| 164 | 164 | ||
| 165 | enum class LogicOp : u32 { | ||
| 166 | Clear = 0, | ||
| 167 | And = 1, | ||
| 168 | AndReverse = 2, | ||
| 169 | Copy = 3, | ||
| 170 | Set = 4, | ||
| 171 | CopyInverted = 5, | ||
| 172 | NoOp = 6, | ||
| 173 | Invert = 7, | ||
| 174 | Nand = 8, | ||
| 175 | Or = 9, | ||
| 176 | Nor = 10, | ||
| 177 | Xor = 11, | ||
| 178 | Equiv = 12, | ||
| 179 | AndInverted = 13, | ||
| 180 | OrReverse = 14, | ||
| 181 | OrInverted = 15, | ||
| 182 | }; | ||
| 183 | |||
| 165 | static unsigned NibblesPerPixel(TextureFormat format) { | 184 | static unsigned NibblesPerPixel(TextureFormat format) { |
| 166 | switch (format) { | 185 | switch (format) { |
| 167 | case TextureFormat::RGBA8: | 186 | case TextureFormat::RGBA8: |
| @@ -221,6 +240,7 @@ struct Regs { | |||
| 221 | enum class Source : u32 { | 240 | enum class Source : u32 { |
| 222 | PrimaryColor = 0x0, | 241 | PrimaryColor = 0x0, |
| 223 | PrimaryFragmentColor = 0x1, | 242 | PrimaryFragmentColor = 0x1, |
| 243 | SecondaryFragmentColor = 0x2, | ||
| 224 | 244 | ||
| 225 | Texture0 = 0x3, | 245 | Texture0 = 0x3, |
| 226 | Texture1 = 0x4, | 246 | Texture1 = 0x4, |
| @@ -413,12 +433,8 @@ struct Regs { | |||
| 413 | } alpha_blending; | 433 | } alpha_blending; |
| 414 | 434 | ||
| 415 | union { | 435 | union { |
| 416 | enum Op { | 436 | BitField<0, 4, LogicOp> logic_op; |
| 417 | Set = 4, | 437 | }; |
| 418 | }; | ||
| 419 | |||
| 420 | BitField<0, 4, Op> op; | ||
| 421 | } logic_op; | ||
| 422 | 438 | ||
| 423 | union { | 439 | union { |
| 424 | BitField< 0, 8, u32> r; | 440 | BitField< 0, 8, u32> r; |
| @@ -708,7 +724,33 @@ struct Regs { | |||
| 708 | u32 set_value[3]; | 724 | u32 set_value[3]; |
| 709 | } vs_default_attributes_setup; | 725 | } vs_default_attributes_setup; |
| 710 | 726 | ||
| 711 | INSERT_PADDING_WORDS(0x28); | 727 | INSERT_PADDING_WORDS(0x2); |
| 728 | |||
| 729 | struct { | ||
| 730 | // There are two channels that can be used to configure the next command buffer, which | ||
| 731 | // can be then executed by writing to the "trigger" registers. There are two reasons why a | ||
| 732 | // game might use this feature: | ||
| 733 | // 1) With this, an arbitrary number of additional command buffers may be executed in | ||
| 734 | // sequence without requiring any intervention of the CPU after the initial one is | ||
| 735 | // kicked off. | ||
| 736 | // 2) Games can configure these registers to provide a command list subroutine mechanism. | ||
| 737 | |||
| 738 | BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer | ||
| 739 | BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer | ||
| 740 | u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to | ||
| 741 | |||
| 742 | unsigned GetSize(unsigned index) const { | ||
| 743 | ASSERT(index < 2); | ||
| 744 | return 8 * size[index]; | ||
| 745 | } | ||
| 746 | |||
| 747 | PAddr GetPhysicalAddress(unsigned index) const { | ||
| 748 | ASSERT(index < 2); | ||
| 749 | return (PAddr)(8 * addr[index]); | ||
| 750 | } | ||
| 751 | } command_buffer; | ||
| 752 | |||
| 753 | INSERT_PADDING_WORDS(0x20); | ||
| 712 | 754 | ||
| 713 | enum class TriangleTopology : u32 { | 755 | enum class TriangleTopology : u32 { |
| 714 | List = 0, | 756 | List = 0, |
| @@ -861,6 +903,7 @@ struct Regs { | |||
| 861 | ADD_FIELD(trigger_draw); | 903 | ADD_FIELD(trigger_draw); |
| 862 | ADD_FIELD(trigger_draw_indexed); | 904 | ADD_FIELD(trigger_draw_indexed); |
| 863 | ADD_FIELD(vs_default_attributes_setup); | 905 | ADD_FIELD(vs_default_attributes_setup); |
| 906 | ADD_FIELD(command_buffer); | ||
| 864 | ADD_FIELD(triangle_topology); | 907 | ADD_FIELD(triangle_topology); |
| 865 | ADD_FIELD(vs_bool_uniforms); | 908 | ADD_FIELD(vs_bool_uniforms); |
| 866 | ADD_FIELD(vs_int_uniforms); | 909 | ADD_FIELD(vs_int_uniforms); |
| @@ -938,6 +981,7 @@ ASSERT_REG_POSITION(num_vertices, 0x228); | |||
| 938 | ASSERT_REG_POSITION(trigger_draw, 0x22e); | 981 | ASSERT_REG_POSITION(trigger_draw, 0x22e); |
| 939 | ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); | 982 | ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); |
| 940 | ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); | 983 | ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); |
| 984 | ASSERT_REG_POSITION(command_buffer, 0x238); | ||
| 941 | ASSERT_REG_POSITION(triangle_topology, 0x25e); | 985 | ASSERT_REG_POSITION(triangle_topology, 0x25e); |
| 942 | ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); | 986 | ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); |
| 943 | ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); | 987 | ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); |
| @@ -1053,21 +1097,12 @@ private: | |||
| 1053 | float value; | 1097 | float value; |
| 1054 | }; | 1098 | }; |
| 1055 | 1099 | ||
| 1056 | union CommandHeader { | ||
| 1057 | CommandHeader(u32 h) : hex(h) {} | ||
| 1058 | |||
| 1059 | u32 hex; | ||
| 1060 | |||
| 1061 | BitField< 0, 16, u32> cmd_id; | ||
| 1062 | BitField<16, 4, u32> parameter_mask; | ||
| 1063 | BitField<20, 11, u32> extra_data_length; | ||
| 1064 | BitField<31, 1, u32> group_commands; | ||
| 1065 | }; | ||
| 1066 | |||
| 1067 | /// Struct used to describe current Pica state | 1100 | /// Struct used to describe current Pica state |
| 1068 | struct State { | 1101 | struct State { |
| 1102 | /// Pica registers | ||
| 1069 | Regs regs; | 1103 | Regs regs; |
| 1070 | 1104 | ||
| 1105 | /// Vertex shader memory | ||
| 1071 | struct { | 1106 | struct { |
| 1072 | struct { | 1107 | struct { |
| 1073 | Math::Vec4<float24> f[96]; | 1108 | Math::Vec4<float24> f[96]; |
| @@ -1080,6 +1115,13 @@ struct State { | |||
| 1080 | std::array<u32, 1024> program_code; | 1115 | std::array<u32, 1024> program_code; |
| 1081 | std::array<u32, 1024> swizzle_data; | 1116 | std::array<u32, 1024> swizzle_data; |
| 1082 | } vs; | 1117 | } vs; |
| 1118 | |||
| 1119 | /// Current Pica command list | ||
| 1120 | struct { | ||
| 1121 | const u32* head_ptr; | ||
| 1122 | const u32* current_ptr; | ||
| 1123 | u32 length; | ||
| 1124 | } cmd_list; | ||
| 1083 | }; | 1125 | }; |
| 1084 | 1126 | ||
| 1085 | /// Initialize Pica state | 1127 | /// Initialize Pica state |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 6df3a74f2..113b573f8 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -402,11 +402,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 402 | 402 | ||
| 403 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { | 403 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { |
| 404 | switch (source) { | 404 | switch (source) { |
| 405 | // TODO: What's the difference between these two? | ||
| 406 | case Source::PrimaryColor: | 405 | case Source::PrimaryColor: |
| 406 | |||
| 407 | // HACK: Until we implement fragment lighting, use primary_color | ||
| 407 | case Source::PrimaryFragmentColor: | 408 | case Source::PrimaryFragmentColor: |
| 408 | return primary_color; | 409 | return primary_color; |
| 409 | 410 | ||
| 411 | // HACK: Until we implement fragment lighting, use zero | ||
| 412 | case Source::SecondaryFragmentColor: | ||
| 413 | return {0, 0, 0, 0}; | ||
| 414 | |||
| 410 | case Source::Texture0: | 415 | case Source::Texture0: |
| 411 | return texture_color[0]; | 416 | return texture_color[0]; |
| 412 | 417 | ||
| @@ -570,6 +575,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 570 | case Operation::Add: | 575 | case Operation::Add: |
| 571 | return std::min(255, input[0] + input[1]); | 576 | return std::min(255, input[0] + input[1]); |
| 572 | 577 | ||
| 578 | case Operation::AddSigned: | ||
| 579 | { | ||
| 580 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct | ||
| 581 | auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | ||
| 582 | return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | ||
| 583 | } | ||
| 584 | |||
| 573 | case Operation::Lerp: | 585 | case Operation::Lerp: |
| 574 | return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | 586 | return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; |
| 575 | 587 | ||
| @@ -808,10 +820,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 808 | } | 820 | } |
| 809 | }; | 821 | }; |
| 810 | 822 | ||
| 811 | using BlendEquation = Regs::BlendEquation; | ||
| 812 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | 823 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, |
| 813 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | 824 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, |
| 814 | BlendEquation equation) { | 825 | Regs::BlendEquation equation) { |
| 815 | Math::Vec4<int> result; | 826 | Math::Vec4<int> result; |
| 816 | 827 | ||
| 817 | auto src_result = (src * srcfactor).Cast<int>(); | 828 | auto src_result = (src * srcfactor).Cast<int>(); |
| @@ -866,8 +877,63 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 866 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); | 877 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); |
| 867 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); | 878 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); |
| 868 | } else { | 879 | } else { |
| 869 | LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op); | 880 | static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { |
| 870 | UNIMPLEMENTED(); | 881 | switch (op) { |
| 882 | case Regs::LogicOp::Clear: | ||
| 883 | return 0; | ||
| 884 | |||
| 885 | case Regs::LogicOp::And: | ||
| 886 | return src & dest; | ||
| 887 | |||
| 888 | case Regs::LogicOp::AndReverse: | ||
| 889 | return src & ~dest; | ||
| 890 | |||
| 891 | case Regs::LogicOp::Copy: | ||
| 892 | return src; | ||
| 893 | |||
| 894 | case Regs::LogicOp::Set: | ||
| 895 | return 255; | ||
| 896 | |||
| 897 | case Regs::LogicOp::CopyInverted: | ||
| 898 | return ~src; | ||
| 899 | |||
| 900 | case Regs::LogicOp::NoOp: | ||
| 901 | return dest; | ||
| 902 | |||
| 903 | case Regs::LogicOp::Invert: | ||
| 904 | return ~dest; | ||
| 905 | |||
| 906 | case Regs::LogicOp::Nand: | ||
| 907 | return ~(src & dest); | ||
| 908 | |||
| 909 | case Regs::LogicOp::Or: | ||
| 910 | return src | dest; | ||
| 911 | |||
| 912 | case Regs::LogicOp::Nor: | ||
| 913 | return ~(src | dest); | ||
| 914 | |||
| 915 | case Regs::LogicOp::Xor: | ||
| 916 | return src ^ dest; | ||
| 917 | |||
| 918 | case Regs::LogicOp::Equiv: | ||
| 919 | return ~(src ^ dest); | ||
| 920 | |||
| 921 | case Regs::LogicOp::AndInverted: | ||
| 922 | return ~src & dest; | ||
| 923 | |||
| 924 | case Regs::LogicOp::OrReverse: | ||
| 925 | return src | ~dest; | ||
| 926 | |||
| 927 | case Regs::LogicOp::OrInverted: | ||
| 928 | return ~src | dest; | ||
| 929 | } | ||
| 930 | }; | ||
| 931 | |||
| 932 | blend_output = Math::MakeVec( | ||
| 933 | LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), | ||
| 934 | LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), | ||
| 935 | LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), | ||
| 936 | LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); | ||
| 871 | } | 937 | } |
| 872 | 938 | ||
| 873 | const Math::Vec4<u8> result = { | 939 | const Math::Vec4<u8> result = { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bacdb7172..b51f8efdf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -135,6 +135,7 @@ void RasterizerOpenGL::Reset() { | |||
| 135 | SyncBlendFuncs(); | 135 | SyncBlendFuncs(); |
| 136 | SyncBlendColor(); | 136 | SyncBlendColor(); |
| 137 | SyncAlphaTest(); | 137 | SyncAlphaTest(); |
| 138 | SyncLogicOp(); | ||
| 138 | SyncStencilTest(); | 139 | SyncStencilTest(); |
| 139 | SyncDepthTest(); | 140 | SyncDepthTest(); |
| 140 | 141 | ||
| @@ -249,6 +250,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 249 | SyncDepthTest(); | 250 | SyncDepthTest(); |
| 250 | break; | 251 | break; |
| 251 | 252 | ||
| 253 | // Logic op | ||
| 254 | case PICA_REG_INDEX(output_merger.logic_op): | ||
| 255 | SyncLogicOp(); | ||
| 256 | break; | ||
| 257 | |||
| 252 | // TEV stage 0 | 258 | // TEV stage 0 |
| 253 | case PICA_REG_INDEX(tev_stage0.color_source1): | 259 | case PICA_REG_INDEX(tev_stage0.color_source1): |
| 254 | SyncTevSources(0, regs.tev_stage0); | 260 | SyncTevSources(0, regs.tev_stage0); |
| @@ -633,6 +639,10 @@ void RasterizerOpenGL::SyncAlphaTest() { | |||
| 633 | glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f); | 639 | glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f); |
| 634 | } | 640 | } |
| 635 | 641 | ||
| 642 | void RasterizerOpenGL::SyncLogicOp() { | ||
| 643 | state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op); | ||
| 644 | } | ||
| 645 | |||
| 636 | void RasterizerOpenGL::SyncStencilTest() { | 646 | void RasterizerOpenGL::SyncStencilTest() { |
| 637 | // TODO: Implement stencil test, mask, and op | 647 | // TODO: Implement stencil test, mask, and op |
| 638 | } | 648 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9896f8d04..d7d422b1f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -125,6 +125,9 @@ private: | |||
| 125 | /// Syncs the alpha test states to match the PICA register | 125 | /// Syncs the alpha test states to match the PICA register |
| 126 | void SyncAlphaTest(); | 126 | void SyncAlphaTest(); |
| 127 | 127 | ||
| 128 | /// Syncs the logic op states to match the PICA register | ||
| 129 | void SyncLogicOp(); | ||
| 130 | |||
| 128 | /// Syncs the stencil test states to match the PICA register | 131 | /// Syncs the stencil test states to match the PICA register |
| 129 | void SyncStencilTest(); | 132 | void SyncStencilTest(); |
| 130 | 133 | ||
diff --git a/src/video_core/renderer_opengl/gl_shaders.h b/src/video_core/renderer_opengl/gl_shaders.h index 8f0941230..a8cb2f595 100644 --- a/src/video_core/renderer_opengl/gl_shaders.h +++ b/src/video_core/renderer_opengl/gl_shaders.h | |||
| @@ -69,15 +69,16 @@ const char g_fragment_shader_hw[] = R"( | |||
| 69 | #define NUM_VTX_ATTR 7 | 69 | #define NUM_VTX_ATTR 7 |
| 70 | #define NUM_TEV_STAGES 6 | 70 | #define NUM_TEV_STAGES 6 |
| 71 | 71 | ||
| 72 | #define SOURCE_PRIMARYCOLOR 0x0 | 72 | #define SOURCE_PRIMARYCOLOR 0x0 |
| 73 | #define SOURCE_PRIMARYFRAGMENTCOLOR 0x1 | 73 | #define SOURCE_PRIMARYFRAGMENTCOLOR 0x1 |
| 74 | #define SOURCE_TEXTURE0 0x3 | 74 | #define SOURCE_SECONDARYFRAGMENTCOLOR 0x2 |
| 75 | #define SOURCE_TEXTURE1 0x4 | 75 | #define SOURCE_TEXTURE0 0x3 |
| 76 | #define SOURCE_TEXTURE2 0x5 | 76 | #define SOURCE_TEXTURE1 0x4 |
| 77 | #define SOURCE_TEXTURE3 0x6 | 77 | #define SOURCE_TEXTURE2 0x5 |
| 78 | #define SOURCE_PREVIOUSBUFFER 0xd | 78 | #define SOURCE_TEXTURE3 0x6 |
| 79 | #define SOURCE_CONSTANT 0xe | 79 | #define SOURCE_PREVIOUSBUFFER 0xd |
| 80 | #define SOURCE_PREVIOUS 0xf | 80 | #define SOURCE_CONSTANT 0xe |
| 81 | #define SOURCE_PREVIOUS 0xf | ||
| 81 | 82 | ||
| 82 | #define COLORMODIFIER_SOURCECOLOR 0x0 | 83 | #define COLORMODIFIER_SOURCECOLOR 0x0 |
| 83 | #define COLORMODIFIER_ONEMINUSSOURCECOLOR 0x1 | 84 | #define COLORMODIFIER_ONEMINUSSOURCECOLOR 0x1 |
| @@ -151,8 +152,11 @@ vec4 GetSource(int source) { | |||
| 151 | if (source == SOURCE_PRIMARYCOLOR) { | 152 | if (source == SOURCE_PRIMARYCOLOR) { |
| 152 | return o[2]; | 153 | return o[2]; |
| 153 | } else if (source == SOURCE_PRIMARYFRAGMENTCOLOR) { | 154 | } else if (source == SOURCE_PRIMARYFRAGMENTCOLOR) { |
| 154 | // HACK: Uses color value, but should really use fragment lighting output | 155 | // HACK: Until we implement fragment lighting, use primary_color |
| 155 | return o[2]; | 156 | return o[2]; |
| 157 | } else if (source == SOURCE_SECONDARYFRAGMENTCOLOR) { | ||
| 158 | // HACK: Until we implement fragment lighting, use zero | ||
| 159 | return vec4(0.0, 0.0, 0.0, 0.0); | ||
| 156 | } else if (source == SOURCE_TEXTURE0) { | 160 | } else if (source == SOURCE_TEXTURE0) { |
| 157 | return texture(tex[0], o[3].xy); | 161 | return texture(tex[0], o[3].xy); |
| 158 | } else if (source == SOURCE_TEXTURE1) { | 162 | } else if (source == SOURCE_TEXTURE1) { |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 0d7ba1983..9c5f38f94 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -32,6 +32,8 @@ OpenGLState::OpenGLState() { | |||
| 32 | blend.color.blue = 0.0f; | 32 | blend.color.blue = 0.0f; |
| 33 | blend.color.alpha = 0.0f; | 33 | blend.color.alpha = 0.0f; |
| 34 | 34 | ||
| 35 | logic_op = GL_COPY; | ||
| 36 | |||
| 35 | for (auto& texture_unit : texture_units) { | 37 | for (auto& texture_unit : texture_units) { |
| 36 | texture_unit.enabled_2d = false; | 38 | texture_unit.enabled_2d = false; |
| 37 | texture_unit.texture_2d = 0; | 39 | texture_unit.texture_2d = 0; |
| @@ -99,8 +101,13 @@ void OpenGLState::Apply() { | |||
| 99 | if (blend.enabled != cur_state.blend.enabled) { | 101 | if (blend.enabled != cur_state.blend.enabled) { |
| 100 | if (blend.enabled) { | 102 | if (blend.enabled) { |
| 101 | glEnable(GL_BLEND); | 103 | glEnable(GL_BLEND); |
| 104 | |||
| 105 | cur_state.logic_op = GL_COPY; | ||
| 106 | glLogicOp(cur_state.logic_op); | ||
| 107 | glDisable(GL_COLOR_LOGIC_OP); | ||
| 102 | } else { | 108 | } else { |
| 103 | glDisable(GL_BLEND); | 109 | glDisable(GL_BLEND); |
| 110 | glEnable(GL_COLOR_LOGIC_OP); | ||
| 104 | } | 111 | } |
| 105 | } | 112 | } |
| 106 | 113 | ||
| @@ -118,6 +125,10 @@ void OpenGLState::Apply() { | |||
| 118 | glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func); | 125 | glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func); |
| 119 | } | 126 | } |
| 120 | 127 | ||
| 128 | if (logic_op != cur_state.logic_op) { | ||
| 129 | glLogicOp(logic_op); | ||
| 130 | } | ||
| 131 | |||
| 121 | // Textures | 132 | // Textures |
| 122 | for (unsigned texture_index = 0; texture_index < ARRAY_SIZE(texture_units); ++texture_index) { | 133 | for (unsigned texture_index = 0; texture_index < ARRAY_SIZE(texture_units); ++texture_index) { |
| 123 | if (texture_units[texture_index].enabled_2d != cur_state.texture_units[texture_index].enabled_2d) { | 134 | if (texture_units[texture_index].enabled_2d != cur_state.texture_units[texture_index].enabled_2d) { |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 63dba2761..6b97721d6 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -42,6 +42,8 @@ public: | |||
| 42 | } color; // GL_BLEND_COLOR | 42 | } color; // GL_BLEND_COLOR |
| 43 | } blend; | 43 | } blend; |
| 44 | 44 | ||
| 45 | GLenum logic_op; // GL_LOGIC_OP_MODE | ||
| 46 | |||
| 45 | // 3 texture units - one for each that is used in PICA fragment shader emulation | 47 | // 3 texture units - one for each that is used in PICA fragment shader emulation |
| 46 | struct { | 48 | struct { |
| 47 | bool enabled_2d; // GL_TEXTURE_2D | 49 | bool enabled_2d; // GL_TEXTURE_2D |
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index f8763e71b..e566f9f7a 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h | |||
| @@ -71,6 +71,37 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { | |||
| 71 | return blend_func_table[(unsigned)factor]; | 71 | return blend_func_table[(unsigned)factor]; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | inline GLenum LogicOp(Pica::Regs::LogicOp op) { | ||
| 75 | static const GLenum logic_op_table[] = { | ||
| 76 | GL_CLEAR, // Clear | ||
| 77 | GL_AND, // And | ||
| 78 | GL_AND_REVERSE, // AndReverse | ||
| 79 | GL_COPY, // Copy | ||
| 80 | GL_SET, // Set | ||
| 81 | GL_COPY_INVERTED, // CopyInverted | ||
| 82 | GL_NOOP, // NoOp | ||
| 83 | GL_INVERT, // Invert | ||
| 84 | GL_NAND, // Nand | ||
| 85 | GL_OR, // Or | ||
| 86 | GL_NOR, // Nor | ||
| 87 | GL_XOR, // Xor | ||
| 88 | GL_EQUIV, // Equiv | ||
| 89 | GL_AND_INVERTED, // AndInverted | ||
| 90 | GL_OR_REVERSE, // OrReverse | ||
| 91 | GL_OR_INVERTED, // OrInverted | ||
| 92 | }; | ||
| 93 | |||
| 94 | // Range check table for input | ||
| 95 | if ((unsigned)op >= ARRAY_SIZE(logic_op_table)) { | ||
| 96 | LOG_CRITICAL(Render_OpenGL, "Unknown logic op %d", op); | ||
| 97 | UNREACHABLE(); | ||
| 98 | |||
| 99 | return GL_COPY; | ||
| 100 | } | ||
| 101 | |||
| 102 | return logic_op_table[(unsigned)op]; | ||
| 103 | } | ||
| 104 | |||
| 74 | inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { | 105 | inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { |
| 75 | static const GLenum compare_func_table[] = { | 106 | static const GLenum compare_func_table[] = { |
| 76 | GL_NEVER, // CompareFunc::Never | 107 | GL_NEVER, // CompareFunc::Never |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 4ebb42429..87006a832 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -119,17 +119,13 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 119 | switch (instr.opcode.Value().GetInfo().type) { | 119 | switch (instr.opcode.Value().GetInfo().type) { |
| 120 | case OpCode::Type::Arithmetic: | 120 | case OpCode::Type::Arithmetic: |
| 121 | { | 121 | { |
| 122 | bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed); | 122 | const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); |
| 123 | // TODO: We don't really support this properly: For instance, the address register | ||
| 124 | // offset needs to be applied to SRC2 instead, etc. | ||
| 125 | // For now, we just abort in this situation. | ||
| 126 | ASSERT_MSG(!is_inverted, "Bad condition..."); | ||
| 127 | 123 | ||
| 128 | const int address_offset = (instr.common.address_register_index == 0) | 124 | const int address_offset = (instr.common.address_register_index == 0) |
| 129 | ? 0 : state.address_registers[instr.common.address_register_index - 1]; | 125 | ? 0 : state.address_registers[instr.common.address_register_index - 1]; |
| 130 | 126 | ||
| 131 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); | 127 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); |
| 132 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); | 128 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); |
| 133 | 129 | ||
| 134 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | 130 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); |
| 135 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | 131 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); |
| @@ -208,6 +204,15 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 208 | } | 204 | } |
| 209 | break; | 205 | break; |
| 210 | 206 | ||
| 207 | case OpCode::Id::MIN: | ||
| 208 | for (int i = 0; i < 4; ++i) { | ||
| 209 | if (!swizzle.DestComponentEnabled(i)) | ||
| 210 | continue; | ||
| 211 | |||
| 212 | dest[i] = std::min(src1[i], src2[i]); | ||
| 213 | } | ||
| 214 | break; | ||
| 215 | |||
| 211 | case OpCode::Id::DP3: | 216 | case OpCode::Id::DP3: |
| 212 | case OpCode::Id::DP4: | 217 | case OpCode::Id::DP4: |
| 213 | { | 218 | { |
| @@ -279,6 +284,16 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 279 | break; | 284 | break; |
| 280 | } | 285 | } |
| 281 | 286 | ||
| 287 | case OpCode::Id::SLT: | ||
| 288 | case OpCode::Id::SLTI: | ||
| 289 | for (int i = 0; i < 4; ++i) { | ||
| 290 | if (!swizzle.DestComponentEnabled(i)) | ||
| 291 | continue; | ||
| 292 | |||
| 293 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | ||
| 294 | } | ||
| 295 | break; | ||
| 296 | |||
| 282 | case OpCode::Id::CMP: | 297 | case OpCode::Id::CMP: |
| 283 | for (int i = 0; i < 2; ++i) { | 298 | for (int i = 0; i < 2; ++i) { |
| 284 | // TODO: Can you restrict to one compare via dest masking? | 299 | // TODO: Can you restrict to one compare via dest masking? |