diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/vector_math.h | 10 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/pica_state.h | 54 | ||||
| -rw-r--r-- | src/video_core/regs.h | 7 | ||||
| -rw-r--r-- | src/video_core/regs_texturing.h | 96 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 232 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 271 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 36 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 20 | ||||
| -rw-r--r-- | src/video_core/swrasterizer/proctex.cpp | 223 | ||||
| -rw-r--r-- | src/video_core/swrasterizer/proctex.h | 16 | ||||
| -rw-r--r-- | src/video_core/swrasterizer/rasterizer.cpp | 13 |
15 files changed, 1048 insertions, 11 deletions
diff --git a/src/common/vector_math.h b/src/common/vector_math.h index 7ca8e15f5..c7a461a1e 100644 --- a/src/common/vector_math.h +++ b/src/common/vector_math.h | |||
| @@ -652,6 +652,16 @@ static inline decltype((X{} * int{} + X{} * int{}) / base) LerpInt(const X& begi | |||
| 652 | return (begin * (base - t) + end * t) / base; | 652 | return (begin * (base - t) + end * t) / base; |
| 653 | } | 653 | } |
| 654 | 654 | ||
| 655 | // Bilinear interpolation of four samples. s is the weight of the two first-pass lerps | ||
| 656 | // (x00 -> x01 and x10 -> x11); t is the weight of the final lerp between those two results. | ||
| 657 | template <typename X> | ||
| 658 | inline auto BilinearInterp(const X& x00, const X& x01, const X& x10, const X& x11, const float s, | ||
| 659 | const float t) { | ||
| 660 | auto y0 = Lerp(x00, x01, s); | ||
| 661 | auto y1 = Lerp(x10, x11, s); | ||
| 662 | return Lerp(y0, y1, t); | ||
| 663 | } | ||
| 664 | |||
| 655 | // Utility vector factories | 665 | // Utility vector factories |
| 656 | template <typename T> | 666 | template <typename T> |
| 657 | static inline Vec2<T> MakeVec(const T& x, const T& y) { | 667 | static inline Vec2<T> MakeVec(const T& x, const T& y) { |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5317719e8..e00b88f71 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -15,6 +15,7 @@ set(SRCS | |||
| 15 | shader/shader_interpreter.cpp | 15 | shader/shader_interpreter.cpp |
| 16 | swrasterizer/clipper.cpp | 16 | swrasterizer/clipper.cpp |
| 17 | swrasterizer/framebuffer.cpp | 17 | swrasterizer/framebuffer.cpp |
| 18 | swrasterizer/proctex.cpp | ||
| 18 | swrasterizer/rasterizer.cpp | 19 | swrasterizer/rasterizer.cpp |
| 19 | swrasterizer/swrasterizer.cpp | 20 | swrasterizer/swrasterizer.cpp |
| 20 | swrasterizer/texturing.cpp | 21 | swrasterizer/texturing.cpp |
| @@ -54,6 +55,7 @@ set(HEADERS | |||
| 54 | shader/shader_interpreter.h | 55 | shader/shader_interpreter.h |
| 55 | swrasterizer/clipper.h | 56 | swrasterizer/clipper.h |
| 56 | swrasterizer/framebuffer.h | 57 | swrasterizer/framebuffer.h |
| 58 | swrasterizer/proctex.h | ||
| 57 | swrasterizer/rasterizer.h | 59 | swrasterizer/rasterizer.h |
| 58 | swrasterizer/swrasterizer.h | 60 | swrasterizer/swrasterizer.h |
| 59 | swrasterizer/texturing.h | 61 | swrasterizer/texturing.h |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 8d3f76bde..4633a1df1 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -556,6 +556,37 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 556 | break; | 556 | break; |
| 557 | } | 557 | } |
| 558 | 558 | ||
| 559 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[0], 0xb0): | ||
| 560 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[1], 0xb1): | ||
| 561 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[2], 0xb2): | ||
| 562 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[3], 0xb3): | ||
| 563 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[4], 0xb4): | ||
| 564 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[5], 0xb5): | ||
| 565 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[6], 0xb6): | ||
| 566 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[7], 0xb7): { | ||
| 567 | auto& index = regs.texturing.proctex_lut_config.index; | ||
| 568 | auto& pt = g_state.proctex; | ||
| 569 | |||
| 570 | switch (regs.texturing.proctex_lut_config.ref_table.Value()) { | ||
| 571 | case TexturingRegs::ProcTexLutTable::Noise: | ||
| 572 | pt.noise_table[index % pt.noise_table.size()].raw = value; | ||
| 573 | break; | ||
| 574 | case TexturingRegs::ProcTexLutTable::ColorMap: | ||
| 575 | pt.color_map_table[index % pt.color_map_table.size()].raw = value; | ||
| 576 | break; | ||
| 577 | case TexturingRegs::ProcTexLutTable::AlphaMap: | ||
| 578 | pt.alpha_map_table[index % pt.alpha_map_table.size()].raw = value; | ||
| 579 | break; | ||
| 580 | case TexturingRegs::ProcTexLutTable::Color: | ||
| 581 | pt.color_table[index % pt.color_table.size()].raw = value; | ||
| 582 | break; | ||
| 583 | case TexturingRegs::ProcTexLutTable::ColorDiff: | ||
| 584 | pt.color_diff_table[index % pt.color_diff_table.size()].raw = value; | ||
| 585 | break; | ||
| 586 | } | ||
| 587 | index.Assign(index + 1); | ||
| 588 | break; | ||
| 589 | } | ||
| 559 | default: | 590 | default: |
| 560 | break; | 591 | break; |
| 561 | } | 592 | } |
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index af7536d11..f46db09fb 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/vector_math.h" | ||
| 10 | #include "video_core/primitive_assembly.h" | 11 | #include "video_core/primitive_assembly.h" |
| 11 | #include "video_core/regs.h" | 12 | #include "video_core/regs.h" |
| 12 | #include "video_core/shader/shader.h" | 13 | #include "video_core/shader/shader.h" |
| @@ -25,6 +26,59 @@ struct State { | |||
| 25 | 26 | ||
| 26 | Shader::AttributeBuffer input_default_attributes; | 27 | Shader::AttributeBuffer input_default_attributes; |
| 27 | 28 | ||
// Procedural texture (proctex) lookup tables. Entries are filled by writes to the
// proctex_lut_data registers; proctex_lut_config selects the destination table and index.
| 29 | struct ProcTex { | ||
| 30 | union ValueEntry { | ||
| 31 | u32 raw; | ||
| 32 | |||
| 33 | // LUT value in bits 0-11: unsigned 12-bit fixed point with all 12 bits fractional | ||
| 34 | BitField<0, 12, u32> value; // 0.0.12 fixed point | ||
| 35 | |||
| 36 | // Difference between two entry values, stored in bits 12-23. Used for efficient interpolation. | ||
| 37 | // Signed (two's complement) 0.0.12 fixed point, so the range is roughly [-0.5, 0.5). | ||
| 38 | // Note: this field's format differs from the difference field of the lighting LUT | ||
| 39 | BitField<12, 12, s32> difference; | ||
| 40 | |||
| 41 | float ToFloat() const { | ||
| 42 | return static_cast<float>(value) / 4095.f; | ||
| 43 | } | ||
| 44 | |||
| 45 | float DiffToFloat() const { | ||
| 46 | return static_cast<float>(difference) / 4095.f; | ||
| 47 | } | ||
| 48 | }; | ||
| 49 | |||
| 50 | union ColorEntry { | ||
| 51 | u32 raw; | ||
| 52 | BitField<0, 8, u32> r; | ||
| 53 | BitField<8, 8, u32> g; | ||
| 54 | BitField<16, 8, u32> b; | ||
| 55 | BitField<24, 8, u32> a; | ||
| 56 | |||
| 57 | Math::Vec4<u8> ToVector() const { | ||
| 58 | return {static_cast<u8>(r), static_cast<u8>(g), static_cast<u8>(b), | ||
| 59 | static_cast<u8>(a)}; | ||
| 60 | } | ||
| 61 | }; | ||
| 62 | |||
| 63 | union ColorDifferenceEntry { | ||
| 64 | u32 raw; | ||
| 65 | BitField<0, 8, s32> r; // each channel holds half of the difference between two ColorEntry values; ToVector() doubles it | ||
| 66 | BitField<8, 8, s32> g; | ||
| 67 | BitField<16, 8, s32> b; | ||
| 68 | BitField<24, 8, s32> a; | ||
| 69 | |||
| 70 | Math::Vec4<s32> ToVector() const { | ||
| 71 | return Math::Vec4<s32>{r, g, b, a} * 2; | ||
| 72 | } | ||
| 73 | }; | ||
| 74 | |||
| 75 | std::array<ValueEntry, 128> noise_table; | ||
| 76 | std::array<ValueEntry, 128> color_map_table; | ||
| 77 | std::array<ValueEntry, 128> alpha_map_table; | ||
| 78 | std::array<ColorEntry, 256> color_table; | ||
| 79 | std::array<ColorDifferenceEntry, 256> color_diff_table; | ||
| 80 | } proctex; | ||
| 81 | |||
| 28 | struct { | 82 | struct { |
| 29 | union LutEntry { | 83 | union LutEntry { |
| 30 | // Used for raw access | 84 | // Used for raw access |
diff --git a/src/video_core/regs.h b/src/video_core/regs.h index 1776dad89..6d5f98cac 100644 --- a/src/video_core/regs.h +++ b/src/video_core/regs.h | |||
| @@ -101,6 +101,13 @@ ASSERT_REG_POSITION(texturing.texture1, 0x91); | |||
| 101 | ASSERT_REG_POSITION(texturing.texture1_format, 0x96); | 101 | ASSERT_REG_POSITION(texturing.texture1_format, 0x96); |
| 102 | ASSERT_REG_POSITION(texturing.texture2, 0x99); | 102 | ASSERT_REG_POSITION(texturing.texture2, 0x99); |
| 103 | ASSERT_REG_POSITION(texturing.texture2_format, 0x9e); | 103 | ASSERT_REG_POSITION(texturing.texture2_format, 0x9e); |
| 104 | ASSERT_REG_POSITION(texturing.proctex, 0xa8); | ||
| 105 | ASSERT_REG_POSITION(texturing.proctex_noise_u, 0xa9); | ||
| 106 | ASSERT_REG_POSITION(texturing.proctex_noise_v, 0xaa); | ||
| 107 | ASSERT_REG_POSITION(texturing.proctex_noise_frequency, 0xab); | ||
| 108 | ASSERT_REG_POSITION(texturing.proctex_lut, 0xac); | ||
| 109 | ASSERT_REG_POSITION(texturing.proctex_lut_offset, 0xad); | ||
| 110 | ASSERT_REG_POSITION(texturing.proctex_lut_config, 0xaf); | ||
| 104 | ASSERT_REG_POSITION(texturing.tev_stage0, 0xc0); | 111 | ASSERT_REG_POSITION(texturing.tev_stage0, 0xc0); |
| 105 | ASSERT_REG_POSITION(texturing.tev_stage1, 0xc8); | 112 | ASSERT_REG_POSITION(texturing.tev_stage1, 0xc8); |
| 106 | ASSERT_REG_POSITION(texturing.tev_stage2, 0xd0); | 113 | ASSERT_REG_POSITION(texturing.tev_stage2, 0xd0); |
diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h index 3318812da..e4038b41b 100644 --- a/src/video_core/regs_texturing.h +++ b/src/video_core/regs_texturing.h | |||
| @@ -127,8 +127,8 @@ struct TexturingRegs { | |||
| 127 | BitField<0, 1, u32> texture0_enable; | 127 | BitField<0, 1, u32> texture0_enable; |
| 128 | BitField<1, 1, u32> texture1_enable; | 128 | BitField<1, 1, u32> texture1_enable; |
| 129 | BitField<2, 1, u32> texture2_enable; | 129 | BitField<2, 1, u32> texture2_enable; |
| 130 | BitField<8, 2, u32> texture3_coordinates; // TODO: unimplemented | 130 | BitField<8, 2, u32> texture3_coordinates; |
| 131 | BitField<10, 1, u32> texture3_enable; // TODO: unimplemented | 131 | BitField<10, 1, u32> texture3_enable; |
| 132 | BitField<13, 1, u32> texture2_use_coord1; | 132 | BitField<13, 1, u32> texture2_use_coord1; |
| 133 | BitField<16, 1, u32> clear_texture_cache; // TODO: unimplemented | 133 | BitField<16, 1, u32> clear_texture_cache; // TODO: unimplemented |
| 134 | } main_config; | 134 | } main_config; |
| @@ -142,7 +142,7 @@ struct TexturingRegs { | |||
| 142 | INSERT_PADDING_WORDS(0x2); | 142 | INSERT_PADDING_WORDS(0x2); |
| 143 | TextureConfig texture2; | 143 | TextureConfig texture2; |
| 144 | BitField<0, 4, TextureFormat> texture2_format; | 144 | BitField<0, 4, TextureFormat> texture2_format; |
| 145 | INSERT_PADDING_WORDS(0x21); | 145 | INSERT_PADDING_WORDS(0x9); |
| 146 | 146 | ||
| 147 | struct FullTextureConfig { | 147 | struct FullTextureConfig { |
| 148 | const bool enabled; | 148 | const bool enabled; |
| @@ -157,6 +157,96 @@ struct TexturingRegs { | |||
| 157 | }}; | 157 | }}; |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | // 0xa8-0xad: ProcTex Config | ||
// Values of the 3-bit u_clamp / v_clamp fields of the proctex register.
| 161 | enum class ProcTexClamp : u32 { | ||
| 162 | ToZero = 0, | ||
| 163 | ToEdge = 1, | ||
| 164 | SymmetricalRepeat = 2, | ||
| 165 | MirroredRepeat = 3, | ||
| 166 | Pulse = 4, | ||
| 167 | }; | ||
| 168 | |||
// Values of the 4-bit color_combiner / alpha_combiner fields of the proctex register.
// The per-value comments give the formula each one selects in terms of the u and v inputs.
| 169 | enum class ProcTexCombiner : u32 { | ||
| 170 | U = 0, // u | ||
| 171 | U2 = 1, // u * u | ||
| 172 | V = 2, // v | ||
| 173 | V2 = 3, // v * v | ||
| 174 | Add = 4, // (u + v) / 2 | ||
| 175 | Add2 = 5, // (u * u + v * v) / 2 | ||
| 176 | SqrtAdd2 = 6, // sqrt(u * u + v * v) | ||
| 177 | Min = 7, // min(u, v) | ||
| 178 | Max = 8, // max(u, v) | ||
| 179 | RMax = 9, // Average of Max and SqrtAdd2 | ||
| 180 | }; | ||
| 181 | |||
// Values of the 2-bit u_shift / v_shift fields of the proctex register.
| 182 | enum class ProcTexShift : u32 { | ||
| 183 | None = 0, | ||
| 184 | Odd = 1, | ||
| 185 | Even = 2, | ||
| 186 | }; | ||
| 187 | |||
// Main proctex configuration register (asserted at register index 0xa8 in regs.h):
// per-axis clamp and shift modes, color/alpha combiners, and the separate-alpha and noise flags.
| 188 | union { | ||
| 189 | BitField<0, 3, ProcTexClamp> u_clamp; | ||
| 190 | BitField<3, 3, ProcTexClamp> v_clamp; | ||
| 191 | BitField<6, 4, ProcTexCombiner> color_combiner; | ||
| 192 | BitField<10, 4, ProcTexCombiner> alpha_combiner; | ||
| 193 | BitField<14, 1, u32> separate_alpha; | ||
| 194 | BitField<15, 1, u32> noise_enable; | ||
| 195 | BitField<16, 2, ProcTexShift> u_shift; | ||
| 196 | BitField<18, 2, ProcTexShift> v_shift; | ||
| 197 | BitField<20, 8, u32> bias_low; // float16 (presumably its low byte, paired with proctex_lut.bias_high - confirm). TODO: unimplemented | ||
| 198 | } proctex; | ||
| 199 | |||
// Per-axis noise parameters; instantiated once for u (proctex_noise_u) and once for v
// (proctex_noise_v).
| 200 | union ProcTexNoiseConfig { | ||
| 201 | BitField<0, 16, s32> amplitude; // fixed1.3.12 | ||
| 202 | BitField<16, 16, u32> phase; // float16 | ||
| 203 | }; | ||
| 204 | |||
| 205 | ProcTexNoiseConfig proctex_noise_u; | ||
| 206 | ProcTexNoiseConfig proctex_noise_v; | ||
| 207 | |||
// Noise frequency for the u and v axes, each held as a raw float16 in one half of the word.
| 208 | union { | ||
| 209 | BitField<0, 16, u32> u; // float16 | ||
| 210 | BitField<16, 16, u32> v; // float16 | ||
| 211 | } proctex_noise_frequency; | ||
| 212 | |||
// Values of the 3-bit proctex_lut.filter field.
| 213 | enum class ProcTexFilter : u32 { | ||
| 214 | Nearest = 0, | ||
| 215 | Linear = 1, | ||
| 216 | NearestMipmapNearest = 2, | ||
| 217 | LinearMipmapNearest = 3, | ||
| 218 | NearestMipmapLinear = 4, | ||
| 219 | LinearMipmapLinear = 5, | ||
| 220 | }; | ||
| 221 | |||
// proctex_lut register (asserted at register index 0xac in regs.h): filter mode, an 8-bit
// width, and the bias_high field.
| 222 | union { | ||
| 223 | BitField<0, 3, ProcTexFilter> filter; | ||
| 224 | BitField<11, 8, u32> width; | ||
| 225 | BitField<19, 8, u32> bias_high; // TODO: unimplemented | ||
| 226 | } proctex_lut; | ||
| 227 | |||
| 228 | BitField<0, 8, u32> proctex_lut_offset; | ||
| 229 | |||
| 230 | INSERT_PADDING_WORDS(0x1); | ||
| 231 | |||
| 232 | // 0xaf-0xb7: ProcTex LUT | ||
// Identifies which proctex table a write to proctex_lut_data targets (the value of
// proctex_lut_config.ref_table). Note that value 1 has no enumerator.
| 233 | enum class ProcTexLutTable : u32 { | ||
| 234 | Noise = 0, | ||
| 235 | ColorMap = 2, | ||
| 236 | AlphaMap = 3, | ||
| 237 | Color = 4, | ||
| 238 | ColorDiff = 5, | ||
| 239 | }; | ||
| 240 | |||
// Write cursor for proctex_lut_data: ref_table selects the destination table and index is the
// entry position, which the command processor increments after each data write.
| 241 | union { | ||
| 242 | BitField<0, 8, u32> index; | ||
| 243 | BitField<8, 4, ProcTexLutTable> ref_table; | ||
| 244 | } proctex_lut_config; | ||
| 245 | |||
| 246 | u32 proctex_lut_data[8]; | ||
| 247 | |||
| 248 | INSERT_PADDING_WORDS(0x8); | ||
| 249 | |||
| 160 | // 0xc0-0xff: Texture Combiner (akin to glTexEnv) | 250 | // 0xc0-0xff: Texture Combiner (akin to glTexEnv) |
| 161 | struct TevStageConfig { | 251 | struct TevStageConfig { |
| 162 | enum class Source : u32 { | 252 | enum class Source : u32 { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 12ac9bbd9..aa9b831dd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -55,6 +55,12 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { | |||
| 55 | 55 | ||
| 56 | uniform_block_data.fog_lut_dirty = true; | 56 | uniform_block_data.fog_lut_dirty = true; |
| 57 | 57 | ||
| 58 | uniform_block_data.proctex_noise_lut_dirty = true; | ||
| 59 | uniform_block_data.proctex_color_map_dirty = true; | ||
| 60 | uniform_block_data.proctex_alpha_map_dirty = true; | ||
| 61 | uniform_block_data.proctex_lut_dirty = true; | ||
| 62 | uniform_block_data.proctex_diff_lut_dirty = true; | ||
| 63 | |||
| 58 | // Set vertex attributes | 64 | // Set vertex attributes |
| 59 | glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, | 65 | glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, |
| 60 | sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); | 66 | sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); |
| @@ -115,6 +121,51 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { | |||
| 115 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | 121 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); |
| 116 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | 122 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); |
| 117 | 123 | ||
| 124 | // Setup the noise LUT for proctex | ||
| 125 | proctex_noise_lut.Create(); | ||
| 126 | state.proctex_noise_lut.texture_1d = proctex_noise_lut.handle; | ||
| 127 | state.Apply(); | ||
| 128 | glActiveTexture(GL_TEXTURE10); | ||
| 129 | glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); | ||
| 130 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | ||
| 131 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | ||
| 132 | |||
| 133 | // Setup the color map for proctex | ||
| 134 | proctex_color_map.Create(); | ||
| 135 | state.proctex_color_map.texture_1d = proctex_color_map.handle; | ||
| 136 | state.Apply(); | ||
| 137 | glActiveTexture(GL_TEXTURE11); | ||
| 138 | glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); | ||
| 139 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | ||
| 140 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | ||
| 141 | |||
| 142 | // Setup the alpha map for proctex | ||
| 143 | proctex_alpha_map.Create(); | ||
| 144 | state.proctex_alpha_map.texture_1d = proctex_alpha_map.handle; | ||
| 145 | state.Apply(); | ||
| 146 | glActiveTexture(GL_TEXTURE12); | ||
| 147 | glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); | ||
| 148 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | ||
| 149 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | ||
| 150 | |||
| 151 | // Setup the LUT for proctex | ||
| 152 | proctex_lut.Create(); | ||
| 153 | state.proctex_lut.texture_1d = proctex_lut.handle; | ||
| 154 | state.Apply(); | ||
| 155 | glActiveTexture(GL_TEXTURE13); | ||
| 156 | glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); | ||
| 157 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | ||
| 158 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | ||
| 159 | |||
| 160 | // Setup the difference LUT for proctex | ||
| 161 | proctex_diff_lut.Create(); | ||
| 162 | state.proctex_diff_lut.texture_1d = proctex_diff_lut.handle; | ||
| 163 | state.Apply(); | ||
| 164 | glActiveTexture(GL_TEXTURE14); | ||
| 165 | glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); | ||
| 166 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | ||
| 167 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | ||
| 168 | |||
| 118 | // Sync fixed function OpenGL state | 169 | // Sync fixed function OpenGL state |
| 119 | SyncCullMode(); | 170 | SyncCullMode(); |
| 120 | SyncBlendEnabled(); | 171 | SyncBlendEnabled(); |
| @@ -272,6 +323,36 @@ void RasterizerOpenGL::DrawTriangles() { | |||
| 272 | uniform_block_data.fog_lut_dirty = false; | 323 | uniform_block_data.fog_lut_dirty = false; |
| 273 | } | 324 | } |
| 274 | 325 | ||
| 326 | // Sync the proctex noise lut | ||
| 327 | if (uniform_block_data.proctex_noise_lut_dirty) { | ||
| 328 | SyncProcTexNoiseLUT(); | ||
| 329 | uniform_block_data.proctex_noise_lut_dirty = false; | ||
| 330 | } | ||
| 331 | |||
| 332 | // Sync the proctex color map | ||
| 333 | if (uniform_block_data.proctex_color_map_dirty) { | ||
| 334 | SyncProcTexColorMap(); | ||
| 335 | uniform_block_data.proctex_color_map_dirty = false; | ||
| 336 | } | ||
| 337 | |||
| 338 | // Sync the proctex alpha map | ||
| 339 | if (uniform_block_data.proctex_alpha_map_dirty) { | ||
| 340 | SyncProcTexAlphaMap(); | ||
| 341 | uniform_block_data.proctex_alpha_map_dirty = false; | ||
| 342 | } | ||
| 343 | |||
| 344 | // Sync the proctex lut | ||
| 345 | if (uniform_block_data.proctex_lut_dirty) { | ||
| 346 | SyncProcTexLUT(); | ||
| 347 | uniform_block_data.proctex_lut_dirty = false; | ||
| 348 | } | ||
| 349 | |||
| 350 | // Sync the proctex difference lut | ||
| 351 | if (uniform_block_data.proctex_diff_lut_dirty) { | ||
| 352 | SyncProcTexDiffLUT(); | ||
| 353 | uniform_block_data.proctex_diff_lut_dirty = false; | ||
| 354 | } | ||
| 355 | |||
| 275 | // Sync the uniform data | 356 | // Sync the uniform data |
| 276 | if (uniform_block_data.dirty) { | 357 | if (uniform_block_data.dirty) { |
| 277 | glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, | 358 | glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, |
| @@ -354,6 +435,47 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 354 | uniform_block_data.fog_lut_dirty = true; | 435 | uniform_block_data.fog_lut_dirty = true; |
| 355 | break; | 436 | break; |
| 356 | 437 | ||
| 438 | // ProcTex state | ||
| 439 | case PICA_REG_INDEX(texturing.proctex): | ||
| 440 | case PICA_REG_INDEX(texturing.proctex_lut): | ||
| 441 | case PICA_REG_INDEX(texturing.proctex_lut_offset): | ||
| 442 | shader_dirty = true; | ||
| 443 | break; | ||
| 444 | |||
| 445 | case PICA_REG_INDEX(texturing.proctex_noise_u): | ||
| 446 | case PICA_REG_INDEX(texturing.proctex_noise_v): | ||
| 447 | case PICA_REG_INDEX(texturing.proctex_noise_frequency): | ||
| 448 | SyncProcTexNoise(); | ||
| 449 | break; | ||
| 450 | |||
| 451 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[0], 0xb0): | ||
| 452 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[1], 0xb1): | ||
| 453 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[2], 0xb2): | ||
| 454 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[3], 0xb3): | ||
| 455 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[4], 0xb4): | ||
| 456 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[5], 0xb5): | ||
| 457 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[6], 0xb6): | ||
| 458 | case PICA_REG_INDEX_WORKAROUND(texturing.proctex_lut_data[7], 0xb7): | ||
| 459 | using Pica::TexturingRegs; | ||
| 460 | switch (regs.texturing.proctex_lut_config.ref_table.Value()) { | ||
| 461 | case TexturingRegs::ProcTexLutTable::Noise: | ||
| 462 | uniform_block_data.proctex_noise_lut_dirty = true; | ||
| 463 | break; | ||
| 464 | case TexturingRegs::ProcTexLutTable::ColorMap: | ||
| 465 | uniform_block_data.proctex_color_map_dirty = true; | ||
| 466 | break; | ||
| 467 | case TexturingRegs::ProcTexLutTable::AlphaMap: | ||
| 468 | uniform_block_data.proctex_alpha_map_dirty = true; | ||
| 469 | break; | ||
| 470 | case TexturingRegs::ProcTexLutTable::Color: | ||
| 471 | uniform_block_data.proctex_lut_dirty = true; | ||
| 472 | break; | ||
| 473 | case TexturingRegs::ProcTexLutTable::ColorDiff: | ||
| 474 | uniform_block_data.proctex_diff_lut_dirty = true; | ||
| 475 | break; | ||
| 476 | } | ||
| 477 | break; | ||
| 478 | |||
| 357 | // Alpha test | 479 | // Alpha test |
| 358 | case PICA_REG_INDEX(framebuffer.output_merger.alpha_test): | 480 | case PICA_REG_INDEX(framebuffer.output_merger.alpha_test): |
| 359 | SyncAlphaTest(); | 481 | SyncAlphaTest(); |
| @@ -1072,6 +1194,35 @@ void RasterizerOpenGL::SetShader() { | |||
| 1072 | glUniform1i(uniform_fog_lut, 9); | 1194 | glUniform1i(uniform_fog_lut, 9); |
| 1073 | } | 1195 | } |
| 1074 | 1196 | ||
| 1197 | GLuint uniform_proctex_noise_lut = | ||
| 1198 | glGetUniformLocation(shader->shader.handle, "proctex_noise_lut"); | ||
| 1199 | if (uniform_proctex_noise_lut != -1) { | ||
| 1200 | glUniform1i(uniform_proctex_noise_lut, 10); | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | GLuint uniform_proctex_color_map = | ||
| 1204 | glGetUniformLocation(shader->shader.handle, "proctex_color_map"); | ||
| 1205 | if (uniform_proctex_color_map != -1) { | ||
| 1206 | glUniform1i(uniform_proctex_color_map, 11); | ||
| 1207 | } | ||
| 1208 | |||
| 1209 | GLuint uniform_proctex_alpha_map = | ||
| 1210 | glGetUniformLocation(shader->shader.handle, "proctex_alpha_map"); | ||
| 1211 | if (uniform_proctex_alpha_map != -1) { | ||
| 1212 | glUniform1i(uniform_proctex_alpha_map, 12); | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | GLuint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut"); | ||
| 1216 | if (uniform_proctex_lut != -1) { | ||
| 1217 | glUniform1i(uniform_proctex_lut, 13); | ||
| 1218 | } | ||
| 1219 | |||
| 1220 | GLuint uniform_proctex_diff_lut = | ||
| 1221 | glGetUniformLocation(shader->shader.handle, "proctex_diff_lut"); | ||
| 1222 | if (uniform_proctex_diff_lut != -1) { | ||
| 1223 | glUniform1i(uniform_proctex_diff_lut, 14); | ||
| 1224 | } | ||
| 1225 | |||
| 1075 | current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); | 1226 | current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); |
| 1076 | 1227 | ||
| 1077 | GLuint block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); | 1228 | GLuint block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); |
| @@ -1105,6 +1256,7 @@ void RasterizerOpenGL::SetShader() { | |||
| 1105 | } | 1256 | } |
| 1106 | 1257 | ||
| 1107 | SyncFogColor(); | 1258 | SyncFogColor(); |
| 1259 | SyncProcTexNoise(); | ||
| 1108 | } | 1260 | } |
| 1109 | } | 1261 | } |
| 1110 | } | 1262 | } |
| @@ -1204,6 +1356,86 @@ void RasterizerOpenGL::SyncFogLUT() { | |||
| 1204 | } | 1356 | } |
| 1205 | } | 1357 | } |
| 1206 | 1358 | ||
// Copies the proctex noise registers into the uniform block and marks the block dirty.
// Frequency and phase are decoded from raw float16; the amplitude fields are divided by 4095.
| 1359 | void RasterizerOpenGL::SyncProcTexNoise() { | ||
| 1360 | const auto& regs = Pica::g_state.regs.texturing; | ||
| 1361 | uniform_block_data.data.proctex_noise_f = { | ||
| 1362 | Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(), | ||
| 1363 | Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(), | ||
| 1364 | }; | ||
| 1365 | uniform_block_data.data.proctex_noise_a = { | ||
| 1366 | regs.proctex_noise_u.amplitude / 4095.0f, regs.proctex_noise_v.amplitude / 4095.0f, | ||
| 1367 | }; | ||
| 1368 | uniform_block_data.data.proctex_noise_p = { | ||
| 1369 | Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(), | ||
| 1370 | Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(), | ||
| 1371 | }; | ||
| 1372 | |||
| 1373 | uniform_block_data.dirty = true; | ||
| 1374 | } | ||
| 1375 | |||
| 1376 | // Helper for SyncProcTexNoiseLUT/ColorMap/AlphaMap: converts lut to (value, difference) float pairs | ||
// and, only when they differ from the cached lut_data, updates the cache and re-uploads all 128
// texels to the GL_TEXTURE_1D target of the texture unit passed as `texture`.
// NOTE(review): the textures are allocated with a nullptr data pointer and the caches start
// zero-initialized, so a table that converts to all zeros is never uploaded - confirm the
// initial texture contents do not matter in that case.
| 1377 | static void SyncProcTexValueLUT(const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut, | ||
| 1378 | std::array<GLvec2, 128>& lut_data, GLenum texture) { | ||
| 1379 | std::array<GLvec2, 128> new_data; | ||
| 1380 | std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { | ||
| 1381 | return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; | ||
| 1382 | }); | ||
| 1383 | |||
| 1384 | if (new_data != lut_data) { | ||
| 1385 | lut_data = new_data; | ||
| 1386 | glActiveTexture(texture); | ||
| 1387 | glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RG, GL_FLOAT, lut_data.data()); | ||
| 1388 | } | ||
| 1389 | } | ||
| 1390 | |||
// Syncs the proctex noise table to the 1D texture on unit GL_TEXTURE10 via SyncProcTexValueLUT.
| 1391 | void RasterizerOpenGL::SyncProcTexNoiseLUT() { | ||
| 1392 | SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, GL_TEXTURE10); | ||
| 1393 | } | ||
| 1394 | |||
// Syncs the proctex color map table to the 1D texture on unit GL_TEXTURE11 via
// SyncProcTexValueLUT.
| 1395 | void RasterizerOpenGL::SyncProcTexColorMap() { | ||
| 1396 | SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, | ||
| 1397 | GL_TEXTURE11); | ||
| 1398 | } | ||
| 1399 | |||
// Syncs the proctex alpha map table to the 1D texture on unit GL_TEXTURE12 via
// SyncProcTexValueLUT.
| 1400 | void RasterizerOpenGL::SyncProcTexAlphaMap() { | ||
| 1401 | SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, | ||
| 1402 | GL_TEXTURE12); | ||
| 1403 | } | ||
| 1404 | |||
// Converts the proctex color table to float RGBA (each channel divided by 255) and, when the
// result differs from the cached proctex_lut_data, updates the cache and re-uploads all 256
// texels to the 1D texture on unit GL_TEXTURE13.
| 1405 | void RasterizerOpenGL::SyncProcTexLUT() { | ||
| 1406 | std::array<GLvec4, 256> new_data; | ||
| 1407 | |||
| 1408 | std::transform(Pica::g_state.proctex.color_table.begin(), | ||
| 1409 | Pica::g_state.proctex.color_table.end(), new_data.begin(), | ||
| 1410 | [](const auto& entry) { | ||
| 1411 | auto rgba = entry.ToVector() / 255.0f; | ||
| 1412 | return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; | ||
| 1413 | }); | ||
| 1414 | |||
| 1415 | if (new_data != proctex_lut_data) { | ||
| 1416 | proctex_lut_data = new_data; | ||
| 1417 | glActiveTexture(GL_TEXTURE13); | ||
| 1418 | glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_lut_data.data()); | ||
| 1419 | } | ||
| 1420 | } | ||
| 1421 | |||
// Converts the proctex color difference table to float RGBA (ToVector() divided by 255) and,
// when the result differs from the cached proctex_diff_lut_data, updates the cache and
// re-uploads all 256 texels to the 1D texture on unit GL_TEXTURE14.
| 1422 | void RasterizerOpenGL::SyncProcTexDiffLUT() { | ||
| 1423 | std::array<GLvec4, 256> new_data; | ||
| 1424 | |||
| 1425 | std::transform(Pica::g_state.proctex.color_diff_table.begin(), | ||
| 1426 | Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), | ||
| 1427 | [](const auto& entry) { | ||
| 1428 | auto rgba = entry.ToVector() / 255.0f; | ||
| 1429 | return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; | ||
| 1430 | }); | ||
| 1431 | |||
| 1432 | if (new_data != proctex_diff_lut_data) { | ||
| 1433 | proctex_diff_lut_data = new_data; | ||
| 1434 | glActiveTexture(GL_TEXTURE14); | ||
| 1435 | glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_diff_lut_data.data()); | ||
| 1436 | } | ||
| 1437 | } | ||
| 1438 | |||
| 1207 | void RasterizerOpenGL::SyncAlphaTest() { | 1439 | void RasterizerOpenGL::SyncAlphaTest() { |
| 1208 | const auto& regs = Pica::g_state.regs; | 1440 | const auto& regs = Pica::g_state.regs; |
| 1209 | if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { | 1441 | if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 3e1770d77..a9ad7d660 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -143,6 +143,9 @@ private: | |||
| 143 | GLint scissor_x2; | 143 | GLint scissor_x2; |
| 144 | GLint scissor_y2; | 144 | GLint scissor_y2; |
| 145 | alignas(16) GLvec3 fog_color; | 145 | alignas(16) GLvec3 fog_color; |
| 146 | alignas(8) GLvec2 proctex_noise_f; | ||
| 147 | alignas(8) GLvec2 proctex_noise_a; | ||
| 148 | alignas(8) GLvec2 proctex_noise_p; | ||
| 146 | alignas(16) GLvec3 lighting_global_ambient; | 149 | alignas(16) GLvec3 lighting_global_ambient; |
| 147 | LightSrc light_src[8]; | 150 | LightSrc light_src[8]; |
| 148 | alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages | 151 | alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages |
| @@ -150,7 +153,7 @@ private: | |||
| 150 | }; | 153 | }; |
| 151 | 154 | ||
| 152 | static_assert( | 155 | static_assert( |
| 153 | sizeof(UniformData) == 0x3C0, | 156 | sizeof(UniformData) == 0x3E0, |
| 154 | "The size of the UniformData structure has changed, update the structure in the shader"); | 157 | "The size of the UniformData structure has changed, update the structure in the shader"); |
| 155 | static_assert(sizeof(UniformData) < 16384, | 158 | static_assert(sizeof(UniformData) < 16384, |
| 156 | "UniformData structure must be less than 16kb as per the OpenGL spec"); | 159 | "UniformData structure must be less than 16kb as per the OpenGL spec"); |
| @@ -180,6 +183,16 @@ private: | |||
| 180 | void SyncFogColor(); | 183 | void SyncFogColor(); |
| 181 | void SyncFogLUT(); | 184 | void SyncFogLUT(); |
| 182 | 185 | ||
| 186 | /// Sync the procedural texture noise configuration to match the PICA register | ||
| 187 | void SyncProcTexNoise(); | ||
| 188 | |||
| 189 | /// Sync the procedural texture lookup tables | ||
| 190 | void SyncProcTexNoiseLUT(); | ||
| 191 | void SyncProcTexColorMap(); | ||
| 192 | void SyncProcTexAlphaMap(); | ||
| 193 | void SyncProcTexLUT(); | ||
| 194 | void SyncProcTexDiffLUT(); | ||
| 195 | |||
| 183 | /// Syncs the alpha test states to match the PICA register | 196 | /// Syncs the alpha test states to match the PICA register |
| 184 | void SyncAlphaTest(); | 197 | void SyncAlphaTest(); |
| 185 | 198 | ||
| @@ -248,6 +261,11 @@ private: | |||
| 248 | UniformData data; | 261 | UniformData data; |
| 249 | bool lut_dirty[6]; | 262 | bool lut_dirty[6]; |
| 250 | bool fog_lut_dirty; | 263 | bool fog_lut_dirty; |
| 264 | bool proctex_noise_lut_dirty; | ||
| 265 | bool proctex_color_map_dirty; | ||
| 266 | bool proctex_alpha_map_dirty; | ||
| 267 | bool proctex_lut_dirty; | ||
| 268 | bool proctex_diff_lut_dirty; | ||
| 251 | bool dirty; | 269 | bool dirty; |
| 252 | } uniform_block_data = {}; | 270 | } uniform_block_data = {}; |
| 253 | 271 | ||
| @@ -262,4 +280,19 @@ private: | |||
| 262 | 280 | ||
| 263 | OGLTexture fog_lut; | 281 | OGLTexture fog_lut; |
| 264 | std::array<GLuint, 128> fog_lut_data{}; | 282 | std::array<GLuint, 128> fog_lut_data{}; |
| 283 | |||
| 284 | OGLTexture proctex_noise_lut; | ||
| 285 | std::array<GLvec2, 128> proctex_noise_lut_data{}; | ||
| 286 | |||
| 287 | OGLTexture proctex_color_map; | ||
| 288 | std::array<GLvec2, 128> proctex_color_map_data{}; | ||
| 289 | |||
| 290 | OGLTexture proctex_alpha_map; | ||
| 291 | std::array<GLvec2, 128> proctex_alpha_map_data{}; | ||
| 292 | |||
| 293 | OGLTexture proctex_lut; | ||
| 294 | std::array<GLvec4, 256> proctex_lut_data{}; | ||
| 295 | |||
| 296 | OGLTexture proctex_diff_lut; | ||
| 297 | std::array<GLvec4, 256> proctex_diff_lut_data{}; | ||
| 265 | }; | 298 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7b44dade8..600119321 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -114,6 +114,22 @@ PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { | |||
| 114 | state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; | 114 | state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; |
| 115 | state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; | 115 | state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; |
| 116 | 116 | ||
| 117 | state.proctex.enable = regs.texturing.main_config.texture3_enable; | ||
| 118 | if (state.proctex.enable) { | ||
| 119 | state.proctex.coord = regs.texturing.main_config.texture3_coordinates; | ||
| 120 | state.proctex.u_clamp = regs.texturing.proctex.u_clamp; | ||
| 121 | state.proctex.v_clamp = regs.texturing.proctex.v_clamp; | ||
| 122 | state.proctex.color_combiner = regs.texturing.proctex.color_combiner; | ||
| 123 | state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner; | ||
| 124 | state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha; | ||
| 125 | state.proctex.noise_enable = regs.texturing.proctex.noise_enable; | ||
| 126 | state.proctex.u_shift = regs.texturing.proctex.u_shift; | ||
| 127 | state.proctex.v_shift = regs.texturing.proctex.v_shift; | ||
| 128 | state.proctex.lut_width = regs.texturing.proctex_lut.width; | ||
| 129 | state.proctex.lut_offset = regs.texturing.proctex_lut_offset; | ||
| 130 | state.proctex.lut_filter = regs.texturing.proctex_lut.filter; | ||
| 131 | } | ||
| 132 | |||
| 117 | return res; | 133 | return res; |
| 118 | } | 134 | } |
| 119 | 135 | ||
| @@ -132,8 +148,7 @@ static std::string TexCoord(const PicaShaderConfig& config, int texture_unit) { | |||
| 132 | if (texture_unit == 2 && config.state.texture2_use_coord1) { | 148 | if (texture_unit == 2 && config.state.texture2_use_coord1) { |
| 133 | return "texcoord[1]"; | 149 | return "texcoord[1]"; |
| 134 | } | 150 | } |
| 135 | // TODO: if texture unit 3 (procedural texture) implementation also uses this function, | 151 | |
| 136 | // config.state.texture3_coordinates should be repected here. | ||
| 137 | return "texcoord[" + std::to_string(texture_unit) + "]"; | 152 | return "texcoord[" + std::to_string(texture_unit) + "]"; |
| 138 | } | 153 | } |
| 139 | 154 | ||
| @@ -175,6 +190,14 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config, | |||
| 175 | case Source::Texture2: | 190 | case Source::Texture2: |
| 176 | out += "texture(tex[2], " + TexCoord(config, 2) + ")"; | 191 | out += "texture(tex[2], " + TexCoord(config, 2) + ")"; |
| 177 | break; | 192 | break; |
| 193 | case Source::Texture3: | ||
| 194 | if (config.state.proctex.enable) { | ||
| 195 | out += "ProcTex()"; | ||
| 196 | } else { | ||
| 197 | LOG_ERROR(Render_OpenGL, "Using Texture3 without enabling it"); | ||
| 198 | out += "vec4(0.0)"; | ||
| 199 | } | ||
| 200 | break; | ||
| 178 | case Source::PreviousBuffer: | 201 | case Source::PreviousBuffer: |
| 179 | out += "combiner_buffer"; | 202 | out += "combiner_buffer"; |
| 180 | break; | 203 | break; |
| @@ -483,9 +506,18 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 483 | if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { | 506 | if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { |
| 484 | // Bump mapping is enabled using a normal map, read perturbation vector from the selected | 507 | // Bump mapping is enabled using a normal map, read perturbation vector from the selected |
| 485 | // texture | 508 | // texture |
| 486 | std::string bump_selector = std::to_string(lighting.bump_selector); | 509 | if (lighting.bump_selector == 3) { |
| 487 | out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], " + | 510 | if (config.state.proctex.enable) { |
| 488 | TexCoord(config, lighting.bump_selector) + ").rgb - 1.0;\n"; | 511 | out += "vec3 surface_normal = 2.0 * ProcTex().rgb - 1.0;\n"; |
| 512 | } else { | ||
| 513 | LOG_ERROR(Render_OpenGL, "Using Texture3 without enabling it"); | ||
| 514 | out += "vec3 surface_normal = vec3(-1.0);\n"; | ||
| 515 | } | ||
| 516 | } else { | ||
| 517 | std::string bump_selector = std::to_string(lighting.bump_selector); | ||
| 518 | out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], " + | ||
| 519 | TexCoord(config, lighting.bump_selector) + ").rgb - 1.0;\n"; | ||
| 520 | } | ||
| 489 | 521 | ||
| 490 | // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher | 522 | // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher |
| 491 | // precision result | 523 | // precision result |
| @@ -693,6 +725,221 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 693 | out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; | 725 | out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; |
| 694 | } | 726 | } |
| 695 | 727 | ||
| 728 | using ProcTexClamp = TexturingRegs::ProcTexClamp; | ||
| 729 | using ProcTexShift = TexturingRegs::ProcTexShift; | ||
| 730 | using ProcTexCombiner = TexturingRegs::ProcTexCombiner; | ||
| 731 | using ProcTexFilter = TexturingRegs::ProcTexFilter; | ||
| 732 | |||
| 733 | void AppendProcTexShiftOffset(std::string& out, const std::string& v, ProcTexShift mode, | ||
| 734 | ProcTexClamp clamp_mode) { | ||
| 735 | std::string offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? "1.0" : "0.5"; | ||
| 736 | switch (mode) { | ||
| 737 | case ProcTexShift::None: | ||
| 738 | out += "0"; | ||
| 739 | break; | ||
| 740 | case ProcTexShift::Odd: | ||
| 741 | out += offset + " * ((int(" + v + ") / 2) % 2)"; | ||
| 742 | break; | ||
| 743 | case ProcTexShift::Even: | ||
| 744 | out += offset + " * (((int(" + v + ") + 1) / 2) % 2)"; | ||
| 745 | break; | ||
| 746 | default: | ||
| 747 | LOG_CRITICAL(HW_GPU, "Unknown shift mode %u", static_cast<u32>(mode)); | ||
| 748 | out += "0"; | ||
| 749 | break; | ||
| 750 | } | ||
| 751 | } | ||
| 752 | |||
| 753 | void AppendProcTexClamp(std::string& out, const std::string& var, ProcTexClamp mode) { | ||
| 754 | switch (mode) { | ||
| 755 | case ProcTexClamp::ToZero: | ||
| 756 | out += var + " = " + var + " > 1.0 ? 0 : " + var + ";\n"; | ||
| 757 | break; | ||
| 758 | case ProcTexClamp::ToEdge: | ||
| 759 | out += var + " = " + "min(" + var + ", 1.0);\n"; | ||
| 760 | break; | ||
| 761 | case ProcTexClamp::SymmetricalRepeat: | ||
| 762 | out += var + " = " + "fract(" + var + ");\n"; | ||
| 763 | break; | ||
| 764 | case ProcTexClamp::MirroredRepeat: { | ||
| 765 | out += | ||
| 766 | var + " = int(" + var + ") % 2 == 0 ? fract(" + var + ") : 1.0 - fract(" + var + ");\n"; | ||
| 767 | break; | ||
| 768 | } | ||
| 769 | case ProcTexClamp::Pulse: | ||
| 770 | out += var + " = " + var + " > 0.5 ? 1.0 : 0.0;\n"; | ||
| 771 | break; | ||
| 772 | default: | ||
| 773 | LOG_CRITICAL(HW_GPU, "Unknown clamp mode %u", static_cast<u32>(mode)); | ||
| 774 | out += var + " = " + "min(" + var + ", 1.0);\n"; | ||
| 775 | break; | ||
| 776 | } | ||
| 777 | } | ||
| 778 | |||
| 779 | void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, | ||
| 780 | const std::string& map_lut) { | ||
| 781 | std::string combined; | ||
| 782 | switch (combiner) { | ||
| 783 | case ProcTexCombiner::U: | ||
| 784 | combined = "u"; | ||
| 785 | break; | ||
| 786 | case ProcTexCombiner::U2: | ||
| 787 | combined = "(u * u)"; | ||
| 788 | break; | ||
| 789 | case TexturingRegs::ProcTexCombiner::V: | ||
| 790 | combined = "v"; | ||
| 791 | break; | ||
| 792 | case TexturingRegs::ProcTexCombiner::V2: | ||
| 793 | combined = "(v * v)"; | ||
| 794 | break; | ||
| 795 | case TexturingRegs::ProcTexCombiner::Add: | ||
| 796 | combined = "((u + v) * 0.5)"; | ||
| 797 | break; | ||
| 798 | case TexturingRegs::ProcTexCombiner::Add2: | ||
| 799 | combined = "((u * u + v * v) * 0.5)"; | ||
| 800 | break; | ||
| 801 | case TexturingRegs::ProcTexCombiner::SqrtAdd2: | ||
| 802 | combined = "min(sqrt(u * u + v * v), 1.0)"; | ||
| 803 | break; | ||
| 804 | case TexturingRegs::ProcTexCombiner::Min: | ||
| 805 | combined = "min(u, v)"; | ||
| 806 | break; | ||
| 807 | case TexturingRegs::ProcTexCombiner::Max: | ||
| 808 | combined = "max(u, v)"; | ||
| 809 | break; | ||
| 810 | case TexturingRegs::ProcTexCombiner::RMax: | ||
| 811 | combined = "min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; | ||
| 812 | break; | ||
| 813 | default: | ||
| 814 | LOG_CRITICAL(HW_GPU, "Unknown combiner %u", static_cast<u32>(combiner)); | ||
| 815 | combined = "0.0"; | ||
| 816 | break; | ||
| 817 | } | ||
| 818 | out += "ProcTexLookupLUT(" + map_lut + ", " + combined + ")"; | ||
| 819 | } | ||
| 820 | |||
| 821 | void AppendProcTexSampler(std::string& out, const PicaShaderConfig& config) { | ||
| 822 | // LUT sampling uitlity | ||
| 823 | // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and | ||
| 824 | // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using | ||
| 825 | // value entries and difference entries. | ||
| 826 | out += R"( | ||
| 827 | float ProcTexLookupLUT(sampler1D lut, float coord) { | ||
| 828 | coord *= 128; | ||
| 829 | float index_i = clamp(floor(coord), 0.0, 127.0); | ||
| 830 | float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be | ||
| 831 | // extracted as index_i = 127.0 and index_f = 1.0 | ||
| 832 | vec2 entry = texelFetch(lut, int(index_i), 0).rg; | ||
| 833 | return clamp(entry.r + entry.g * index_f, 0.0, 1.0); | ||
| 834 | } | ||
| 835 | )"; | ||
| 836 | |||
| 837 | // Noise utility | ||
| 838 | if (config.state.proctex.noise_enable) { | ||
| 839 | // See swrasterizer/proctex.cpp for more information about these functions | ||
| 840 | out += R"( | ||
| 841 | int ProcTexNoiseRand1D(int v) { | ||
| 842 | const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11); | ||
| 843 | return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; | ||
| 844 | } | ||
| 845 | |||
| 846 | float ProcTexNoiseRand2D(vec2 point) { | ||
| 847 | const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14); | ||
| 848 | int u2 = ProcTexNoiseRand1D(int(point.x)); | ||
| 849 | int v2 = ProcTexNoiseRand1D(int(point.y)); | ||
| 850 | v2 += ((u2 & 3) == 1) ? 4 : 0; | ||
| 851 | v2 ^= (u2 & 1) * 6; | ||
| 852 | v2 += 10 + u2; | ||
| 853 | v2 &= 0xF; | ||
| 854 | v2 ^= table[u2]; | ||
| 855 | return -1.0 + float(v2) * 2.0/ 15.0; | ||
| 856 | } | ||
| 857 | |||
| 858 | float ProcTexNoiseCoef(vec2 x) { | ||
| 859 | vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p); | ||
| 860 | vec2 point = floor(grid); | ||
| 861 | vec2 frac = grid - point; | ||
| 862 | |||
| 863 | float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y); | ||
| 864 | float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0); | ||
| 865 | float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); | ||
| 866 | float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); | ||
| 867 | |||
| 868 | float x_noise = ProcTexLookupLUT(proctex_noise_lut, frac.x); | ||
| 869 | float y_noise = ProcTexLookupLUT(proctex_noise_lut, frac.y); | ||
| 870 | float x0 = mix(g0, g1, x_noise); | ||
| 871 | float x1 = mix(g2, g3, x_noise); | ||
| 872 | return mix(x0, x1, y_noise); | ||
| 873 | } | ||
| 874 | )"; | ||
| 875 | } | ||
| 876 | |||
| 877 | out += "vec4 ProcTex() {\n"; | ||
| 878 | out += "vec2 uv = abs(texcoord[" + std::to_string(config.state.proctex.coord) + "]);\n"; | ||
| 879 | |||
| 880 | // Get shift offset before noise generation | ||
| 881 | out += "float u_shift = "; | ||
| 882 | AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift, | ||
| 883 | config.state.proctex.u_clamp); | ||
| 884 | out += ";\n"; | ||
| 885 | out += "float v_shift = "; | ||
| 886 | AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift, | ||
| 887 | config.state.proctex.v_clamp); | ||
| 888 | out += ";\n"; | ||
| 889 | |||
| 890 | // Generate noise | ||
| 891 | if (config.state.proctex.noise_enable) { | ||
| 892 | out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n"; | ||
| 893 | out += "uv = abs(uv);\n"; | ||
| 894 | } | ||
| 895 | |||
| 896 | // Shift | ||
| 897 | out += "float u = uv.x + u_shift;\n"; | ||
| 898 | out += "float v = uv.y + v_shift;\n"; | ||
| 899 | |||
| 900 | // Clamp | ||
| 901 | AppendProcTexClamp(out, "u", config.state.proctex.u_clamp); | ||
| 902 | AppendProcTexClamp(out, "v", config.state.proctex.v_clamp); | ||
| 903 | |||
| 904 | // Combine and map | ||
| 905 | out += "float lut_coord = "; | ||
| 906 | AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map"); | ||
| 907 | out += ";\n"; | ||
| 908 | |||
| 909 | // Look up color | ||
| 910 | // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] | ||
| 911 | out += "lut_coord *= " + std::to_string(config.state.proctex.lut_width - 1) + ";\n"; | ||
| 912 | // TODO(wwylele): implement mipmap | ||
| 913 | switch (config.state.proctex.lut_filter) { | ||
| 914 | case ProcTexFilter::Linear: | ||
| 915 | case ProcTexFilter::LinearMipmapLinear: | ||
| 916 | case ProcTexFilter::LinearMipmapNearest: | ||
| 917 | out += "int lut_index_i = int(lut_coord) + " + | ||
| 918 | std::to_string(config.state.proctex.lut_offset) + ";\n"; | ||
| 919 | out += "float lut_index_f = fract(lut_coord);\n"; | ||
| 920 | out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i, 0) + lut_index_f * " | ||
| 921 | "texelFetch(proctex_diff_lut, lut_index_i, 0);\n"; | ||
| 922 | break; | ||
| 923 | case ProcTexFilter::Nearest: | ||
| 924 | case ProcTexFilter::NearestMipmapLinear: | ||
| 925 | case ProcTexFilter::NearestMipmapNearest: | ||
| 926 | out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n"; | ||
| 927 | out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)), 0);\n"; | ||
| 928 | break; | ||
| 929 | } | ||
| 930 | |||
| 931 | if (config.state.proctex.separate_alpha) { | ||
| 932 | // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It | ||
| 933 | // uses the output of CombineAndMap directly instead. | ||
| 934 | out += "float final_alpha = "; | ||
| 935 | AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map"); | ||
| 936 | out += ";\n"; | ||
| 937 | out += "return vec4(final_color.xyz, final_alpha);\n}\n"; | ||
| 938 | } else { | ||
| 939 | out += "return final_color;\n}\n"; | ||
| 940 | } | ||
| 941 | } | ||
| 942 | |||
| 696 | std::string GenerateFragmentShader(const PicaShaderConfig& config) { | 943 | std::string GenerateFragmentShader(const PicaShaderConfig& config) { |
| 697 | const auto& state = config.state; | 944 | const auto& state = config.state; |
| 698 | 945 | ||
| @@ -735,6 +982,9 @@ layout (std140) uniform shader_data { | |||
| 735 | int scissor_x2; | 982 | int scissor_x2; |
| 736 | int scissor_y2; | 983 | int scissor_y2; |
| 737 | vec3 fog_color; | 984 | vec3 fog_color; |
| 985 | vec2 proctex_noise_f; | ||
| 986 | vec2 proctex_noise_a; | ||
| 987 | vec2 proctex_noise_p; | ||
| 738 | vec3 lighting_global_ambient; | 988 | vec3 lighting_global_ambient; |
| 739 | LightSrc light_src[NUM_LIGHTS]; | 989 | LightSrc light_src[NUM_LIGHTS]; |
| 740 | vec4 const_color[NUM_TEV_STAGES]; | 990 | vec4 const_color[NUM_TEV_STAGES]; |
| @@ -744,12 +994,21 @@ layout (std140) uniform shader_data { | |||
| 744 | uniform sampler2D tex[3]; | 994 | uniform sampler2D tex[3]; |
| 745 | uniform sampler1D lut[6]; | 995 | uniform sampler1D lut[6]; |
| 746 | uniform usampler1D fog_lut; | 996 | uniform usampler1D fog_lut; |
| 997 | uniform sampler1D proctex_noise_lut; | ||
| 998 | uniform sampler1D proctex_color_map; | ||
| 999 | uniform sampler1D proctex_alpha_map; | ||
| 1000 | uniform sampler1D proctex_lut; | ||
| 1001 | uniform sampler1D proctex_diff_lut; | ||
| 747 | 1002 | ||
| 748 | // Rotate the vector v by the quaternion q | 1003 | // Rotate the vector v by the quaternion q |
| 749 | vec3 quaternion_rotate(vec4 q, vec3 v) { | 1004 | vec3 quaternion_rotate(vec4 q, vec3 v) { |
| 750 | return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); | 1005 | return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); |
| 751 | } | 1006 | })"; |
| 752 | 1007 | ||
| 1008 | if (config.state.proctex.enable) | ||
| 1009 | AppendProcTexSampler(out, config); | ||
| 1010 | |||
| 1011 | out += R"( | ||
| 753 | void main() { | 1012 | void main() { |
| 754 | vec4 primary_fragment_color = vec4(0.0); | 1013 | vec4 primary_fragment_color = vec4(0.0); |
| 755 | vec4 secondary_fragment_color = vec4(0.0); | 1014 | vec4 secondary_fragment_color = vec4(0.0); |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 3fb046b76..ea6d216d1 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -113,6 +113,19 @@ union PicaShaderConfig { | |||
| 113 | } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; | 113 | } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; |
| 114 | } lighting; | 114 | } lighting; |
| 115 | 115 | ||
| 116 | struct { | ||
| 117 | bool enable; | ||
| 118 | u32 coord; | ||
| 119 | Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp; | ||
| 120 | Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner; | ||
| 121 | bool separate_alpha; | ||
| 122 | bool noise_enable; | ||
| 123 | Pica::TexturingRegs::ProcTexShift u_shift, v_shift; | ||
| 124 | u32 lut_width; | ||
| 125 | u32 lut_offset; | ||
| 126 | Pica::TexturingRegs::ProcTexFilter lut_filter; | ||
| 127 | } proctex; | ||
| 128 | |||
| 116 | } state; | 129 | } state; |
| 117 | }; | 130 | }; |
| 118 | #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) | 131 | #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 3c03b424a..bf837a7fb 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -58,6 +58,12 @@ OpenGLState::OpenGLState() { | |||
| 58 | 58 | ||
| 59 | fog_lut.texture_1d = 0; | 59 | fog_lut.texture_1d = 0; |
| 60 | 60 | ||
| 61 | proctex_lut.texture_1d = 0; | ||
| 62 | proctex_diff_lut.texture_1d = 0; | ||
| 63 | proctex_color_map.texture_1d = 0; | ||
| 64 | proctex_alpha_map.texture_1d = 0; | ||
| 65 | proctex_noise_lut.texture_1d = 0; | ||
| 66 | |||
| 61 | draw.read_framebuffer = 0; | 67 | draw.read_framebuffer = 0; |
| 62 | draw.draw_framebuffer = 0; | 68 | draw.draw_framebuffer = 0; |
| 63 | draw.vertex_array = 0; | 69 | draw.vertex_array = 0; |
| @@ -201,6 +207,36 @@ void OpenGLState::Apply() const { | |||
| 201 | glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d); | 207 | glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d); |
| 202 | } | 208 | } |
| 203 | 209 | ||
| 210 | // ProcTex Noise LUT | ||
| 211 | if (proctex_noise_lut.texture_1d != cur_state.proctex_noise_lut.texture_1d) { | ||
| 212 | glActiveTexture(GL_TEXTURE10); | ||
| 213 | glBindTexture(GL_TEXTURE_1D, proctex_noise_lut.texture_1d); | ||
| 214 | } | ||
| 215 | |||
| 216 | // ProcTex Color Map | ||
| 217 | if (proctex_color_map.texture_1d != cur_state.proctex_color_map.texture_1d) { | ||
| 218 | glActiveTexture(GL_TEXTURE11); | ||
| 219 | glBindTexture(GL_TEXTURE_1D, proctex_color_map.texture_1d); | ||
| 220 | } | ||
| 221 | |||
| 222 | // ProcTex Alpha Map | ||
| 223 | if (proctex_alpha_map.texture_1d != cur_state.proctex_alpha_map.texture_1d) { | ||
| 224 | glActiveTexture(GL_TEXTURE12); | ||
| 225 | glBindTexture(GL_TEXTURE_1D, proctex_alpha_map.texture_1d); | ||
| 226 | } | ||
| 227 | |||
| 228 | // ProcTex LUT | ||
| 229 | if (proctex_lut.texture_1d != cur_state.proctex_lut.texture_1d) { | ||
| 230 | glActiveTexture(GL_TEXTURE13); | ||
| 231 | glBindTexture(GL_TEXTURE_1D, proctex_lut.texture_1d); | ||
| 232 | } | ||
| 233 | |||
| 234 | // ProcTex Diff LUT | ||
| 235 | if (proctex_diff_lut.texture_1d != cur_state.proctex_diff_lut.texture_1d) { | ||
| 236 | glActiveTexture(GL_TEXTURE14); | ||
| 237 | glBindTexture(GL_TEXTURE_1D, proctex_diff_lut.texture_1d); | ||
| 238 | } | ||
| 239 | |||
| 204 | // Framebuffer | 240 | // Framebuffer |
| 205 | if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { | 241 | if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { |
| 206 | glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); | 242 | glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index aee3c2946..7dcc03bd5 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -72,6 +72,26 @@ public: | |||
| 72 | } fog_lut; | 72 | } fog_lut; |
| 73 | 73 | ||
| 74 | struct { | 74 | struct { |
| 75 | GLuint texture_1d; // GL_TEXTURE_BINDING_1D | ||
| 76 | } proctex_noise_lut; | ||
| 77 | |||
| 78 | struct { | ||
| 79 | GLuint texture_1d; // GL_TEXTURE_BINDING_1D | ||
| 80 | } proctex_color_map; | ||
| 81 | |||
| 82 | struct { | ||
| 83 | GLuint texture_1d; // GL_TEXTURE_BINDING_1D | ||
| 84 | } proctex_alpha_map; | ||
| 85 | |||
| 86 | struct { | ||
| 87 | GLuint texture_1d; // GL_TEXTURE_BINDING_1D | ||
| 88 | } proctex_lut; | ||
| 89 | |||
| 90 | struct { | ||
| 91 | GLuint texture_1d; // GL_TEXTURE_BINDING_1D | ||
| 92 | } proctex_diff_lut; | ||
| 93 | |||
| 94 | struct { | ||
| 75 | GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING | 95 | GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING |
| 76 | GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING | 96 | GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING |
| 77 | GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING | 97 | GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING |
diff --git a/src/video_core/swrasterizer/proctex.cpp b/src/video_core/swrasterizer/proctex.cpp new file mode 100644 index 000000000..b69892778 --- /dev/null +++ b/src/video_core/swrasterizer/proctex.cpp | |||
| @@ -0,0 +1,223 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <cmath> | ||
| 7 | #include "common/math_util.h" | ||
| 8 | #include "video_core/swrasterizer/proctex.h" | ||
| 9 | |||
| 10 | namespace Pica { | ||
| 11 | namespace Rasterizer { | ||
| 12 | |||
| 13 | using ProcTexClamp = TexturingRegs::ProcTexClamp; | ||
| 14 | using ProcTexShift = TexturingRegs::ProcTexShift; | ||
| 15 | using ProcTexCombiner = TexturingRegs::ProcTexCombiner; | ||
| 16 | using ProcTexFilter = TexturingRegs::ProcTexFilter; | ||
| 17 | |||
| 18 | static float LookupLUT(const std::array<State::ProcTex::ValueEntry, 128>& lut, float coord) { | ||
| 19 | // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and | ||
| 20 | // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using | ||
| 21 | // value entries and difference entries. | ||
| 22 | coord *= 128; | ||
| 23 | const int index_int = std::min(static_cast<int>(coord), 127); | ||
| 24 | const float frac = coord - index_int; | ||
| 25 | return lut[index_int].ToFloat() + frac * lut[index_int].DiffToFloat(); | ||
| 26 | } | ||
| 27 | |||
// These functions are used to generate random noise for procedural texture. Their results are
// verified against real hardware, but it's not known if the algorithm is the same as hardware.

/// Maps an integer coordinate to a 4-bit pseudo-random value (0..15).
static unsigned int NoiseRand1D(unsigned int v) {
    static constexpr std::array<unsigned int, 16> quotient_table{
        {0, 4, 10, 8, 4, 9, 7, 12, 5, 15, 13, 14, 11, 15, 2, 11}};
    // One term comes from v modulo 9, the other from a table indexed by v / 9 (wrapped to 4 bits).
    const unsigned int remainder_term = ((v % 9 + 2) * 3) & 0xF;
    const unsigned int quotient_term = quotient_table[(v / 9) & 0xF];
    return remainder_term ^ quotient_term;
}
| 35 | |||
| 36 | static float NoiseRand2D(unsigned int x, unsigned int y) { | ||
| 37 | static constexpr std::array<unsigned int, 16> table{ | ||
| 38 | {10, 2, 15, 8, 0, 7, 4, 5, 5, 13, 2, 6, 13, 9, 3, 14}}; | ||
| 39 | unsigned int u2 = NoiseRand1D(x); | ||
| 40 | unsigned int v2 = NoiseRand1D(y); | ||
| 41 | v2 += ((u2 & 3) == 1) ? 4 : 0; | ||
| 42 | v2 ^= (u2 & 1) * 6; | ||
| 43 | v2 += 10 + u2; | ||
| 44 | v2 &= 0xF; | ||
| 45 | v2 ^= table[u2]; | ||
| 46 | return -1.0f + v2 * 2.0f / 15.0f; | ||
| 47 | } | ||
| 48 | |||
| 49 | static float NoiseCoef(float u, float v, TexturingRegs regs, State::ProcTex state) { | ||
| 50 | const float freq_u = float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(); | ||
| 51 | const float freq_v = float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(); | ||
| 52 | const float phase_u = float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(); | ||
| 53 | const float phase_v = float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(); | ||
| 54 | const float x = 9 * freq_u * std::abs(u + phase_u); | ||
| 55 | const float y = 9 * freq_v * std::abs(v + phase_v); | ||
| 56 | const int x_int = static_cast<int>(x); | ||
| 57 | const int y_int = static_cast<int>(y); | ||
| 58 | const float x_frac = x - x_int; | ||
| 59 | const float y_frac = y - y_int; | ||
| 60 | |||
| 61 | const float g0 = NoiseRand2D(x_int, y_int) * (x_frac + y_frac); | ||
| 62 | const float g1 = NoiseRand2D(x_int + 1, y_int) * (x_frac + y_frac - 1); | ||
| 63 | const float g2 = NoiseRand2D(x_int, y_int + 1) * (x_frac + y_frac - 1); | ||
| 64 | const float g3 = NoiseRand2D(x_int + 1, y_int + 1) * (x_frac + y_frac - 2); | ||
| 65 | const float x_noise = LookupLUT(state.noise_table, x_frac); | ||
| 66 | const float y_noise = LookupLUT(state.noise_table, y_frac); | ||
| 67 | return Math::BilinearInterp(g0, g1, g2, g3, x_noise, y_noise); | ||
| 68 | } | ||
| 69 | |||
| 70 | static float GetShiftOffset(float v, ProcTexShift mode, ProcTexClamp clamp_mode) { | ||
| 71 | const float offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 1 : 0.5f; | ||
| 72 | switch (mode) { | ||
| 73 | case ProcTexShift::None: | ||
| 74 | return 0; | ||
| 75 | case ProcTexShift::Odd: | ||
| 76 | return offset * (((int)v / 2) % 2); | ||
| 77 | case ProcTexShift::Even: | ||
| 78 | return offset * ((((int)v + 1) / 2) % 2); | ||
| 79 | default: | ||
| 80 | LOG_CRITICAL(HW_GPU, "Unknown shift mode %u", static_cast<u32>(mode)); | ||
| 81 | return 0; | ||
| 82 | } | ||
| 83 | }; | ||
| 84 | |||
| 85 | static void ClampCoord(float& coord, ProcTexClamp mode) { | ||
| 86 | switch (mode) { | ||
| 87 | case ProcTexClamp::ToZero: | ||
| 88 | if (coord > 1.0f) | ||
| 89 | coord = 0.0f; | ||
| 90 | break; | ||
| 91 | case ProcTexClamp::ToEdge: | ||
| 92 | coord = std::min(coord, 1.0f); | ||
| 93 | break; | ||
| 94 | case ProcTexClamp::SymmetricalRepeat: | ||
| 95 | coord = coord - std::floor(coord); | ||
| 96 | break; | ||
| 97 | case ProcTexClamp::MirroredRepeat: { | ||
| 98 | int integer = static_cast<int>(coord); | ||
| 99 | float frac = coord - integer; | ||
| 100 | coord = (integer % 2) == 0 ? frac : (1.0f - frac); | ||
| 101 | break; | ||
| 102 | } | ||
| 103 | case ProcTexClamp::Pulse: | ||
| 104 | if (coord <= 0.5f) | ||
| 105 | coord = 0.0f; | ||
| 106 | else | ||
| 107 | coord = 1.0f; | ||
| 108 | break; | ||
| 109 | default: | ||
| 110 | LOG_CRITICAL(HW_GPU, "Unknown clamp mode %u", static_cast<u32>(mode)); | ||
| 111 | coord = std::min(coord, 1.0f); | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 | float CombineAndMap(float u, float v, ProcTexCombiner combiner, | ||
| 117 | const std::array<State::ProcTex::ValueEntry, 128>& map_table) { | ||
| 118 | float f; | ||
| 119 | switch (combiner) { | ||
| 120 | case ProcTexCombiner::U: | ||
| 121 | f = u; | ||
| 122 | break; | ||
| 123 | case ProcTexCombiner::U2: | ||
| 124 | f = u * u; | ||
| 125 | break; | ||
| 126 | case TexturingRegs::ProcTexCombiner::V: | ||
| 127 | f = v; | ||
| 128 | break; | ||
| 129 | case TexturingRegs::ProcTexCombiner::V2: | ||
| 130 | f = v * v; | ||
| 131 | break; | ||
| 132 | case TexturingRegs::ProcTexCombiner::Add: | ||
| 133 | f = (u + v) * 0.5f; | ||
| 134 | break; | ||
| 135 | case TexturingRegs::ProcTexCombiner::Add2: | ||
| 136 | f = (u * u + v * v) * 0.5f; | ||
| 137 | break; | ||
| 138 | case TexturingRegs::ProcTexCombiner::SqrtAdd2: | ||
| 139 | f = std::min(std::sqrt(u * u + v * v), 1.0f); | ||
| 140 | break; | ||
| 141 | case TexturingRegs::ProcTexCombiner::Min: | ||
| 142 | f = std::min(u, v); | ||
| 143 | break; | ||
| 144 | case TexturingRegs::ProcTexCombiner::Max: | ||
| 145 | f = std::max(u, v); | ||
| 146 | break; | ||
| 147 | case TexturingRegs::ProcTexCombiner::RMax: | ||
| 148 | f = std::min(((u + v) * 0.5f + std::sqrt(u * u + v * v)) * 0.5f, 1.0f); | ||
| 149 | break; | ||
| 150 | default: | ||
| 151 | LOG_CRITICAL(HW_GPU, "Unknown combiner %u", static_cast<u32>(combiner)); | ||
| 152 | f = 0.0f; | ||
| 153 | break; | ||
| 154 | } | ||
| 155 | return LookupLUT(map_table, f); | ||
| 156 | } | ||
| 157 | |||
| 158 | Math::Vec4<u8> ProcTex(float u, float v, TexturingRegs regs, State::ProcTex state) { | ||
| 159 | u = std::abs(u); | ||
| 160 | v = std::abs(v); | ||
| 161 | |||
| 162 | // Get shift offset before noise generation | ||
| 163 | const float u_shift = GetShiftOffset(v, regs.proctex.u_shift, regs.proctex.u_clamp); | ||
| 164 | const float v_shift = GetShiftOffset(u, regs.proctex.v_shift, regs.proctex.v_clamp); | ||
| 165 | |||
| 166 | // Generate noise | ||
| 167 | if (regs.proctex.noise_enable) { | ||
| 168 | float noise = NoiseCoef(u, v, regs, state); | ||
| 169 | u += noise * regs.proctex_noise_u.amplitude / 4095.0f; | ||
| 170 | v += noise * regs.proctex_noise_v.amplitude / 4095.0f; | ||
| 171 | u = std::abs(u); | ||
| 172 | v = std::abs(v); | ||
| 173 | } | ||
| 174 | |||
| 175 | // Shift | ||
| 176 | u += u_shift; | ||
| 177 | v += v_shift; | ||
| 178 | |||
| 179 | // Clamp | ||
| 180 | ClampCoord(u, regs.proctex.u_clamp); | ||
| 181 | ClampCoord(v, regs.proctex.v_clamp); | ||
| 182 | |||
| 183 | // Combine and map | ||
| 184 | const float lut_coord = CombineAndMap(u, v, regs.proctex.color_combiner, state.color_map_table); | ||
| 185 | |||
| 186 | // Look up the color | ||
| 187 | // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] | ||
| 188 | const u32 offset = regs.proctex_lut_offset; | ||
| 189 | const u32 width = regs.proctex_lut.width; | ||
| 190 | const float index = offset + (lut_coord * (width - 1)); | ||
| 191 | Math::Vec4<u8> final_color; | ||
| 192 | // TODO(wwylele): implement mipmap | ||
| 193 | switch (regs.proctex_lut.filter) { | ||
| 194 | case ProcTexFilter::Linear: | ||
| 195 | case ProcTexFilter::LinearMipmapLinear: | ||
| 196 | case ProcTexFilter::LinearMipmapNearest: { | ||
| 197 | const int index_int = static_cast<int>(index); | ||
| 198 | const float frac = index - index_int; | ||
| 199 | const auto color_value = state.color_table[index_int].ToVector().Cast<float>(); | ||
| 200 | const auto color_diff = state.color_diff_table[index_int].ToVector().Cast<float>(); | ||
| 201 | final_color = (color_value + frac * color_diff).Cast<u8>(); | ||
| 202 | break; | ||
| 203 | } | ||
| 204 | case ProcTexFilter::Nearest: | ||
| 205 | case ProcTexFilter::NearestMipmapLinear: | ||
| 206 | case ProcTexFilter::NearestMipmapNearest: | ||
| 207 | final_color = state.color_table[static_cast<int>(std::round(index))].ToVector(); | ||
| 208 | break; | ||
| 209 | } | ||
| 210 | |||
| 211 | if (regs.proctex.separate_alpha) { | ||
| 212 | // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It | ||
| 213 | // uses the output of CombineAndMap directly instead. | ||
| 214 | const float final_alpha = | ||
| 215 | CombineAndMap(u, v, regs.proctex.alpha_combiner, state.alpha_map_table); | ||
| 216 | return Math::MakeVec<u8>(final_color.rgb(), static_cast<u8>(final_alpha * 255)); | ||
| 217 | } else { | ||
| 218 | return final_color; | ||
| 219 | } | ||
| 220 | } | ||
| 221 | |||
| 222 | } // namespace Rasterizer | ||
| 223 | } // namespace Pica | ||
diff --git a/src/video_core/swrasterizer/proctex.h b/src/video_core/swrasterizer/proctex.h new file mode 100644 index 000000000..036e4620e --- /dev/null +++ b/src/video_core/swrasterizer/proctex.h | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "common/vector_math.h" | ||
| 7 | #include "video_core/pica_state.h" | ||
| 8 | |||
| 9 | namespace Pica { | ||
| 10 | namespace Rasterizer { | ||
| 11 | |||
| 12 | /// Generates procedural texture color for the given coordinates | ||
| 13 | Math::Vec4<u8> ProcTex(float u, float v, TexturingRegs regs, State::ProcTex state); | ||
| 14 | |||
| 15 | } // namespace Rasterizer | ||
| 16 | } // namespace Pica | ||
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 20addf0bd..e9edf0360 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include "video_core/regs_texturing.h" | 23 | #include "video_core/regs_texturing.h" |
| 24 | #include "video_core/shader/shader.h" | 24 | #include "video_core/shader/shader.h" |
| 25 | #include "video_core/swrasterizer/framebuffer.h" | 25 | #include "video_core/swrasterizer/framebuffer.h" |
| 26 | #include "video_core/swrasterizer/proctex.h" | ||
| 26 | #include "video_core/swrasterizer/rasterizer.h" | 27 | #include "video_core/swrasterizer/rasterizer.h" |
| 27 | #include "video_core/swrasterizer/texturing.h" | 28 | #include "video_core/swrasterizer/texturing.h" |
| 28 | #include "video_core/texture/texture_decode.h" | 29 | #include "video_core/texture/texture_decode.h" |
| @@ -268,7 +269,7 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 268 | uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u()); | 269 | uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u()); |
| 269 | uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v()); | 270 | uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v()); |
| 270 | 271 | ||
| 271 | Math::Vec4<u8> texture_color[3]{}; | 272 | Math::Vec4<u8> texture_color[4]{}; |
| 272 | for (int i = 0; i < 3; ++i) { | 273 | for (int i = 0; i < 3; ++i) { |
| 273 | const auto& texture = textures[i]; | 274 | const auto& texture = textures[i]; |
| 274 | if (!texture.enabled) | 275 | if (!texture.enabled) |
| @@ -334,6 +335,13 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 334 | } | 335 | } |
| 335 | } | 336 | } |
| 336 | 337 | ||
| 338 | // sample procedural texture | ||
| 339 | if (regs.texturing.main_config.texture3_enable) { | ||
| 340 | const auto& proctex_uv = uv[regs.texturing.main_config.texture3_coordinates]; | ||
| 341 | texture_color[3] = ProcTex(proctex_uv.u().ToFloat32(), proctex_uv.v().ToFloat32(), | ||
| 342 | g_state.regs.texturing, g_state.proctex); | ||
| 343 | } | ||
| 344 | |||
| 337 | // Texture environment - consists of 6 stages of color and alpha combining. | 345 | // Texture environment - consists of 6 stages of color and alpha combining. |
| 338 | // | 346 | // |
| 339 | // Color combiners take three input color values from some source (e.g. interpolated | 347 | // Color combiners take three input color values from some source (e.g. interpolated |
| @@ -376,6 +384,9 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 376 | case Source::Texture2: | 384 | case Source::Texture2: |
| 377 | return texture_color[2]; | 385 | return texture_color[2]; |
| 378 | 386 | ||
| 387 | case Source::Texture3: | ||
| 388 | return texture_color[3]; | ||
| 389 | |||
| 379 | case Source::PreviousBuffer: | 390 | case Source::PreviousBuffer: |
| 380 | return combiner_buffer; | 391 | return combiner_buffer; |
| 381 | 392 | ||