diff options
| author | 2015-07-21 15:24:48 +0200 | |
|---|---|---|
| committer | 2015-07-21 15:24:48 +0200 | |
| commit | aa6dfdb827cd5887db6aeb992ef992a7aa23f766 (patch) | |
| tree | 4612793f498c62d7f582ffe6b464b8450b65e897 /src | |
| parent | Merge pull request #964 from lioncash/svc (diff) | |
| parent | Pica/Shader: Add geometry shader definitions. (diff) | |
| download | yuzu-aa6dfdb827cd5887db6aeb992ef992a7aa23f766.tar.gz yuzu-aa6dfdb827cd5887db6aeb992ef992a7aa23f766.tar.xz yuzu-aa6dfdb827cd5887db6aeb992ef992a7aa23f766.zip | |
Merge pull request #929 from neobrain/geoshader_definitions
Pica/Shader: Add geometry shader definitions.
Diffstat (limited to 'src')
| -rw-r--r-- | src/citra_qt/debugger/graphics_vertex_shader.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 78 | ||||
| -rw-r--r-- | src/video_core/pica.h | 212 | ||||
| -rw-r--r-- | src/video_core/primitive_assembly.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.h | 2 |
6 files changed, 163 insertions, 150 deletions
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index 14d3f8f39..db622d846 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp | |||
| @@ -259,7 +259,7 @@ void GraphicsVertexShaderModel::OnUpdate() | |||
| 259 | for (auto pattern : Pica::g_state.vs.swizzle_data) | 259 | for (auto pattern : Pica::g_state.vs.swizzle_data) |
| 260 | info.swizzle_info.push_back({pattern}); | 260 | info.swizzle_info.push_back({pattern}); |
| 261 | 261 | ||
| 262 | info.labels.insert({ Pica::g_state.regs.vs_main_offset, "main" }); | 262 | info.labels.insert({ Pica::g_state.regs.vs.main_offset, "main" }); |
| 263 | 263 | ||
| 264 | endResetModel(); | 264 | endResetModel(); |
| 265 | } | 265 | } |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index f2e3aee85..8b10d7340 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -45,7 +45,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 45 | if (GPU::g_skip_frame && id != PICA_REG_INDEX(trigger_irq)) | 45 | if (GPU::g_skip_frame && id != PICA_REG_INDEX(trigger_irq)) |
| 46 | return; | 46 | return; |
| 47 | 47 | ||
| 48 | // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value | 48 | // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value |
| 49 | u32 old_value = regs[id]; | 49 | u32 old_value = regs[id]; |
| 50 | regs[id] = (old_value & ~mask) | (value & mask); | 50 | regs[id] = (old_value & ~mask) | (value & mask); |
| 51 | 51 | ||
| @@ -282,7 +282,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 282 | &geometry_dumper, _1, _2, _3)); | 282 | &geometry_dumper, _1, _2, _3)); |
| 283 | 283 | ||
| 284 | // Send to vertex shader | 284 | // Send to vertex shader |
| 285 | VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); | 285 | VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs); |
| 286 | 286 | ||
| 287 | if (is_indexed) { | 287 | if (is_indexed) { |
| 288 | // TODO: Add processed vertex to vertex cache! | 288 | // TODO: Add processed vertex to vertex cache! |
| @@ -321,35 +321,35 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 321 | break; | 321 | break; |
| 322 | } | 322 | } |
| 323 | 323 | ||
| 324 | case PICA_REG_INDEX(vs_bool_uniforms): | 324 | case PICA_REG_INDEX(vs.bool_uniforms): |
| 325 | for (unsigned i = 0; i < 16; ++i) | 325 | for (unsigned i = 0; i < 16; ++i) |
| 326 | g_state.vs.uniforms.b[i] = (regs.vs_bool_uniforms.Value() & (1 << i)) != 0; | 326 | g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; |
| 327 | 327 | ||
| 328 | break; | 328 | break; |
| 329 | 329 | ||
| 330 | case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1): | 330 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): |
| 331 | case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2): | 331 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): |
| 332 | case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): | 332 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): |
| 333 | case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): | 333 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): |
| 334 | { | 334 | { |
| 335 | int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); | 335 | int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); |
| 336 | auto values = regs.vs_int_uniforms[index]; | 336 | auto values = regs.vs.int_uniforms[index]; |
| 337 | g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); | 337 | g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); |
| 338 | LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", | 338 | LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", |
| 339 | index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); | 339 | index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); |
| 340 | break; | 340 | break; |
| 341 | } | 341 | } |
| 342 | 342 | ||
| 343 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): | 343 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): |
| 344 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): | 344 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): |
| 345 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): | 345 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3): |
| 346 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4): | 346 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4): |
| 347 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5): | 347 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5): |
| 348 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6): | 348 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): |
| 349 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): | 349 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): |
| 350 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): | 350 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): |
| 351 | { | 351 | { |
| 352 | auto& uniform_setup = regs.vs_uniform_setup; | 352 | auto& uniform_setup = regs.vs.uniform_setup; |
| 353 | 353 | ||
| 354 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | 354 | // TODO: Does actual hardware indeed keep an intermediate buffer or does |
| 355 | // it directly write the values? | 355 | // it directly write the values? |
| @@ -392,32 +392,32 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 392 | } | 392 | } |
| 393 | 393 | ||
| 394 | // Load shader program code | 394 | // Load shader program code |
| 395 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): | 395 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): |
| 396 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): | 396 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): |
| 397 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce): | 397 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): |
| 398 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf): | 398 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf): |
| 399 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0): | 399 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0): |
| 400 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1): | 400 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): |
| 401 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): | 401 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): |
| 402 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): | 402 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): |
| 403 | { | 403 | { |
| 404 | g_state.vs.program_code[regs.vs_program.offset] = value; | 404 | g_state.vs.program_code[regs.vs.program.offset] = value; |
| 405 | regs.vs_program.offset++; | 405 | regs.vs.program.offset++; |
| 406 | break; | 406 | break; |
| 407 | } | 407 | } |
| 408 | 408 | ||
| 409 | // Load swizzle pattern data | 409 | // Load swizzle pattern data |
| 410 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): | 410 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): |
| 411 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): | 411 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): |
| 412 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8): | 412 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): |
| 413 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9): | 413 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9): |
| 414 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da): | 414 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da): |
| 415 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db): | 415 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): |
| 416 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): | 416 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): |
| 417 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): | 417 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): |
| 418 | { | 418 | { |
| 419 | g_state.vs.swizzle_data[regs.vs_swizzle_patterns.offset] = value; | 419 | g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; |
| 420 | regs.vs_swizzle_patterns.offset++; | 420 | regs.vs.swizzle_patterns.offset++; |
| 421 | break; | 421 | break; |
| 422 | } | 422 | } |
| 423 | 423 | ||
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 628e73213..5da182794 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -785,112 +785,119 @@ struct Regs { | |||
| 785 | INSERT_PADDING_WORDS(0x20); | 785 | INSERT_PADDING_WORDS(0x20); |
| 786 | 786 | ||
| 787 | enum class TriangleTopology : u32 { | 787 | enum class TriangleTopology : u32 { |
| 788 | List = 0, | 788 | List = 0, |
| 789 | Strip = 1, | 789 | Strip = 1, |
| 790 | Fan = 2, | 790 | Fan = 2, |
| 791 | ListIndexed = 3, // TODO: No idea if this is correct | 791 | Shader = 3, // Programmable setup unit implemented in a geometry shader |
| 792 | }; | 792 | }; |
| 793 | 793 | ||
| 794 | BitField<8, 2, TriangleTopology> triangle_topology; | 794 | BitField<8, 2, TriangleTopology> triangle_topology; |
| 795 | 795 | ||
| 796 | INSERT_PADDING_WORDS(0x51); | 796 | INSERT_PADDING_WORDS(0x21); |
| 797 | 797 | ||
| 798 | BitField<0, 16, u32> vs_bool_uniforms; | 798 | struct ShaderConfig { |
| 799 | union { | 799 | BitField<0, 16, u32> bool_uniforms; |
| 800 | BitField< 0, 8, u32> x; | ||
| 801 | BitField< 8, 8, u32> y; | ||
| 802 | BitField<16, 8, u32> z; | ||
| 803 | BitField<24, 8, u32> w; | ||
| 804 | } vs_int_uniforms[4]; | ||
| 805 | 800 | ||
| 806 | INSERT_PADDING_WORDS(0x5); | 801 | union { |
| 802 | BitField< 0, 8, u32> x; | ||
| 803 | BitField< 8, 8, u32> y; | ||
| 804 | BitField<16, 8, u32> z; | ||
| 805 | BitField<24, 8, u32> w; | ||
| 806 | } int_uniforms[4]; | ||
| 807 | 807 | ||
| 808 | // Offset to shader program entry point (in words) | 808 | INSERT_PADDING_WORDS(0x5); |
| 809 | BitField<0, 16, u32> vs_main_offset; | ||
| 810 | 809 | ||
| 811 | union { | 810 | // Offset to shader program entry point (in words) |
| 812 | BitField< 0, 4, u64> attribute0_register; | 811 | BitField<0, 16, u32> main_offset; |
| 813 | BitField< 4, 4, u64> attribute1_register; | 812 | |
| 814 | BitField< 8, 4, u64> attribute2_register; | 813 | union { |
| 815 | BitField<12, 4, u64> attribute3_register; | 814 | BitField< 0, 4, u64> attribute0_register; |
| 816 | BitField<16, 4, u64> attribute4_register; | 815 | BitField< 4, 4, u64> attribute1_register; |
| 817 | BitField<20, 4, u64> attribute5_register; | 816 | BitField< 8, 4, u64> attribute2_register; |
| 818 | BitField<24, 4, u64> attribute6_register; | 817 | BitField<12, 4, u64> attribute3_register; |
| 819 | BitField<28, 4, u64> attribute7_register; | 818 | BitField<16, 4, u64> attribute4_register; |
| 820 | BitField<32, 4, u64> attribute8_register; | 819 | BitField<20, 4, u64> attribute5_register; |
| 821 | BitField<36, 4, u64> attribute9_register; | 820 | BitField<24, 4, u64> attribute6_register; |
| 822 | BitField<40, 4, u64> attribute10_register; | 821 | BitField<28, 4, u64> attribute7_register; |
| 823 | BitField<44, 4, u64> attribute11_register; | 822 | BitField<32, 4, u64> attribute8_register; |
| 824 | BitField<48, 4, u64> attribute12_register; | 823 | BitField<36, 4, u64> attribute9_register; |
| 825 | BitField<52, 4, u64> attribute13_register; | 824 | BitField<40, 4, u64> attribute10_register; |
| 826 | BitField<56, 4, u64> attribute14_register; | 825 | BitField<44, 4, u64> attribute11_register; |
| 827 | BitField<60, 4, u64> attribute15_register; | 826 | BitField<48, 4, u64> attribute12_register; |
| 828 | 827 | BitField<52, 4, u64> attribute13_register; | |
| 829 | int GetRegisterForAttribute(int attribute_index) const { | 828 | BitField<56, 4, u64> attribute14_register; |
| 830 | u64 fields[] = { | 829 | BitField<60, 4, u64> attribute15_register; |
| 831 | attribute0_register, attribute1_register, attribute2_register, attribute3_register, | 830 | |
| 832 | attribute4_register, attribute5_register, attribute6_register, attribute7_register, | 831 | int GetRegisterForAttribute(int attribute_index) const { |
| 833 | attribute8_register, attribute9_register, attribute10_register, attribute11_register, | 832 | u64 fields[] = { |
| 834 | attribute12_register, attribute13_register, attribute14_register, attribute15_register, | 833 | attribute0_register, attribute1_register, attribute2_register, attribute3_register, |
| 834 | attribute4_register, attribute5_register, attribute6_register, attribute7_register, | ||
| 835 | attribute8_register, attribute9_register, attribute10_register, attribute11_register, | ||
| 836 | attribute12_register, attribute13_register, attribute14_register, attribute15_register, | ||
| 837 | }; | ||
| 838 | return (int)fields[attribute_index]; | ||
| 839 | } | ||
| 840 | } input_register_map; | ||
| 841 | |||
| 842 | // OUTMAP_MASK, 0x28E, CODETRANSFER_END | ||
| 843 | INSERT_PADDING_WORDS(0x3); | ||
| 844 | |||
| 845 | struct { | ||
| 846 | enum Format : u32 | ||
| 847 | { | ||
| 848 | FLOAT24 = 0, | ||
| 849 | FLOAT32 = 1 | ||
| 835 | }; | 850 | }; |
| 836 | return (int)fields[attribute_index]; | ||
| 837 | } | ||
| 838 | } vs_input_register_map; | ||
| 839 | 851 | ||
| 840 | INSERT_PADDING_WORDS(0x3); | 852 | bool IsFloat32() const { |
| 853 | return format == FLOAT32; | ||
| 854 | } | ||
| 841 | 855 | ||
| 842 | struct { | 856 | union { |
| 843 | enum Format : u32 | 857 | // Index of the next uniform to write to |
| 844 | { | 858 | // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices |
| 845 | FLOAT24 = 0, | 859 | // TODO: Maybe the uppermost index is for the geometry shader? Investigate! |
| 846 | FLOAT32 = 1 | 860 | BitField<0, 7, u32> index; |
| 847 | }; | ||
| 848 | 861 | ||
| 849 | bool IsFloat32() const { | 862 | BitField<31, 1, Format> format; |
| 850 | return format == FLOAT32; | 863 | }; |
| 851 | } | ||
| 852 | 864 | ||
| 853 | union { | 865 | // Writing to these registers sets the current uniform. |
| 854 | // Index of the next uniform to write to | 866 | u32 set_value[8]; |
| 855 | // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices | ||
| 856 | BitField<0, 7, u32> index; | ||
| 857 | 867 | ||
| 858 | BitField<31, 1, Format> format; | 868 | } uniform_setup; |
| 859 | }; | ||
| 860 | 869 | ||
| 861 | // Writing to these registers sets the "current" uniform. | 870 | INSERT_PADDING_WORDS(0x2); |
| 862 | // TODO: It's not clear how the hardware stores what the "current" uniform is. | ||
| 863 | u32 set_value[8]; | ||
| 864 | 871 | ||
| 865 | } vs_uniform_setup; | 872 | struct { |
| 873 | // Offset of the next instruction to write code to. | ||
| 874 | // Incremented with each instruction write. | ||
| 875 | u32 offset; | ||
| 866 | 876 | ||
| 867 | INSERT_PADDING_WORDS(0x2); | 877 | // Writing to these registers sets the "current" word in the shader program. |
| 878 | u32 set_word[8]; | ||
| 879 | } program; | ||
| 868 | 880 | ||
| 869 | struct { | 881 | INSERT_PADDING_WORDS(0x1); |
| 870 | // Offset of the next instruction to write code to. | ||
| 871 | // Incremented with each instruction write. | ||
| 872 | u32 offset; | ||
| 873 | 882 | ||
| 874 | // Writing to these registers sets the "current" word in the shader program. | 883 | // This register group is used to load an internal table of swizzling patterns, |
| 875 | // TODO: It's not clear how the hardware stores what the "current" word is. | 884 | // which are indexed by each shader instruction to specify vector component swizzling. |
| 876 | u32 set_word[8]; | 885 | struct { |
| 877 | } vs_program; | 886 | // Offset of the next swizzle pattern to write code to. |
| 887 | // Incremented with each instruction write. | ||
| 888 | u32 offset; | ||
| 878 | 889 | ||
| 879 | INSERT_PADDING_WORDS(0x1); | 890 | // Writing to these registers sets the current swizzle pattern in the table. |
| 891 | u32 set_word[8]; | ||
| 892 | } swizzle_patterns; | ||
| 880 | 893 | ||
| 881 | // This register group is used to load an internal table of swizzling patterns, | 894 | INSERT_PADDING_WORDS(0x2); |
| 882 | // which are indexed by each shader instruction to specify vector component swizzling. | 895 | }; |
| 883 | struct { | ||
| 884 | // Offset of the next swizzle pattern to write code to. | ||
| 885 | // Incremented with each instruction write. | ||
| 886 | u32 offset; | ||
| 887 | 896 | ||
| 888 | // Writing to these registers sets the "current" swizzle pattern in the table. | 897 | ShaderConfig gs; |
| 889 | // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is. | 898 | ShaderConfig vs; |
| 890 | u32 set_word[8]; | ||
| 891 | } vs_swizzle_patterns; | ||
| 892 | 899 | ||
| 893 | INSERT_PADDING_WORDS(0x22); | 900 | INSERT_PADDING_WORDS(0x20); |
| 894 | 901 | ||
| 895 | // Map register indices to names readable by humans | 902 | // Map register indices to names readable by humans |
| 896 | // Used for debugging purposes, so performance is not an issue here | 903 | // Used for debugging purposes, so performance is not an issue here |
| @@ -937,13 +944,20 @@ struct Regs { | |||
| 937 | ADD_FIELD(vs_default_attributes_setup); | 944 | ADD_FIELD(vs_default_attributes_setup); |
| 938 | ADD_FIELD(command_buffer); | 945 | ADD_FIELD(command_buffer); |
| 939 | ADD_FIELD(triangle_topology); | 946 | ADD_FIELD(triangle_topology); |
| 940 | ADD_FIELD(vs_bool_uniforms); | 947 | ADD_FIELD(gs.bool_uniforms); |
| 941 | ADD_FIELD(vs_int_uniforms); | 948 | ADD_FIELD(gs.int_uniforms); |
| 942 | ADD_FIELD(vs_main_offset); | 949 | ADD_FIELD(gs.main_offset); |
| 943 | ADD_FIELD(vs_input_register_map); | 950 | ADD_FIELD(gs.input_register_map); |
| 944 | ADD_FIELD(vs_uniform_setup); | 951 | ADD_FIELD(gs.uniform_setup); |
| 945 | ADD_FIELD(vs_program); | 952 | ADD_FIELD(gs.program); |
| 946 | ADD_FIELD(vs_swizzle_patterns); | 953 | ADD_FIELD(gs.swizzle_patterns); |
| 954 | ADD_FIELD(vs.bool_uniforms); | ||
| 955 | ADD_FIELD(vs.int_uniforms); | ||
| 956 | ADD_FIELD(vs.main_offset); | ||
| 957 | ADD_FIELD(vs.input_register_map); | ||
| 958 | ADD_FIELD(vs.uniform_setup); | ||
| 959 | ADD_FIELD(vs.program); | ||
| 960 | ADD_FIELD(vs.swizzle_patterns); | ||
| 947 | 961 | ||
| 948 | #undef ADD_FIELD | 962 | #undef ADD_FIELD |
| 949 | 963 | ||
| @@ -1015,17 +1029,14 @@ ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); | |||
| 1015 | ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); | 1029 | ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); |
| 1016 | ASSERT_REG_POSITION(command_buffer, 0x238); | 1030 | ASSERT_REG_POSITION(command_buffer, 0x238); |
| 1017 | ASSERT_REG_POSITION(triangle_topology, 0x25e); | 1031 | ASSERT_REG_POSITION(triangle_topology, 0x25e); |
| 1018 | ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); | 1032 | ASSERT_REG_POSITION(gs, 0x280); |
| 1019 | ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); | 1033 | ASSERT_REG_POSITION(vs, 0x2b0); |
| 1020 | ASSERT_REG_POSITION(vs_main_offset, 0x2ba); | ||
| 1021 | ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); | ||
| 1022 | ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); | ||
| 1023 | ASSERT_REG_POSITION(vs_program, 0x2cb); | ||
| 1024 | ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5); | ||
| 1025 | 1034 | ||
| 1026 | #undef ASSERT_REG_POSITION | 1035 | #undef ASSERT_REG_POSITION |
| 1027 | #endif // !defined(_MSC_VER) | 1036 | #endif // !defined(_MSC_VER) |
| 1028 | 1037 | ||
| 1038 | static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size"); | ||
| 1039 | |||
| 1029 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. | 1040 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. |
| 1030 | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); | 1041 | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); |
| 1031 | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); | 1042 | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); |
| @@ -1135,7 +1146,7 @@ struct State { | |||
| 1135 | Regs regs; | 1146 | Regs regs; |
| 1136 | 1147 | ||
| 1137 | /// Vertex shader memory | 1148 | /// Vertex shader memory |
| 1138 | struct { | 1149 | struct ShaderSetup { |
| 1139 | struct { | 1150 | struct { |
| 1140 | Math::Vec4<float24> f[96]; | 1151 | Math::Vec4<float24> f[96]; |
| 1141 | std::array<bool, 16> b; | 1152 | std::array<bool, 16> b; |
| @@ -1146,7 +1157,10 @@ struct State { | |||
| 1146 | 1157 | ||
| 1147 | std::array<u32, 1024> program_code; | 1158 | std::array<u32, 1024> program_code; |
| 1148 | std::array<u32, 1024> swizzle_data; | 1159 | std::array<u32, 1024> swizzle_data; |
| 1149 | } vs; | 1160 | }; |
| 1161 | |||
| 1162 | ShaderSetup vs; | ||
| 1163 | ShaderSetup gs; | ||
| 1150 | 1164 | ||
| 1151 | /// Current Pica command list | 1165 | /// Current Pica command list |
| 1152 | struct { | 1166 | struct { |
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 0120f2896..2f22bdcce 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp | |||
| @@ -20,8 +20,9 @@ template<typename VertexType> | |||
| 20 | void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) | 20 | void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) |
| 21 | { | 21 | { |
| 22 | switch (topology) { | 22 | switch (topology) { |
| 23 | // TODO: Figure out what's different with TriangleTopology::Shader. | ||
| 23 | case Regs::TriangleTopology::List: | 24 | case Regs::TriangleTopology::List: |
| 24 | case Regs::TriangleTopology::ListIndexed: | 25 | case Regs::TriangleTopology::Shader: |
| 25 | if (buffer_index < 2) { | 26 | if (buffer_index < 2) { |
| 26 | buffer[buffer_index++] = vtx; | 27 | buffer[buffer_index++] = vtx; |
| 27 | } else { | 28 | } else { |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index d32c2e371..b77503806 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -546,20 +546,18 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 546 | 546 | ||
| 547 | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | 547 | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); |
| 548 | 548 | ||
| 549 | OutputVertex RunShader(const InputVertex& input, int num_attributes) { | 549 | OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { |
| 550 | Common::Profiling::ScopeTimer timer(shader_category); | 550 | Common::Profiling::ScopeTimer timer(shader_category); |
| 551 | 551 | ||
| 552 | const auto& regs = g_state.regs; | ||
| 553 | const auto& vs = g_state.vs; | ||
| 554 | VertexShaderState state; | 552 | VertexShaderState state; |
| 555 | 553 | ||
| 556 | const u32* main = &vs.program_code[regs.vs_main_offset]; | 554 | const u32* main = &setup.program_code[config.main_offset]; |
| 557 | state.program_counter = (u32*)main; | 555 | state.program_counter = (u32*)main; |
| 558 | state.debug.max_offset = 0; | 556 | state.debug.max_offset = 0; |
| 559 | state.debug.max_opdesc_id = 0; | 557 | state.debug.max_opdesc_id = 0; |
| 560 | 558 | ||
| 561 | // Setup input register table | 559 | // Setup input register table |
| 562 | const auto& attribute_register_map = regs.vs_input_register_map; | 560 | const auto& attribute_register_map = config.input_register_map; |
| 563 | float24 dummy_register; | 561 | float24 dummy_register; |
| 564 | boost::fill(state.input_register_table, &dummy_register); | 562 | boost::fill(state.input_register_table, &dummy_register); |
| 565 | 563 | ||
| @@ -584,16 +582,16 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { | |||
| 584 | state.conditional_code[1] = false; | 582 | state.conditional_code[1] = false; |
| 585 | 583 | ||
| 586 | ProcessShaderCode(state); | 584 | ProcessShaderCode(state); |
| 587 | DebugUtils::DumpShader(vs.program_code.data(), state.debug.max_offset, vs.swizzle_data.data(), | 585 | DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), |
| 588 | state.debug.max_opdesc_id, regs.vs_main_offset, | 586 | state.debug.max_opdesc_id, config.main_offset, |
| 589 | regs.vs_output_attributes); | 587 | g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here |
| 590 | 588 | ||
| 591 | // Setup output data | 589 | // Setup output data |
| 592 | OutputVertex ret; | 590 | OutputVertex ret; |
| 593 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | 591 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to |
| 594 | // figure out what those circumstances are and enable the remaining outputs then. | 592 | // figure out what those circumstances are and enable the remaining outputs then. |
| 595 | for (int i = 0; i < 7; ++i) { | 593 | for (int i = 0; i < 7; ++i) { |
| 596 | const auto& output_register_map = regs.vs_output_attributes[i]; | 594 | const auto& output_register_map = g_state.regs.vs_output_attributes[i]; // TODO: Don't hardcode VS here |
| 597 | 595 | ||
| 598 | u32 semantics[4] = { | 596 | u32 semantics[4] = { |
| 599 | output_register_map.map_x, output_register_map.map_y, | 597 | output_register_map.map_x, output_register_map.map_y, |
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index c997e6a77..97f9250dd 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h | |||
| @@ -65,7 +65,7 @@ struct OutputVertex { | |||
| 65 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 65 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 66 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 66 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| 67 | 67 | ||
| 68 | OutputVertex RunShader(const InputVertex& input, int num_attributes); | 68 | OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); |
| 69 | 69 | ||
| 70 | } // namespace | 70 | } // namespace |
| 71 | 71 | ||