diff options
| author | 2016-12-17 13:28:59 +0100 | |
|---|---|---|
| committer | 2017-05-12 16:22:37 +0200 | |
| commit | 3fd3775d35c3634f7fdf7a1e9933e2d8d6e8198e (patch) | |
| tree | f73066674f6fbb69308e84da54474eb90d5e48e6 /src | |
| parent | Pica: Set program code / swizzle data limit to 4096 (diff) | |
| download | yuzu-3fd3775d35c3634f7fdf7a1e9933e2d8d6e8198e.tar.gz yuzu-3fd3775d35c3634f7fdf7a1e9933e2d8d6e8198e.tar.xz yuzu-3fd3775d35c3634f7fdf7a1e9933e2d8d6e8198e.zip | |
Pica: Write shader registers in functions
The commit after this one adds GS register writes; this commit moves the VS handlers into functions so that they can be re-used and extended more easily.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/command_processor.cpp | 160 |
1 file changed, 103 insertions, 57 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 2e32ff905..c29ad6775 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -32,12 +32,13 @@ namespace Pica { | |||
| 32 | 32 | ||
| 33 | namespace CommandProcessor { | 33 | namespace CommandProcessor { |
| 34 | 34 | ||
| 35 | static int float_regs_counter = 0; | 35 | static int vs_float_regs_counter = 0; |
| 36 | static u32 vs_uniform_write_buffer[4]; | ||
| 36 | 37 | ||
| 37 | static u32 uniform_write_buffer[4]; | 38 | static int gs_float_regs_counter = 0; |
| 39 | static u32 gs_uniform_write_buffer[4]; | ||
| 38 | 40 | ||
| 39 | static int default_attr_counter = 0; | 41 | static int default_attr_counter = 0; |
| 40 | |||
| 41 | static u32 default_attr_write_buffer[3]; | 42 | static u32 default_attr_write_buffer[3]; |
| 42 | 43 | ||
| 43 | // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF | 44 | // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF |
| @@ -48,6 +49,97 @@ static const u32 expand_bits_to_bytes[] = { | |||
| 48 | 49 | ||
| 49 | MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); | 50 | MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); |
| 50 | 51 | ||
| 52 | static const char* GetShaderSetupTypeName(Shader::ShaderSetup& setup) { | ||
| 53 | if (&setup == &g_state.vs) { | ||
| 54 | return "vertex shader"; | ||
| 55 | } | ||
| 56 | if (&setup == &g_state.gs) { | ||
| 57 | return "geometry shader"; | ||
| 58 | } | ||
| 59 | return "unknown shader"; | ||
| 60 | } | ||
| 61 | |||
| 62 | static void WriteUniformBoolReg(Shader::ShaderSetup& setup, u32 value) { | ||
| 63 | for (unsigned i = 0; i < setup.uniforms.b.size(); ++i) | ||
| 64 | setup.uniforms.b[i] = (value & (1 << i)) != 0; | ||
| 65 | } | ||
| 66 | |||
| 67 | static void WriteUniformIntReg(Shader::ShaderSetup& setup, unsigned index, | ||
| 68 | const Math::Vec4<u8>& values) { | ||
| 69 | ASSERT(index < setup.uniforms.i.size()); | ||
| 70 | setup.uniforms.i[index] = values; | ||
| 71 | LOG_TRACE(HW_GPU, "Set %s integer uniform %d to %02x %02x %02x %02x", | ||
| 72 | GetShaderSetupTypeName(setup), index, values.x, values.y, values.z, values.w); | ||
| 73 | } | ||
| 74 | |||
| 75 | static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, | ||
| 76 | int& float_regs_counter, u32 uniform_write_buffer[4], u32 value) { | ||
| 77 | auto& uniform_setup = config.uniform_setup; | ||
| 78 | |||
| 79 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||
| 80 | // it directly write the values? | ||
| 81 | uniform_write_buffer[float_regs_counter++] = value; | ||
| 82 | |||
| 83 | // Uniforms are written in a packed format such that four float24 values are encoded in | ||
| 84 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 85 | // written. | ||
| 86 | if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || | ||
| 87 | (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { | ||
| 88 | float_regs_counter = 0; | ||
| 89 | |||
| 90 | auto& uniform = setup.uniforms.f[uniform_setup.index]; | ||
| 91 | |||
| 92 | if (uniform_setup.index >= 96) { | ||
| 93 | LOG_ERROR(HW_GPU, "Invalid %s float uniform index %d", GetShaderSetupTypeName(setup), | ||
| 94 | (int)uniform_setup.index); | ||
| 95 | } else { | ||
| 96 | |||
| 97 | // NOTE: The destination component order indeed is "backwards" | ||
| 98 | if (uniform_setup.IsFloat32()) { | ||
| 99 | for (auto i : {0, 1, 2, 3}) | ||
| 100 | uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); | ||
| 101 | } else { | ||
| 102 | // TODO: Untested | ||
| 103 | uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); | ||
| 104 | uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | | ||
| 105 | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); | ||
| 106 | uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | | ||
| 107 | ((uniform_write_buffer[2] >> 24) & 0xFF)); | ||
| 108 | uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); | ||
| 109 | } | ||
| 110 | |||
| 111 | LOG_TRACE(HW_GPU, "Set %s float uniform %x to (%f %f %f %f)", | ||
| 112 | GetShaderSetupTypeName(setup), (int)uniform_setup.index, | ||
| 113 | uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), | ||
| 114 | uniform.w.ToFloat32()); | ||
| 115 | |||
| 116 | // TODO: Verify that this actually modifies the register! | ||
| 117 | uniform_setup.index.Assign(uniform_setup.index + 1); | ||
| 118 | } | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | static void WriteProgramCode(ShaderRegs& config, Shader::ShaderSetup& setup, | ||
| 123 | unsigned max_program_code_length, u32 value) { | ||
| 124 | if (config.program.offset >= max_program_code_length) { | ||
| 125 | LOG_ERROR(HW_GPU, "Invalid %s program offset %d", GetShaderSetupTypeName(setup), | ||
| 126 | (int)config.program.offset); | ||
| 127 | } else { | ||
| 128 | setup.program_code[config.program.offset] = value; | ||
| 129 | config.program.offset++; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | static void WriteSwizzlePatterns(ShaderRegs& config, Shader::ShaderSetup& setup, u32 value) { | ||
| 134 | if (config.swizzle_patterns.offset >= setup.swizzle_data.size()) { | ||
| 135 | LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", GetShaderSetupTypeName(setup), | ||
| 136 | (int)config.swizzle_patterns.offset); | ||
| 137 | } else { | ||
| 138 | setup.swizzle_data[config.swizzle_patterns.offset] = value; | ||
| 139 | config.swizzle_patterns.offset++; | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 51 | static void WritePicaReg(u32 id, u32 value, u32 mask) { | 143 | static void WritePicaReg(u32 id, u32 value, u32 mask) { |
| 52 | auto& regs = g_state.regs; | 144 | auto& regs = g_state.regs; |
| 53 | 145 | ||
| @@ -331,20 +423,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 331 | } | 423 | } |
| 332 | 424 | ||
| 333 | case PICA_REG_INDEX(vs.bool_uniforms): | 425 | case PICA_REG_INDEX(vs.bool_uniforms): |
| 334 | for (unsigned i = 0; i < 16; ++i) | 426 | WriteUniformBoolReg(g_state.vs, value); |
| 335 | g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; | ||
| 336 | |||
| 337 | break; | 427 | break; |
| 338 | 428 | ||
| 339 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): | 429 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): |
| 340 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): | 430 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): |
| 341 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): | 431 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): |
| 342 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { | 432 | case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { |
| 343 | int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); | 433 | unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); |
| 344 | auto values = regs.vs.int_uniforms[index]; | 434 | auto values = regs.vs.int_uniforms[index]; |
| 345 | g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); | 435 | WriteUniformIntReg(g_state.vs, index, |
| 346 | LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(), | 436 | Math::Vec4<u8>(values.x, values.y, values.z, values.w)); |
| 347 | values.y.Value(), values.z.Value(), values.w.Value()); | ||
| 348 | break; | 437 | break; |
| 349 | } | 438 | } |
| 350 | 439 | ||
| @@ -356,51 +445,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 356 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): | 445 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): |
| 357 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): | 446 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): |
| 358 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { | 447 | case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { |
| 359 | auto& uniform_setup = regs.vs.uniform_setup; | 448 | WriteUniformFloatReg(g_state.regs.vs, g_state.vs, vs_float_regs_counter, |
| 360 | 449 | vs_uniform_write_buffer, value); | |
| 361 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||
| 362 | // it directly write the values? | ||
| 363 | uniform_write_buffer[float_regs_counter++] = value; | ||
| 364 | |||
| 365 | // Uniforms are written in a packed format such that four float24 values are encoded in | ||
| 366 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 367 | // written. | ||
| 368 | if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || | ||
| 369 | (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { | ||
| 370 | float_regs_counter = 0; | ||
| 371 | |||
| 372 | auto& uniform = g_state.vs.uniforms.f[uniform_setup.index]; | ||
| 373 | |||
| 374 | if (uniform_setup.index > 95) { | ||
| 375 | LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); | ||
| 376 | break; | ||
| 377 | } | ||
| 378 | |||
| 379 | // NOTE: The destination component order indeed is "backwards" | ||
| 380 | if (uniform_setup.IsFloat32()) { | ||
| 381 | for (auto i : {0, 1, 2, 3}) | ||
| 382 | uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); | ||
| 383 | } else { | ||
| 384 | // TODO: Untested | ||
| 385 | uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); | ||
| 386 | uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | | ||
| 387 | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); | ||
| 388 | uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | | ||
| 389 | ((uniform_write_buffer[2] >> 24) & 0xFF)); | ||
| 390 | uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); | ||
| 391 | } | ||
| 392 | |||
| 393 | LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, | ||
| 394 | uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), | ||
| 395 | uniform.w.ToFloat32()); | ||
| 396 | |||
| 397 | // TODO: Verify that this actually modifies the register! | ||
| 398 | uniform_setup.index.Assign(uniform_setup.index + 1); | ||
| 399 | } | ||
| 400 | break; | 450 | break; |
| 401 | } | 451 | } |
| 402 | 452 | ||
| 403 | // Load shader program code | ||
| 404 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): | 453 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): |
| 405 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): | 454 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): |
| 406 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): | 455 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): |
| @@ -409,12 +458,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 409 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): | 458 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): |
| 410 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): | 459 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): |
| 411 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { | 460 | case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { |
| 412 | g_state.vs.program_code[regs.vs.program.offset] = value; | 461 | WriteProgramCode(g_state.regs.vs, g_state.vs, 512, value); |
| 413 | regs.vs.program.offset++; | ||
| 414 | break; | 462 | break; |
| 415 | } | 463 | } |
| 416 | 464 | ||
| 417 | // Load swizzle pattern data | ||
| 418 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): | 465 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): |
| 419 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): | 466 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): |
| 420 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): | 467 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): |
| @@ -423,8 +470,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 423 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): | 470 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): |
| 424 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): | 471 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): |
| 425 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { | 472 | case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { |
| 426 | g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; | 473 | WriteSwizzlePatterns(g_state.regs.vs, g_state.vs, value); |
| 427 | regs.vs.swizzle_patterns.offset++; | ||
| 428 | break; | 474 | break; |
| 429 | } | 475 | } |
| 430 | 476 | ||