diff options
| m--------- | externals/nihstro | 0 | ||||
| -rw-r--r-- | src/core/core.cpp | 4 | ||||
| -rw-r--r-- | src/core/hle/kernel/session.h | 10 | ||||
| -rw-r--r-- | src/core/hle/kernel/thread.cpp | 11 | ||||
| -rw-r--r-- | src/core/hle/kernel/thread.h | 8 | ||||
| -rw-r--r-- | src/video_core/pica.h | 67 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 20 |
8 files changed, 143 insertions, 26 deletions
diff --git a/externals/nihstro b/externals/nihstro | |||
| Subproject 4a78588b308564f7ebae193e0ae00d9a0d5741d | Subproject 81f1804a43f625e3a1a20752c0db70a41341038 | ||
diff --git a/src/core/core.cpp b/src/core/core.cpp index bb2ed7a92..b5c258230 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -61,10 +61,6 @@ int Init() { | |||
| 61 | g_sys_core = new ARM_DynCom(USER32MODE); | 61 | g_sys_core = new ARM_DynCom(USER32MODE); |
| 62 | g_app_core = new ARM_DynCom(USER32MODE); | 62 | g_app_core = new ARM_DynCom(USER32MODE); |
| 63 | 63 | ||
| 64 | // TODO: Whenever TLS is implemented, this should contain | ||
| 65 | // the address of the 0x200-byte TLS | ||
| 66 | g_app_core->SetCP15Register(CP15_THREAD_URO, Memory::TLS_AREA_VADDR); | ||
| 67 | |||
| 68 | LOG_DEBUG(Core, "Initialized OK"); | 64 | LOG_DEBUG(Core, "Initialized OK"); |
| 69 | return 0; | 65 | return 0; |
| 70 | } | 66 | } |
diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h index 0fd18148a..8c3886ffd 100644 --- a/src/core/hle/kernel/session.h +++ b/src/core/hle/kernel/session.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "core/hle/kernel/kernel.h" | 7 | #include "core/hle/kernel/kernel.h" |
| 8 | #include "core/hle/kernel/thread.h" | ||
| 8 | #include "core/mem_map.h" | 9 | #include "core/mem_map.h" |
| 9 | 10 | ||
| 10 | namespace Kernel { | 11 | namespace Kernel { |
| @@ -12,12 +13,15 @@ namespace Kernel { | |||
| 12 | static const int kCommandHeaderOffset = 0x80; ///< Offset into command buffer of header | 13 | static const int kCommandHeaderOffset = 0x80; ///< Offset into command buffer of header |
| 13 | 14 | ||
| 14 | /** | 15 | /** |
| 15 | * Returns a pointer to the command buffer in kernel memory | 16 | * Returns a pointer to the command buffer in the current thread's TLS |
| 17 | * TODO(Subv): This is not entirely correct, the command buffer should be copied from | ||
| 18 | * the thread's TLS to an intermediate buffer in kernel memory, and then copied again to | ||
| 19 | * the service handler process' memory. | ||
| 16 | * @param offset Optional offset into command buffer | 20 | * @param offset Optional offset into command buffer |
| 17 | * @return Pointer to command buffer | 21 | * @return Pointer to command buffer |
| 18 | */ | 22 | */ |
| 19 | inline static u32* GetCommandBuffer(const int offset=0) { | 23 | inline static u32* GetCommandBuffer(const int offset = 0) { |
| 20 | return (u32*)Memory::GetPointer(Memory::TLS_AREA_VADDR + kCommandHeaderOffset + offset); | 24 | return (u32*)Memory::GetPointer(GetCurrentThread()->GetTLSAddress() + kCommandHeaderOffset + offset); |
| 21 | } | 25 | } |
| 22 | 26 | ||
| 23 | /** | 27 | /** |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 0a3fd7cb1..5de8f9a73 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -197,6 +197,7 @@ static void SwitchContext(Thread* new_thread) { | |||
| 197 | new_thread->current_priority = new_thread->nominal_priority; | 197 | new_thread->current_priority = new_thread->nominal_priority; |
| 198 | 198 | ||
| 199 | Core::g_app_core->LoadContext(new_thread->context); | 199 | Core::g_app_core->LoadContext(new_thread->context); |
| 200 | Core::g_app_core->SetCP15Register(CP15_THREAD_URO, new_thread->GetTLSAddress()); | ||
| 200 | } else { | 201 | } else { |
| 201 | current_thread = nullptr; | 202 | current_thread = nullptr; |
| 202 | } | 203 | } |
| @@ -402,6 +403,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, | |||
| 402 | thread->name = std::move(name); | 403 | thread->name = std::move(name); |
| 403 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); | 404 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); |
| 404 | 405 | ||
| 406 | VAddr tls_address = Memory::TLS_AREA_VADDR + (thread->thread_id - 1) * 0x200; | ||
| 407 | |||
| 408 | ASSERT_MSG(tls_address < Memory::TLS_AREA_VADDR_END, "Too many threads"); | ||
| 409 | |||
| 410 | thread->tls_address = tls_address; | ||
| 411 | |||
| 405 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used | 412 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used |
| 406 | // to initialize the context | 413 | // to initialize the context |
| 407 | Core::g_app_core->ResetContext(thread->context, stack_top, entry_point, arg); | 414 | Core::g_app_core->ResetContext(thread->context, stack_top, entry_point, arg); |
| @@ -495,6 +502,10 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { | |||
| 495 | context.cpu_registers[1] = output; | 502 | context.cpu_registers[1] = output; |
| 496 | } | 503 | } |
| 497 | 504 | ||
| 505 | VAddr Thread::GetTLSAddress() const { | ||
| 506 | return tls_address; | ||
| 507 | } | ||
| 508 | |||
| 498 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 509 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| 499 | 510 | ||
| 500 | void ThreadingInit() { | 511 | void ThreadingInit() { |
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 9958b16e6..6891c8c2f 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h | |||
| @@ -135,6 +135,12 @@ public: | |||
| 135 | */ | 135 | */ |
| 136 | void Stop(); | 136 | void Stop(); |
| 137 | 137 | ||
| 138 | /* | ||
| 139 | * Returns the Thread Local Storage address of the current thread | ||
| 140 | * @returns VAddr of the thread's TLS | ||
| 141 | */ | ||
| 142 | VAddr GetTLSAddress() const; | ||
| 143 | |||
| 138 | Core::ThreadContext context; | 144 | Core::ThreadContext context; |
| 139 | 145 | ||
| 140 | u32 thread_id; | 146 | u32 thread_id; |
| @@ -150,6 +156,8 @@ public: | |||
| 150 | 156 | ||
| 151 | s32 processor_id; | 157 | s32 processor_id; |
| 152 | 158 | ||
| 159 | VAddr tls_address; ///< Address of the Thread Local Storage of the thread | ||
| 160 | |||
| 153 | /// Mutexes currently held by this thread, which will be released when it exits. | 161 | /// Mutexes currently held by this thread, which will be released when it exits. |
| 154 | boost::container::flat_set<SharedPtr<Mutex>> held_mutexes; | 162 | boost::container::flat_set<SharedPtr<Mutex>> held_mutexes; |
| 155 | 163 | ||
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index e4a91058c..5e169ff69 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -226,7 +226,8 @@ struct Regs { | |||
| 226 | Texture1 = 0x4, | 226 | Texture1 = 0x4, |
| 227 | Texture2 = 0x5, | 227 | Texture2 = 0x5, |
| 228 | Texture3 = 0x6, | 228 | Texture3 = 0x6, |
| 229 | // 0x7-0xc = primary color?? | 229 | |
| 230 | PreviousBuffer = 0xd, | ||
| 230 | Constant = 0xe, | 231 | Constant = 0xe, |
| 231 | Previous = 0xf, | 232 | Previous = 0xf, |
| 232 | }; | 233 | }; |
| @@ -299,7 +300,18 @@ struct Regs { | |||
| 299 | BitField<24, 8, u32> const_a; | 300 | BitField<24, 8, u32> const_a; |
| 300 | }; | 301 | }; |
| 301 | 302 | ||
| 302 | INSERT_PADDING_WORDS(0x1); | 303 | union { |
| 304 | BitField< 0, 2, u32> color_scale; | ||
| 305 | BitField<16, 2, u32> alpha_scale; | ||
| 306 | }; | ||
| 307 | |||
| 308 | inline unsigned GetColorMultiplier() const { | ||
| 309 | return (color_scale < 3) ? (1 << color_scale) : 1; | ||
| 310 | } | ||
| 311 | |||
| 312 | inline unsigned GetAlphaMultiplier() const { | ||
| 313 | return (alpha_scale < 3) ? (1 << alpha_scale) : 1; | ||
| 314 | } | ||
| 303 | }; | 315 | }; |
| 304 | 316 | ||
| 305 | TevStageConfig tev_stage0; | 317 | TevStageConfig tev_stage0; |
| @@ -309,11 +321,36 @@ struct Regs { | |||
| 309 | TevStageConfig tev_stage2; | 321 | TevStageConfig tev_stage2; |
| 310 | INSERT_PADDING_WORDS(0x3); | 322 | INSERT_PADDING_WORDS(0x3); |
| 311 | TevStageConfig tev_stage3; | 323 | TevStageConfig tev_stage3; |
| 312 | INSERT_PADDING_WORDS(0x13); | 324 | INSERT_PADDING_WORDS(0x3); |
| 325 | |||
| 326 | union { | ||
| 327 | // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in | ||
| 328 | // these masks are set | ||
| 329 | BitField< 8, 4, u32> update_mask_rgb; | ||
| 330 | BitField<12, 4, u32> update_mask_a; | ||
| 331 | |||
| 332 | bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { | ||
| 333 | return (stage_index < 4) && (update_mask_rgb & (1 << stage_index)); | ||
| 334 | } | ||
| 335 | |||
| 336 | bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { | ||
| 337 | return (stage_index < 4) && (update_mask_a & (1 << stage_index)); | ||
| 338 | } | ||
| 339 | } tev_combiner_buffer_input; | ||
| 340 | |||
| 341 | INSERT_PADDING_WORDS(0xf); | ||
| 313 | TevStageConfig tev_stage4; | 342 | TevStageConfig tev_stage4; |
| 314 | INSERT_PADDING_WORDS(0x3); | 343 | INSERT_PADDING_WORDS(0x3); |
| 315 | TevStageConfig tev_stage5; | 344 | TevStageConfig tev_stage5; |
| 316 | INSERT_PADDING_WORDS(0x3); | 345 | |
| 346 | union { | ||
| 347 | BitField< 0, 8, u32> r; | ||
| 348 | BitField< 8, 8, u32> g; | ||
| 349 | BitField<16, 8, u32> b; | ||
| 350 | BitField<24, 8, u32> a; | ||
| 351 | } tev_combiner_buffer_color; | ||
| 352 | |||
| 353 | INSERT_PADDING_WORDS(0x2); | ||
| 317 | 354 | ||
| 318 | const std::array<Regs::TevStageConfig,6> GetTevStages() const { | 355 | const std::array<Regs::TevStageConfig,6> GetTevStages() const { |
| 319 | return { tev_stage0, tev_stage1, | 356 | return { tev_stage0, tev_stage1, |
| @@ -426,9 +463,7 @@ struct Regs { | |||
| 426 | D24S8 = 3 | 463 | D24S8 = 3 |
| 427 | }; | 464 | }; |
| 428 | 465 | ||
| 429 | /* | 466 | // Returns the number of bytes in the specified depth format |
| 430 | * Returns the number of bytes in the specified depth format | ||
| 431 | */ | ||
| 432 | static u32 BytesPerDepthPixel(DepthFormat format) { | 467 | static u32 BytesPerDepthPixel(DepthFormat format) { |
| 433 | switch (format) { | 468 | switch (format) { |
| 434 | case DepthFormat::D16: | 469 | case DepthFormat::D16: |
| @@ -443,6 +478,20 @@ struct Regs { | |||
| 443 | } | 478 | } |
| 444 | } | 479 | } |
| 445 | 480 | ||
| 481 | // Returns the number of bits per depth component of the specified depth format | ||
| 482 | static u32 DepthBitsPerPixel(DepthFormat format) { | ||
| 483 | switch (format) { | ||
| 484 | case DepthFormat::D16: | ||
| 485 | return 16; | ||
| 486 | case DepthFormat::D24: | ||
| 487 | case DepthFormat::D24S8: | ||
| 488 | return 24; | ||
| 489 | default: | ||
| 490 | LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); | ||
| 491 | UNIMPLEMENTED(); | ||
| 492 | } | ||
| 493 | } | ||
| 494 | |||
| 446 | struct { | 495 | struct { |
| 447 | // Components are laid out in reverse byte order, most significant bits first. | 496 | // Components are laid out in reverse byte order, most significant bits first. |
| 448 | enum ColorFormat : u32 { | 497 | enum ColorFormat : u32 { |
| @@ -784,8 +833,10 @@ struct Regs { | |||
| 784 | ADD_FIELD(tev_stage1); | 833 | ADD_FIELD(tev_stage1); |
| 785 | ADD_FIELD(tev_stage2); | 834 | ADD_FIELD(tev_stage2); |
| 786 | ADD_FIELD(tev_stage3); | 835 | ADD_FIELD(tev_stage3); |
| 836 | ADD_FIELD(tev_combiner_buffer_input); | ||
| 787 | ADD_FIELD(tev_stage4); | 837 | ADD_FIELD(tev_stage4); |
| 788 | ADD_FIELD(tev_stage5); | 838 | ADD_FIELD(tev_stage5); |
| 839 | ADD_FIELD(tev_combiner_buffer_color); | ||
| 789 | ADD_FIELD(output_merger); | 840 | ADD_FIELD(output_merger); |
| 790 | ADD_FIELD(framebuffer); | 841 | ADD_FIELD(framebuffer); |
| 791 | ADD_FIELD(vertex_attributes); | 842 | ADD_FIELD(vertex_attributes); |
| @@ -859,8 +910,10 @@ ASSERT_REG_POSITION(tev_stage0, 0xc0); | |||
| 859 | ASSERT_REG_POSITION(tev_stage1, 0xc8); | 910 | ASSERT_REG_POSITION(tev_stage1, 0xc8); |
| 860 | ASSERT_REG_POSITION(tev_stage2, 0xd0); | 911 | ASSERT_REG_POSITION(tev_stage2, 0xd0); |
| 861 | ASSERT_REG_POSITION(tev_stage3, 0xd8); | 912 | ASSERT_REG_POSITION(tev_stage3, 0xd8); |
| 913 | ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0); | ||
| 862 | ASSERT_REG_POSITION(tev_stage4, 0xf0); | 914 | ASSERT_REG_POSITION(tev_stage4, 0xf0); |
| 863 | ASSERT_REG_POSITION(tev_stage5, 0xf8); | 915 | ASSERT_REG_POSITION(tev_stage5, 0xf8); |
| 916 | ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); | ||
| 864 | ASSERT_REG_POSITION(output_merger, 0x100); | 917 | ASSERT_REG_POSITION(output_merger, 0x100); |
| 865 | ASSERT_REG_POSITION(framebuffer, 0x110); | 918 | ASSERT_REG_POSITION(framebuffer, 0x110); |
| 866 | ASSERT_REG_POSITION(vertex_attributes, 0x200); | 919 | ASSERT_REG_POSITION(vertex_attributes, 0x200); |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 3b3fef484..46a326bb4 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -90,7 +90,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 90 | UNIMPLEMENTED(); | 90 | UNIMPLEMENTED(); |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | return {}; | 93 | return {0, 0, 0, 0}; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | static u32 GetDepth(int x, int y) { | 96 | static u32 GetDepth(int x, int y) { |
| @@ -376,7 +376,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 376 | // with some basic arithmetic. Alpha combiners can be configured separately but work | 376 | // with some basic arithmetic. Alpha combiners can be configured separately but work |
| 377 | // analogously. | 377 | // analogously. |
| 378 | Math::Vec4<u8> combiner_output; | 378 | Math::Vec4<u8> combiner_output; |
| 379 | for (const auto& tev_stage : tev_stages) { | 379 | Math::Vec4<u8> combiner_buffer = { |
| 380 | registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g, | ||
| 381 | registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a | ||
| 382 | }; | ||
| 383 | |||
| 384 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | ||
| 385 | const auto& tev_stage = tev_stages[tev_stage_index]; | ||
| 380 | using Source = Regs::TevStageConfig::Source; | 386 | using Source = Regs::TevStageConfig::Source; |
| 381 | using ColorModifier = Regs::TevStageConfig::ColorModifier; | 387 | using ColorModifier = Regs::TevStageConfig::ColorModifier; |
| 382 | using AlphaModifier = Regs::TevStageConfig::AlphaModifier; | 388 | using AlphaModifier = Regs::TevStageConfig::AlphaModifier; |
| @@ -398,6 +404,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 398 | case Source::Texture2: | 404 | case Source::Texture2: |
| 399 | return texture_color[2]; | 405 | return texture_color[2]; |
| 400 | 406 | ||
| 407 | case Source::PreviousBuffer: | ||
| 408 | return combiner_buffer; | ||
| 409 | |||
| 401 | case Source::Constant: | 410 | case Source::Constant: |
| 402 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; | 411 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; |
| 403 | 412 | ||
| @@ -407,7 +416,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 407 | default: | 416 | default: |
| 408 | LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); | 417 | LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); |
| 409 | UNIMPLEMENTED(); | 418 | UNIMPLEMENTED(); |
| 410 | return {}; | 419 | return {0, 0, 0, 0}; |
| 411 | } | 420 | } |
| 412 | }; | 421 | }; |
| 413 | 422 | ||
| @@ -490,6 +499,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 490 | return result.Cast<u8>(); | 499 | return result.Cast<u8>(); |
| 491 | } | 500 | } |
| 492 | 501 | ||
| 502 | case Operation::AddSigned: | ||
| 503 | { | ||
| 504 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct | ||
| 505 | auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); | ||
| 506 | result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | ||
| 507 | result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | ||
| 508 | result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | ||
| 509 | return result.Cast<u8>(); | ||
| 510 | } | ||
| 511 | |||
| 493 | case Operation::Lerp: | 512 | case Operation::Lerp: |
| 494 | return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); | 513 | return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); |
| 495 | 514 | ||
| @@ -524,7 +543,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 524 | default: | 543 | default: |
| 525 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); | 544 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); |
| 526 | UNIMPLEMENTED(); | 545 | UNIMPLEMENTED(); |
| 527 | return {}; | 546 | return {0, 0, 0}; |
| 528 | } | 547 | } |
| 529 | }; | 548 | }; |
| 530 | 549 | ||
| @@ -578,7 +597,20 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 578 | }; | 597 | }; |
| 579 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); | 598 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); |
| 580 | 599 | ||
| 581 | combiner_output = Math::MakeVec(color_output, alpha_output); | 600 | combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); |
| 601 | combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); | ||
| 602 | combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | ||
| 603 | combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | ||
| 604 | |||
| 605 | if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { | ||
| 606 | combiner_buffer.r() = combiner_output.r(); | ||
| 607 | combiner_buffer.g() = combiner_output.g(); | ||
| 608 | combiner_buffer.b() = combiner_output.b(); | ||
| 609 | } | ||
| 610 | |||
| 611 | if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { | ||
| 612 | combiner_buffer.a() = combiner_output.a(); | ||
| 613 | } | ||
| 582 | } | 614 | } |
| 583 | 615 | ||
| 584 | if (registers.output_merger.alpha_test.enable) { | 616 | if (registers.output_merger.alpha_test.enable) { |
| @@ -624,9 +656,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 624 | 656 | ||
| 625 | // TODO: Does depth indeed only get written even if depth testing is enabled? | 657 | // TODO: Does depth indeed only get written even if depth testing is enabled? |
| 626 | if (registers.output_merger.depth_test_enable) { | 658 | if (registers.output_merger.depth_test_enable) { |
| 627 | u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + | 659 | unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format); |
| 628 | v1.screenpos[2].ToFloat32() * w1 + | 660 | u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + |
| 629 | v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); | 661 | v1.screenpos[2].ToFloat32() * w1 + |
| 662 | v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); | ||
| 630 | u32 ref_z = GetDepth(x >> 4, y >> 4); | 663 | u32 ref_z = GetDepth(x >> 4, y >> 4); |
| 631 | 664 | ||
| 632 | bool pass = false; | 665 | bool pass = false; |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 51f4e58bf..885b7de59 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -235,6 +235,15 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 235 | break; | 235 | break; |
| 236 | } | 236 | } |
| 237 | 237 | ||
| 238 | case OpCode::Id::FLR: | ||
| 239 | for (int i = 0; i < 4; ++i) { | ||
| 240 | if (!swizzle.DestComponentEnabled(i)) | ||
| 241 | continue; | ||
| 242 | |||
| 243 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); | ||
| 244 | } | ||
| 245 | break; | ||
| 246 | |||
| 238 | case OpCode::Id::MAX: | 247 | case OpCode::Id::MAX: |
| 239 | for (int i = 0; i < 4; ++i) { | 248 | for (int i = 0; i < 4; ++i) { |
| 240 | if (!swizzle.DestComponentEnabled(i)) | 249 | if (!swizzle.DestComponentEnabled(i)) |
| @@ -366,12 +375,15 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 366 | 375 | ||
| 367 | case OpCode::Type::MultiplyAdd: | 376 | case OpCode::Type::MultiplyAdd: |
| 368 | { | 377 | { |
| 369 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) { | 378 | if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || |
| 379 | (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { | ||
| 370 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; | 380 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; |
| 371 | 381 | ||
| 372 | const float24* src1_ = LookupSourceRegister(instr.mad.src1); | 382 | bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); |
| 373 | const float24* src2_ = LookupSourceRegister(instr.mad.src2); | 383 | |
| 374 | const float24* src3_ = LookupSourceRegister(instr.mad.src3); | 384 | const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); |
| 385 | const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted)); | ||
| 386 | const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted)); | ||
| 375 | 387 | ||
| 376 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | 388 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); |
| 377 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | 389 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); |