summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
m---------externals/nihstro0
-rw-r--r--src/core/core.cpp4
-rw-r--r--src/core/hle/kernel/session.h10
-rw-r--r--src/core/hle/kernel/thread.cpp11
-rw-r--r--src/core/hle/kernel/thread.h8
-rw-r--r--src/video_core/pica.h67
-rw-r--r--src/video_core/rasterizer.cpp49
-rw-r--r--src/video_core/vertex_shader.cpp20
8 files changed, 143 insertions, 26 deletions
diff --git a/externals/nihstro b/externals/nihstro
Subproject 4a78588b308564f7ebae193e0ae00d9a0d5741d Subproject 81f1804a43f625e3a1a20752c0db70a41341038
diff --git a/src/core/core.cpp b/src/core/core.cpp
index bb2ed7a92..b5c258230 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -61,10 +61,6 @@ int Init() {
61 g_sys_core = new ARM_DynCom(USER32MODE); 61 g_sys_core = new ARM_DynCom(USER32MODE);
62 g_app_core = new ARM_DynCom(USER32MODE); 62 g_app_core = new ARM_DynCom(USER32MODE);
63 63
64 // TODO: Whenever TLS is implemented, this should contain
65 // the address of the 0x200-byte TLS
66 g_app_core->SetCP15Register(CP15_THREAD_URO, Memory::TLS_AREA_VADDR);
67
68 LOG_DEBUG(Core, "Initialized OK"); 64 LOG_DEBUG(Core, "Initialized OK");
69 return 0; 65 return 0;
70} 66}
diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h
index 0fd18148a..8c3886ffd 100644
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include "core/hle/kernel/kernel.h" 7#include "core/hle/kernel/kernel.h"
8#include "core/hle/kernel/thread.h"
8#include "core/mem_map.h" 9#include "core/mem_map.h"
9 10
10namespace Kernel { 11namespace Kernel {
@@ -12,12 +13,15 @@ namespace Kernel {
12static const int kCommandHeaderOffset = 0x80; ///< Offset into command buffer of header 13static const int kCommandHeaderOffset = 0x80; ///< Offset into command buffer of header
13 14
14/** 15/**
15 * Returns a pointer to the command buffer in kernel memory 16 * Returns a pointer to the command buffer in the current thread's TLS
17 * TODO(Subv): This is not entirely correct; the command buffer should be copied from
18 * the thread's TLS to an intermediate buffer in kernel memory, and then copied again to
19 * the service handler process' memory.
16 * @param offset Optional offset into command buffer 20 * @param offset Optional offset into command buffer
17 * @return Pointer to command buffer 21 * @return Pointer to command buffer
18 */ 22 */
19inline static u32* GetCommandBuffer(const int offset=0) { 23inline static u32* GetCommandBuffer(const int offset = 0) {
20 return (u32*)Memory::GetPointer(Memory::TLS_AREA_VADDR + kCommandHeaderOffset + offset); 24 return (u32*)Memory::GetPointer(GetCurrentThread()->GetTLSAddress() + kCommandHeaderOffset + offset);
21} 25}
22 26
23/** 27/**
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 0a3fd7cb1..5de8f9a73 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -197,6 +197,7 @@ static void SwitchContext(Thread* new_thread) {
197 new_thread->current_priority = new_thread->nominal_priority; 197 new_thread->current_priority = new_thread->nominal_priority;
198 198
199 Core::g_app_core->LoadContext(new_thread->context); 199 Core::g_app_core->LoadContext(new_thread->context);
200 Core::g_app_core->SetCP15Register(CP15_THREAD_URO, new_thread->GetTLSAddress());
200 } else { 201 } else {
201 current_thread = nullptr; 202 current_thread = nullptr;
202 } 203 }
@@ -402,6 +403,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
402 thread->name = std::move(name); 403 thread->name = std::move(name);
403 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); 404 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
404 405
406 VAddr tls_address = Memory::TLS_AREA_VADDR + (thread->thread_id - 1) * 0x200;
407
408 ASSERT_MSG(tls_address < Memory::TLS_AREA_VADDR_END, "Too many threads");
409
410 thread->tls_address = tls_address;
411
405 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 412 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
406 // to initialize the context 413 // to initialize the context
407 Core::g_app_core->ResetContext(thread->context, stack_top, entry_point, arg); 414 Core::g_app_core->ResetContext(thread->context, stack_top, entry_point, arg);
@@ -495,6 +502,10 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
495 context.cpu_registers[1] = output; 502 context.cpu_registers[1] = output;
496} 503}
497 504
505VAddr Thread::GetTLSAddress() const {
506 return tls_address;
507}
508
498//////////////////////////////////////////////////////////////////////////////////////////////////// 509////////////////////////////////////////////////////////////////////////////////////////////////////
499 510
500void ThreadingInit() { 511void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 9958b16e6..6891c8c2f 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -135,6 +135,12 @@ public:
135 */ 135 */
136 void Stop(); 136 void Stop();
137 137
138 /*
139 * Returns the Thread Local Storage address of this thread
140 * @returns VAddr of the thread's TLS
141 */
142 VAddr GetTLSAddress() const;
143
138 Core::ThreadContext context; 144 Core::ThreadContext context;
139 145
140 u32 thread_id; 146 u32 thread_id;
@@ -150,6 +156,8 @@ public:
150 156
151 s32 processor_id; 157 s32 processor_id;
152 158
159 VAddr tls_address; ///< Address of the Thread Local Storage of the thread
160
153 /// Mutexes currently held by this thread, which will be released when it exits. 161 /// Mutexes currently held by this thread, which will be released when it exits.
154 boost::container::flat_set<SharedPtr<Mutex>> held_mutexes; 162 boost::container::flat_set<SharedPtr<Mutex>> held_mutexes;
155 163
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index e4a91058c..5e169ff69 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -226,7 +226,8 @@ struct Regs {
226 Texture1 = 0x4, 226 Texture1 = 0x4,
227 Texture2 = 0x5, 227 Texture2 = 0x5,
228 Texture3 = 0x6, 228 Texture3 = 0x6,
229 // 0x7-0xc = primary color?? 229
230 PreviousBuffer = 0xd,
230 Constant = 0xe, 231 Constant = 0xe,
231 Previous = 0xf, 232 Previous = 0xf,
232 }; 233 };
@@ -299,7 +300,18 @@ struct Regs {
299 BitField<24, 8, u32> const_a; 300 BitField<24, 8, u32> const_a;
300 }; 301 };
301 302
302 INSERT_PADDING_WORDS(0x1); 303 union {
304 BitField< 0, 2, u32> color_scale;
305 BitField<16, 2, u32> alpha_scale;
306 };
307
308 inline unsigned GetColorMultiplier() const {
309 return (color_scale < 3) ? (1 << color_scale) : 1;
310 }
311
312 inline unsigned GetAlphaMultiplier() const {
313 return (alpha_scale < 3) ? (1 << alpha_scale) : 1;
314 }
303 }; 315 };
304 316
305 TevStageConfig tev_stage0; 317 TevStageConfig tev_stage0;
@@ -309,11 +321,36 @@ struct Regs {
309 TevStageConfig tev_stage2; 321 TevStageConfig tev_stage2;
310 INSERT_PADDING_WORDS(0x3); 322 INSERT_PADDING_WORDS(0x3);
311 TevStageConfig tev_stage3; 323 TevStageConfig tev_stage3;
312 INSERT_PADDING_WORDS(0x13); 324 INSERT_PADDING_WORDS(0x3);
325
326 union {
327 // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
328 // these masks is set
329 BitField< 8, 4, u32> update_mask_rgb;
330 BitField<12, 4, u32> update_mask_a;
331
332 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
333 return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
334 }
335
336 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
337 return (stage_index < 4) && (update_mask_a & (1 << stage_index));
338 }
339 } tev_combiner_buffer_input;
340
341 INSERT_PADDING_WORDS(0xf);
313 TevStageConfig tev_stage4; 342 TevStageConfig tev_stage4;
314 INSERT_PADDING_WORDS(0x3); 343 INSERT_PADDING_WORDS(0x3);
315 TevStageConfig tev_stage5; 344 TevStageConfig tev_stage5;
316 INSERT_PADDING_WORDS(0x3); 345
346 union {
347 BitField< 0, 8, u32> r;
348 BitField< 8, 8, u32> g;
349 BitField<16, 8, u32> b;
350 BitField<24, 8, u32> a;
351 } tev_combiner_buffer_color;
352
353 INSERT_PADDING_WORDS(0x2);
317 354
318 const std::array<Regs::TevStageConfig,6> GetTevStages() const { 355 const std::array<Regs::TevStageConfig,6> GetTevStages() const {
319 return { tev_stage0, tev_stage1, 356 return { tev_stage0, tev_stage1,
@@ -426,9 +463,7 @@ struct Regs {
426 D24S8 = 3 463 D24S8 = 3
427 }; 464 };
428 465
429 /* 466 // Returns the number of bytes in the specified depth format
430 * Returns the number of bytes in the specified depth format
431 */
432 static u32 BytesPerDepthPixel(DepthFormat format) { 467 static u32 BytesPerDepthPixel(DepthFormat format) {
433 switch (format) { 468 switch (format) {
434 case DepthFormat::D16: 469 case DepthFormat::D16:
@@ -443,6 +478,20 @@ struct Regs {
443 } 478 }
444 } 479 }
445 480
481 // Returns the number of bits per depth component of the specified depth format
482 static u32 DepthBitsPerPixel(DepthFormat format) {
483 switch (format) {
484 case DepthFormat::D16:
485 return 16;
486 case DepthFormat::D24:
487 case DepthFormat::D24S8:
488 return 24;
489 default:
490 LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
491 UNIMPLEMENTED();
492 }
493 }
494
446 struct { 495 struct {
447 // Components are laid out in reverse byte order, most significant bits first. 496 // Components are laid out in reverse byte order, most significant bits first.
448 enum ColorFormat : u32 { 497 enum ColorFormat : u32 {
@@ -784,8 +833,10 @@ struct Regs {
784 ADD_FIELD(tev_stage1); 833 ADD_FIELD(tev_stage1);
785 ADD_FIELD(tev_stage2); 834 ADD_FIELD(tev_stage2);
786 ADD_FIELD(tev_stage3); 835 ADD_FIELD(tev_stage3);
836 ADD_FIELD(tev_combiner_buffer_input);
787 ADD_FIELD(tev_stage4); 837 ADD_FIELD(tev_stage4);
788 ADD_FIELD(tev_stage5); 838 ADD_FIELD(tev_stage5);
839 ADD_FIELD(tev_combiner_buffer_color);
789 ADD_FIELD(output_merger); 840 ADD_FIELD(output_merger);
790 ADD_FIELD(framebuffer); 841 ADD_FIELD(framebuffer);
791 ADD_FIELD(vertex_attributes); 842 ADD_FIELD(vertex_attributes);
@@ -859,8 +910,10 @@ ASSERT_REG_POSITION(tev_stage0, 0xc0);
859ASSERT_REG_POSITION(tev_stage1, 0xc8); 910ASSERT_REG_POSITION(tev_stage1, 0xc8);
860ASSERT_REG_POSITION(tev_stage2, 0xd0); 911ASSERT_REG_POSITION(tev_stage2, 0xd0);
861ASSERT_REG_POSITION(tev_stage3, 0xd8); 912ASSERT_REG_POSITION(tev_stage3, 0xd8);
913ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0);
862ASSERT_REG_POSITION(tev_stage4, 0xf0); 914ASSERT_REG_POSITION(tev_stage4, 0xf0);
863ASSERT_REG_POSITION(tev_stage5, 0xf8); 915ASSERT_REG_POSITION(tev_stage5, 0xf8);
916ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
864ASSERT_REG_POSITION(output_merger, 0x100); 917ASSERT_REG_POSITION(output_merger, 0x100);
865ASSERT_REG_POSITION(framebuffer, 0x110); 918ASSERT_REG_POSITION(framebuffer, 0x110);
866ASSERT_REG_POSITION(vertex_attributes, 0x200); 919ASSERT_REG_POSITION(vertex_attributes, 0x200);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 3b3fef484..46a326bb4 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -90,7 +90,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
90 UNIMPLEMENTED(); 90 UNIMPLEMENTED();
91 } 91 }
92 92
93 return {}; 93 return {0, 0, 0, 0};
94} 94}
95 95
96static u32 GetDepth(int x, int y) { 96static u32 GetDepth(int x, int y) {
@@ -376,7 +376,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
376 // with some basic arithmetic. Alpha combiners can be configured separately but work 376 // with some basic arithmetic. Alpha combiners can be configured separately but work
377 // analogously. 377 // analogously.
378 Math::Vec4<u8> combiner_output; 378 Math::Vec4<u8> combiner_output;
379 for (const auto& tev_stage : tev_stages) { 379 Math::Vec4<u8> combiner_buffer = {
380 registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g,
381 registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a
382 };
383
384 for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
385 const auto& tev_stage = tev_stages[tev_stage_index];
380 using Source = Regs::TevStageConfig::Source; 386 using Source = Regs::TevStageConfig::Source;
381 using ColorModifier = Regs::TevStageConfig::ColorModifier; 387 using ColorModifier = Regs::TevStageConfig::ColorModifier;
382 using AlphaModifier = Regs::TevStageConfig::AlphaModifier; 388 using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
@@ -398,6 +404,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
398 case Source::Texture2: 404 case Source::Texture2:
399 return texture_color[2]; 405 return texture_color[2];
400 406
407 case Source::PreviousBuffer:
408 return combiner_buffer;
409
401 case Source::Constant: 410 case Source::Constant:
402 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; 411 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
403 412
@@ -407,7 +416,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
407 default: 416 default:
408 LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); 417 LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
409 UNIMPLEMENTED(); 418 UNIMPLEMENTED();
410 return {}; 419 return {0, 0, 0, 0};
411 } 420 }
412 }; 421 };
413 422
@@ -490,6 +499,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
490 return result.Cast<u8>(); 499 return result.Cast<u8>();
491 } 500 }
492 501
502 case Operation::AddSigned:
503 {
504 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
505 auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128);
506 result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
507 result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
508 result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
509 return result.Cast<u8>();
510 }
511
493 case Operation::Lerp: 512 case Operation::Lerp:
494 return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); 513 return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
495 514
@@ -524,7 +543,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
524 default: 543 default:
525 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); 544 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
526 UNIMPLEMENTED(); 545 UNIMPLEMENTED();
527 return {}; 546 return {0, 0, 0};
528 } 547 }
529 }; 548 };
530 549
@@ -578,7 +597,20 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
578 }; 597 };
579 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); 598 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
580 599
581 combiner_output = Math::MakeVec(color_output, alpha_output); 600 combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());
601 combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier());
602 combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
603 combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
604
605 if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) {
606 combiner_buffer.r() = combiner_output.r();
607 combiner_buffer.g() = combiner_output.g();
608 combiner_buffer.b() = combiner_output.b();
609 }
610
611 if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) {
612 combiner_buffer.a() = combiner_output.a();
613 }
582 } 614 }
583 615
584 if (registers.output_merger.alpha_test.enable) { 616 if (registers.output_merger.alpha_test.enable) {
@@ -624,9 +656,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
624 656
625 // TODO: Does depth indeed only get written even if depth testing is enabled? 657 // TODO: Does depth indeed only get written even if depth testing is enabled?
626 if (registers.output_merger.depth_test_enable) { 658 if (registers.output_merger.depth_test_enable) {
627 u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + 659 unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format);
628 v1.screenpos[2].ToFloat32() * w1 + 660 u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 +
629 v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); 661 v1.screenpos[2].ToFloat32() * w1 +
662 v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
630 u32 ref_z = GetDepth(x >> 4, y >> 4); 663 u32 ref_z = GetDepth(x >> 4, y >> 4);
631 664
632 bool pass = false; 665 bool pass = false;
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 51f4e58bf..885b7de59 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -235,6 +235,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
235 break; 235 break;
236 } 236 }
237 237
238 case OpCode::Id::FLR:
239 for (int i = 0; i < 4; ++i) {
240 if (!swizzle.DestComponentEnabled(i))
241 continue;
242
243 dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32()));
244 }
245 break;
246
238 case OpCode::Id::MAX: 247 case OpCode::Id::MAX:
239 for (int i = 0; i < 4; ++i) { 248 for (int i = 0; i < 4; ++i) {
240 if (!swizzle.DestComponentEnabled(i)) 249 if (!swizzle.DestComponentEnabled(i))
@@ -366,12 +375,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
366 375
367 case OpCode::Type::MultiplyAdd: 376 case OpCode::Type::MultiplyAdd:
368 { 377 {
369 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) { 378 if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
379 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
370 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; 380 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
371 381
372 const float24* src1_ = LookupSourceRegister(instr.mad.src1); 382 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
373 const float24* src2_ = LookupSourceRegister(instr.mad.src2); 383
374 const float24* src3_ = LookupSourceRegister(instr.mad.src3); 384 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
385 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted));
386 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted));
375 387
376 const bool negate_src1 = ((bool)swizzle.negate_src1 != false); 388 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
377 const bool negate_src2 = ((bool)swizzle.negate_src2 != false); 389 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);