diff options
| author | 2015-02-18 17:19:38 -0500 | |
|---|---|---|
| committer | 2015-02-18 17:19:38 -0500 | |
| commit | 4a48b017ca7fe8fe68dfc84d70864ef6aea6a266 (patch) | |
| tree | dcd7914a3a2147790d384ce0992f70d40bce8704 /src | |
| parent | Merge pull request #570 from purpasmart96/config_mem (diff) | |
| parent | Pica/Rasterizer: Replace exit() calls with UNIMPLEMENTED(). (diff) | |
| download | yuzu-4a48b017ca7fe8fe68dfc84d70864ef6aea6a266.tar.gz yuzu-4a48b017ca7fe8fe68dfc84d70864ef6aea6a266.tar.xz yuzu-4a48b017ca7fe8fe68dfc84d70864ef6aea6a266.zip | |
Merge pull request #562 from neobrain/pica_progress3
More PICA200 Emulation Fixes
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/hle/service/gsp_gpu.cpp | 34 | ||||
| -rw-r--r-- | src/core/hle/service/gsp_gpu.h | 4 | ||||
| -rw-r--r-- | src/core/hw/gpu.cpp | 41 | ||||
| -rw-r--r-- | src/core/hw/gpu.h | 32 | ||||
| -rw-r--r-- | src/video_core/clipper.cpp | 84 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 32 | ||||
| -rw-r--r-- | src/video_core/pica.h | 24 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 252 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 50 |
9 files changed, 341 insertions, 212 deletions
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 31e61391f..c23cfa3c8 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp | |||
| @@ -368,28 +368,28 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | |||
| 368 | case CommandId::SET_MEMORY_FILL: | 368 | case CommandId::SET_MEMORY_FILL: |
| 369 | { | 369 | { |
| 370 | auto& params = command.memory_fill; | 370 | auto& params = command.memory_fill; |
| 371 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_start)), | 371 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_start)), |
| 372 | Memory::VirtualToPhysicalAddress(params.start1) >> 3); | 372 | Memory::VirtualToPhysicalAddress(params.start1) >> 3); |
| 373 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_end)), | 373 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_end)), |
| 374 | Memory::VirtualToPhysicalAddress(params.end1) >> 3); | 374 | Memory::VirtualToPhysicalAddress(params.end1) >> 3); |
| 375 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].size)), params.end1 - params.start1); | 375 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].value_32bit)), params.value1); |
| 376 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].value)), params.value1); | 376 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].control)), params.control1); |
| 377 | 377 | ||
| 378 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_start)), | 378 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_start)), |
| 379 | Memory::VirtualToPhysicalAddress(params.start2) >> 3); | 379 | Memory::VirtualToPhysicalAddress(params.start2) >> 3); |
| 380 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_end)), | 380 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_end)), |
| 381 | Memory::VirtualToPhysicalAddress(params.end2) >> 3); | 381 | Memory::VirtualToPhysicalAddress(params.end2) >> 3); |
| 382 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].size)), params.end2 - params.start2); | 382 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].value_32bit)), params.value2); |
| 383 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].value)), params.value2); | 383 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].control)), params.control2); |
| 384 | break; | 384 | break; |
| 385 | } | 385 | } |
| 386 | 386 | ||
| 387 | case CommandId::SET_DISPLAY_TRANSFER: | 387 | case CommandId::SET_DISPLAY_TRANSFER: |
| 388 | { | 388 | { |
| 389 | auto& params = command.image_copy; | 389 | auto& params = command.image_copy; |
| 390 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | 390 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), |
| 391 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | 391 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); |
| 392 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | 392 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), |
| 393 | Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | 393 | Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); |
| 394 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); | 394 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); |
| 395 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); | 395 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); |
| @@ -402,9 +402,9 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | |||
| 402 | case CommandId::SET_TEXTURE_COPY: | 402 | case CommandId::SET_TEXTURE_COPY: |
| 403 | { | 403 | { |
| 404 | auto& params = command.image_copy; | 404 | auto& params = command.image_copy; |
| 405 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | 405 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), |
| 406 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | 406 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); |
| 407 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | 407 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), |
| 408 | Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | 408 | Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); |
| 409 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); | 409 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); |
| 410 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); | 410 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); |
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h index 65abb194a..a435d418a 100644 --- a/src/core/hle/service/gsp_gpu.h +++ b/src/core/hle/service/gsp_gpu.h | |||
| @@ -109,9 +109,13 @@ struct Command { | |||
| 109 | u32 start1; | 109 | u32 start1; |
| 110 | u32 value1; | 110 | u32 value1; |
| 111 | u32 end1; | 111 | u32 end1; |
| 112 | |||
| 112 | u32 start2; | 113 | u32 start2; |
| 113 | u32 value2; | 114 | u32 value2; |
| 114 | u32 end2; | 115 | u32 end2; |
| 116 | |||
| 117 | u16 control1; | ||
| 118 | u16 control2; | ||
| 115 | } memory_fill; | 119 | } memory_fill; |
| 116 | 120 | ||
| 117 | struct { | 121 | struct { |
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index aad0e5d0d..bd7d92cd1 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp | |||
| @@ -67,23 +67,38 @@ inline void Write(u32 addr, const T data) { | |||
| 67 | switch (index) { | 67 | switch (index) { |
| 68 | 68 | ||
| 69 | // Memory fills are triggered once the fill value is written. | 69 | // Memory fills are triggered once the fill value is written. |
| 70 | // NOTE: This is not verified. | 70 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3): |
| 71 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].value, 0x00004 + 0x3): | 71 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): |
| 72 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].value, 0x00008 + 0x3): | ||
| 73 | { | 72 | { |
| 74 | const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].value)); | 73 | const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); |
| 75 | const auto& config = g_regs.memory_fill_config[is_second_filler]; | 74 | auto& config = g_regs.memory_fill_config[is_second_filler]; |
| 76 | 75 | ||
| 77 | // TODO: Not sure if this check should be done at GSP level instead | 76 | if (config.address_start && config.trigger) { |
| 78 | if (config.address_start) { | 77 | u8* start = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress())); |
| 79 | // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all | 78 | u8* end = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress())); |
| 80 | u32* start = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress())); | 79 | |
| 81 | u32* end = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress())); | 80 | if (config.fill_24bit) { |
| 82 | for (u32* ptr = start; ptr < end; ++ptr) | 81 | // fill with 24-bit values |
| 83 | *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation | 82 | for (u8* ptr = start; ptr < end; ptr += 3) { |
| 83 | ptr[0] = config.value_24bit_b; | ||
| 84 | ptr[1] = config.value_24bit_g; | ||
| 85 | ptr[2] = config.value_24bit_r; | ||
| 86 | } | ||
| 87 | } else if (config.fill_32bit) { | ||
| 88 | // fill with 32-bit values | ||
| 89 | for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr) | ||
| 90 | *ptr = config.value_32bit; | ||
| 91 | } else { | ||
| 92 | // fill with 16-bit values | ||
| 93 | for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr) | ||
| 94 | *ptr = config.value_16bit; | ||
| 95 | } | ||
| 84 | 96 | ||
| 85 | LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); | 97 | LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); |
| 86 | 98 | ||
| 99 | config.trigger = 0; | ||
| 100 | config.finished = 1; | ||
| 101 | |||
| 87 | if (!is_second_filler) { | 102 | if (!is_second_filler) { |
| 88 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); | 103 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); |
| 89 | } else { | 104 | } else { |
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 9fd694f65..df9aa0d71 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h | |||
| @@ -84,9 +84,35 @@ struct Regs { | |||
| 84 | 84 | ||
| 85 | struct { | 85 | struct { |
| 86 | u32 address_start; | 86 | u32 address_start; |
| 87 | u32 address_end; // ? | 87 | u32 address_end; |
| 88 | u32 size; | 88 | |
| 89 | u32 value; // ? | 89 | union { |
| 90 | u32 value_32bit; | ||
| 91 | |||
| 92 | BitField<0, 16, u32> value_16bit; | ||
| 93 | |||
| 94 | // TODO: Verify component order | ||
| 95 | BitField< 0, 8, u32> value_24bit_r; | ||
| 96 | BitField< 8, 8, u32> value_24bit_g; | ||
| 97 | BitField<16, 8, u32> value_24bit_b; | ||
| 98 | }; | ||
| 99 | |||
| 100 | union { | ||
| 101 | u32 control; | ||
| 102 | |||
| 103 | // Setting this field to 1 triggers the memory fill. | ||
| 104 | // This field also acts as a status flag, and gets reset to 0 upon completion. | ||
| 105 | BitField<0, 1, u32> trigger; | ||
| 106 | |||
| 107 | // Set to 1 upon completion. | ||
| 108 | BitField<0, 1, u32> finished; | ||
| 109 | |||
| 110 | // 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values | ||
| 111 | BitField<8, 1, u32> fill_24bit; | ||
| 112 | |||
| 113 | // 0: fill with 16-bit wide values; 1: fill with 32-bit wide values | ||
| 114 | BitField<9, 1, u32> fill_32bit; | ||
| 115 | }; | ||
| 90 | 116 | ||
| 91 | inline u32 GetStartAddress() const { | 117 | inline u32 GetStartAddress() const { |
| 92 | return DecodeAddressRegister(address_start); | 118 | return DecodeAddressRegister(address_start); |
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 1744066ba..ba3876a76 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp | |||
| @@ -15,30 +15,18 @@ namespace Clipper { | |||
| 15 | 15 | ||
| 16 | struct ClippingEdge { | 16 | struct ClippingEdge { |
| 17 | public: | 17 | public: |
| 18 | enum Type { | 18 | ClippingEdge(Math::Vec4<float24> coeffs, |
| 19 | POS_X = 0, | 19 | Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0), |
| 20 | NEG_X = 1, | 20 | float24::FromFloat32(0), |
| 21 | POS_Y = 2, | 21 | float24::FromFloat32(0), |
| 22 | NEG_Y = 3, | 22 | float24::FromFloat32(0))) |
| 23 | POS_Z = 4, | 23 | : coeffs(coeffs), |
| 24 | NEG_Z = 5, | 24 | bias(bias) |
| 25 | }; | 25 | { |
| 26 | 26 | } | |
| 27 | ClippingEdge(Type type, float24 position) : type(type), pos(position) {} | ||
| 28 | 27 | ||
| 29 | bool IsInside(const OutputVertex& vertex) const { | 28 | bool IsInside(const OutputVertex& vertex) const { |
| 30 | switch (type) { | 29 | return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); |
| 31 | case POS_X: return vertex.pos.x <= pos * vertex.pos.w; | ||
| 32 | case NEG_X: return vertex.pos.x >= pos * vertex.pos.w; | ||
| 33 | case POS_Y: return vertex.pos.y <= pos * vertex.pos.w; | ||
| 34 | case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w; | ||
| 35 | |||
| 36 | // TODO: Check z compares ... should be 0..1 instead? | ||
| 37 | case POS_Z: return vertex.pos.z <= pos * vertex.pos.w; | ||
| 38 | |||
| 39 | default: | ||
| 40 | case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w; | ||
| 41 | } | ||
| 42 | } | 30 | } |
| 43 | 31 | ||
| 44 | bool IsOutSide(const OutputVertex& vertex) const { | 32 | bool IsOutSide(const OutputVertex& vertex) const { |
| @@ -46,31 +34,17 @@ public: | |||
| 46 | } | 34 | } |
| 47 | 35 | ||
| 48 | OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { | 36 | OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { |
| 49 | auto dotpr = [this](const OutputVertex& vtx) { | 37 | float24 dp = Math::Dot(v0.pos + bias, coeffs); |
| 50 | switch (type) { | 38 | float24 dp_prev = Math::Dot(v1.pos + bias, coeffs); |
| 51 | case POS_X: return vtx.pos.x - vtx.pos.w; | ||
| 52 | case NEG_X: return -vtx.pos.x - vtx.pos.w; | ||
| 53 | case POS_Y: return vtx.pos.y - vtx.pos.w; | ||
| 54 | case NEG_Y: return -vtx.pos.y - vtx.pos.w; | ||
| 55 | |||
| 56 | // TODO: Verify z clipping | ||
| 57 | case POS_Z: return vtx.pos.z - vtx.pos.w; | ||
| 58 | |||
| 59 | default: | ||
| 60 | case NEG_Z: return -vtx.pos.w; | ||
| 61 | } | ||
| 62 | }; | ||
| 63 | |||
| 64 | float24 dp = dotpr(v0); | ||
| 65 | float24 dp_prev = dotpr(v1); | ||
| 66 | float24 factor = dp_prev / (dp_prev - dp); | 39 | float24 factor = dp_prev / (dp_prev - dp); |
| 67 | 40 | ||
| 68 | return OutputVertex::Lerp(factor, v0, v1); | 41 | return OutputVertex::Lerp(factor, v0, v1); |
| 69 | } | 42 | } |
| 70 | 43 | ||
| 71 | private: | 44 | private: |
| 72 | Type type; | ||
| 73 | float24 pos; | 45 | float24 pos; |
| 46 | Math::Vec4<float24> coeffs; | ||
| 47 | Math::Vec4<float24> bias; | ||
| 74 | }; | 48 | }; |
| 75 | 49 | ||
| 76 | static void InitScreenCoordinates(OutputVertex& vtx) | 50 | static void InitScreenCoordinates(OutputVertex& vtx) |
| @@ -98,10 +72,9 @@ static void InitScreenCoordinates(OutputVertex& vtx) | |||
| 98 | vtx.tc2 *= inv_w; | 72 | vtx.tc2 *= inv_w; |
| 99 | vtx.pos.w = inv_w; | 73 | vtx.pos.w = inv_w; |
| 100 | 74 | ||
| 101 | // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not | ||
| 102 | vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; | 75 | vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; |
| 103 | vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; | 76 | vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; |
| 104 | vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale; | 77 | vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; |
| 105 | } | 78 | } |
| 106 | 79 | ||
| 107 | void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | 80 | void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { |
| @@ -117,14 +90,29 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | |||
| 117 | auto* output_list = &buffer_a; | 90 | auto* output_list = &buffer_a; |
| 118 | auto* input_list = &buffer_b; | 91 | auto* input_list = &buffer_b; |
| 119 | 92 | ||
| 93 | // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. | ||
| 94 | // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest | ||
| 95 | // epsilon possible within float24 accuracy. | ||
| 96 | static const float24 EPSILON = float24::FromFloat32(0.00001); | ||
| 97 | static const float24 f0 = float24::FromFloat32(0.0); | ||
| 98 | static const float24 f1 = float24::FromFloat32(1.0); | ||
| 99 | static const std::array<ClippingEdge, 7> clipping_edges = {{ | ||
| 100 | { Math::MakeVec( f1, f0, f0, -f1) }, // x = +w | ||
| 101 | { Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w | ||
| 102 | { Math::MakeVec( f0, f1, f0, -f1) }, // y = +w | ||
| 103 | { Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w | ||
| 104 | { Math::MakeVec( f0, f0, f1, f0) }, // z = 0 | ||
| 105 | { Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w | ||
| 106 | { Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON | ||
| 107 | }}; | ||
| 108 | |||
| 109 | // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) | ||
| 110 | // drop the whole primitive instead of clipping the primitive properly. We should test if | ||
| 111 | // this happens on the 3DS, too. | ||
| 112 | |||
| 120 | // Simple implementation of the Sutherland-Hodgman clipping algorithm. | 113 | // Simple implementation of the Sutherland-Hodgman clipping algorithm. |
| 121 | // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) | 114 | // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) |
| 122 | for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)), | 115 | for (auto edge : clipping_edges) { |
| 123 | ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)), | ||
| 124 | ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)), | ||
| 125 | ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)), | ||
| 126 | ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)), | ||
| 127 | ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) { | ||
| 128 | 116 | ||
| 129 | std::swap(input_list, output_list); | 117 | std::swap(input_list, output_list); |
| 130 | output_list->clear(); | 118 | output_list->clear(); |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 0d9f4ba66..586ad62b6 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <boost/range/algorithm/fill.hpp> | ||
| 6 | |||
| 5 | #include "clipper.h" | 7 | #include "clipper.h" |
| 6 | #include "command_processor.h" | 8 | #include "command_processor.h" |
| 7 | #include "math.h" | 9 | #include "math.h" |
| @@ -23,10 +25,6 @@ static int float_regs_counter = 0; | |||
| 23 | 25 | ||
| 24 | static u32 uniform_write_buffer[4]; | 26 | static u32 uniform_write_buffer[4]; |
| 25 | 27 | ||
| 26 | // Used for VSLoadProgramData and VSLoadSwizzleData | ||
| 27 | static u32 vs_binary_write_offset = 0; | ||
| 28 | static u32 vs_swizzle_write_offset = 0; | ||
| 29 | |||
| 30 | static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | 28 | static inline void WritePicaReg(u32 id, u32 value, u32 mask) { |
| 31 | 29 | ||
| 32 | if (id >= registers.NumIds()) | 30 | if (id >= registers.NumIds()) |
| @@ -65,10 +63,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 65 | 63 | ||
| 66 | // Information about internal vertex attributes | 64 | // Information about internal vertex attributes |
| 67 | u32 vertex_attribute_sources[16]; | 65 | u32 vertex_attribute_sources[16]; |
| 68 | std::fill(vertex_attribute_sources, &vertex_attribute_sources[16], 0xdeadbeef); | 66 | boost::fill(vertex_attribute_sources, 0xdeadbeef); |
| 69 | u32 vertex_attribute_strides[16]; | 67 | u32 vertex_attribute_strides[16]; |
| 70 | u32 vertex_attribute_formats[16]; | 68 | u32 vertex_attribute_formats[16]; |
| 71 | u32 vertex_attribute_elements[16]; | 69 | |
| 70 | // HACK: Initialize vertex_attribute_elements to zero to prevent infinite loops below. | ||
| 71 | // This is one of the hacks required to deal with uninitialized vertex attributes. | ||
| 72 | // TODO: Fix this properly. | ||
| 73 | u32 vertex_attribute_elements[16] = {}; | ||
| 72 | u32 vertex_attribute_element_size[16]; | 74 | u32 vertex_attribute_element_size[16]; |
| 73 | 75 | ||
| 74 | // Setup attribute data from loaders | 76 | // Setup attribute data from loaders |
| @@ -252,11 +254,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 252 | break; | 254 | break; |
| 253 | } | 255 | } |
| 254 | 256 | ||
| 255 | // Seems to be used to reset the write pointer for VSLoadProgramData | ||
| 256 | case PICA_REG_INDEX(vs_program.begin_load): | ||
| 257 | vs_binary_write_offset = 0; | ||
| 258 | break; | ||
| 259 | |||
| 260 | // Load shader program code | 257 | // Load shader program code |
| 261 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): | 258 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): |
| 262 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): | 259 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): |
| @@ -267,16 +264,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 267 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): | 264 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): |
| 268 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): | 265 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): |
| 269 | { | 266 | { |
| 270 | VertexShader::SubmitShaderMemoryChange(vs_binary_write_offset, value); | 267 | VertexShader::SubmitShaderMemoryChange(registers.vs_program.offset, value); |
| 271 | vs_binary_write_offset++; | 268 | registers.vs_program.offset++; |
| 272 | break; | 269 | break; |
| 273 | } | 270 | } |
| 274 | 271 | ||
| 275 | // Seems to be used to reset the write pointer for VSLoadSwizzleData | ||
| 276 | case PICA_REG_INDEX(vs_swizzle_patterns.begin_load): | ||
| 277 | vs_swizzle_write_offset = 0; | ||
| 278 | break; | ||
| 279 | |||
| 280 | // Load swizzle pattern data | 272 | // Load swizzle pattern data |
| 281 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): | 273 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): |
| 282 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): | 274 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): |
| @@ -287,8 +279,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 287 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): | 279 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): |
| 288 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): | 280 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): |
| 289 | { | 281 | { |
| 290 | VertexShader::SubmitSwizzleDataChange(vs_swizzle_write_offset, value); | 282 | VertexShader::SubmitSwizzleDataChange(registers.vs_swizzle_patterns.offset, value); |
| 291 | vs_swizzle_write_offset++; | 283 | registers.vs_swizzle_patterns.offset++; |
| 292 | break; | 284 | break; |
| 293 | } | 285 | } |
| 294 | 286 | ||
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 9c1a12dc8..e4a5ef78e 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -118,8 +118,9 @@ struct Regs { | |||
| 118 | 118 | ||
| 119 | struct TextureConfig { | 119 | struct TextureConfig { |
| 120 | enum WrapMode : u32 { | 120 | enum WrapMode : u32 { |
| 121 | ClampToEdge = 0, | 121 | ClampToEdge = 0, |
| 122 | Repeat = 2, | 122 | Repeat = 2, |
| 123 | MirroredRepeat = 3, | ||
| 123 | }; | 124 | }; |
| 124 | 125 | ||
| 125 | INSERT_PADDING_WORDS(0x1); | 126 | INSERT_PADDING_WORDS(0x1); |
| @@ -131,7 +132,7 @@ struct Regs { | |||
| 131 | 132 | ||
| 132 | union { | 133 | union { |
| 133 | BitField< 8, 2, WrapMode> wrap_s; | 134 | BitField< 8, 2, WrapMode> wrap_s; |
| 134 | BitField<11, 2, WrapMode> wrap_t; | 135 | BitField<12, 2, WrapMode> wrap_t; |
| 135 | }; | 136 | }; |
| 136 | 137 | ||
| 137 | INSERT_PADDING_WORDS(0x1); | 138 | INSERT_PADDING_WORDS(0x1); |
| @@ -223,6 +224,8 @@ struct Regs { | |||
| 223 | struct TevStageConfig { | 224 | struct TevStageConfig { |
| 224 | enum class Source : u32 { | 225 | enum class Source : u32 { |
| 225 | PrimaryColor = 0x0, | 226 | PrimaryColor = 0x0, |
| 227 | PrimaryFragmentColor = 0x1, | ||
| 228 | |||
| 226 | Texture0 = 0x3, | 229 | Texture0 = 0x3, |
| 227 | Texture1 = 0x4, | 230 | Texture1 = 0x4, |
| 228 | Texture2 = 0x5, | 231 | Texture2 = 0x5, |
| @@ -265,6 +268,9 @@ struct Regs { | |||
| 265 | AddSigned = 3, | 268 | AddSigned = 3, |
| 266 | Lerp = 4, | 269 | Lerp = 4, |
| 267 | Subtract = 5, | 270 | Subtract = 5, |
| 271 | |||
| 272 | MultiplyThenAdd = 8, | ||
| 273 | AddThenMultiply = 9, | ||
| 268 | }; | 274 | }; |
| 269 | 275 | ||
| 270 | union { | 276 | union { |
| @@ -337,7 +343,7 @@ struct Regs { | |||
| 337 | }; | 343 | }; |
| 338 | 344 | ||
| 339 | union { | 345 | union { |
| 340 | enum BlendEquation : u32 { | 346 | enum class BlendEquation : u32 { |
| 341 | Add = 0, | 347 | Add = 0, |
| 342 | Subtract = 1, | 348 | Subtract = 1, |
| 343 | ReverseSubtract = 2, | 349 | ReverseSubtract = 2, |
| @@ -421,7 +427,7 @@ struct Regs { | |||
| 421 | INSERT_PADDING_WORDS(0x6); | 427 | INSERT_PADDING_WORDS(0x6); |
| 422 | 428 | ||
| 423 | u32 depth_format; | 429 | u32 depth_format; |
| 424 | u32 color_format; | 430 | BitField<16, 3, u32> color_format; |
| 425 | 431 | ||
| 426 | INSERT_PADDING_WORDS(0x4); | 432 | INSERT_PADDING_WORDS(0x4); |
| 427 | 433 | ||
| @@ -678,7 +684,9 @@ struct Regs { | |||
| 678 | INSERT_PADDING_WORDS(0x2); | 684 | INSERT_PADDING_WORDS(0x2); |
| 679 | 685 | ||
| 680 | struct { | 686 | struct { |
| 681 | u32 begin_load; | 687 | // Offset of the next instruction to write code to. |
| 688 | // Incremented with each instruction write. | ||
| 689 | u32 offset; | ||
| 682 | 690 | ||
| 683 | // Writing to these registers sets the "current" word in the shader program. | 691 | // Writing to these registers sets the "current" word in the shader program. |
| 684 | // TODO: It's not clear how the hardware stores what the "current" word is. | 692 | // TODO: It's not clear how the hardware stores what the "current" word is. |
| @@ -690,7 +698,9 @@ struct Regs { | |||
| 690 | // This register group is used to load an internal table of swizzling patterns, | 698 | // This register group is used to load an internal table of swizzling patterns, |
| 691 | // which are indexed by each shader instruction to specify vector component swizzling. | 699 | // which are indexed by each shader instruction to specify vector component swizzling. |
| 692 | struct { | 700 | struct { |
| 693 | u32 begin_load; | 701 | // Offset of the next swizzle pattern to write code to. |
| 702 | // Incremented with each instruction write. | ||
| 703 | u32 offset; | ||
| 694 | 704 | ||
| 695 | // Writing to these registers sets the "current" swizzle pattern in the table. | 705 | // Writing to these registers sets the "current" swizzle pattern in the table. |
| 696 | // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is. | 706 | // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is. |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 3faa10153..94873f406 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "common/math_util.h" | ||
| 8 | 9 | ||
| 9 | #include "math.h" | 10 | #include "math.h" |
| 10 | #include "pica.h" | 11 | #include "pica.h" |
| @@ -20,16 +21,31 @@ namespace Rasterizer { | |||
| 20 | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | 21 | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { |
| 21 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | 22 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); |
| 22 | u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); | 23 | u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); |
| 23 | u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); | ||
| 24 | 24 | ||
| 25 | // Assuming RGBA8 format until actual framebuffer format handling is implemented | 25 | // Similarly to textures, the render framebuffer is laid out from bottom to top, too. |
| 26 | *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; | 26 | // NOTE: The framebuffer height register contains the actual FB height minus one. |
| 27 | y = (registers.framebuffer.height - y); | ||
| 28 | |||
| 29 | switch (registers.framebuffer.color_format) { | ||
| 30 | case registers.framebuffer.RGBA8: | ||
| 31 | { | ||
| 32 | u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); | ||
| 33 | *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | |||
| 37 | default: | ||
| 38 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); | ||
| 39 | UNIMPLEMENTED(); | ||
| 40 | } | ||
| 27 | } | 41 | } |
| 28 | 42 | ||
| 29 | static const Math::Vec4<u8> GetPixel(int x, int y) { | 43 | static const Math::Vec4<u8> GetPixel(int x, int y) { |
| 30 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | 44 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); |
| 31 | u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); | 45 | u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); |
| 32 | 46 | ||
| 47 | y = (registers.framebuffer.height - y); | ||
| 48 | |||
| 33 | u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth()); | 49 | u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth()); |
| 34 | Math::Vec4<u8> ret; | 50 | Math::Vec4<u8> ret; |
| 35 | ret.a() = value >> 24; | 51 | ret.a() = value >> 24; |
| @@ -43,6 +59,8 @@ static u32 GetDepth(int x, int y) { | |||
| 43 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 59 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| 44 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | 60 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); |
| 45 | 61 | ||
| 62 | y = (registers.framebuffer.height - y); | ||
| 63 | |||
| 46 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 64 | // Assuming 16-bit depth buffer format until actual format handling is implemented |
| 47 | return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); | 65 | return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); |
| 48 | } | 66 | } |
| @@ -51,6 +69,8 @@ static void SetDepth(int x, int y, u16 value) { | |||
| 51 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 69 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| 52 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | 70 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); |
| 53 | 71 | ||
| 72 | y = (registers.framebuffer.height - y); | ||
| 73 | |||
| 54 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 74 | // Assuming 16-bit depth buffer format until actual format handling is implemented |
| 55 | *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; | 75 | *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; |
| 56 | } | 76 | } |
| @@ -90,30 +110,43 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, | |||
| 90 | return Math::Cross(vec1, vec2).z; | 110 | return Math::Cross(vec1, vec2).z; |
| 91 | }; | 111 | }; |
| 92 | 112 | ||
| 93 | void ProcessTriangle(const VertexShader::OutputVertex& v0, | 113 | /** |
| 94 | const VertexShader::OutputVertex& v1, | 114 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing |
| 95 | const VertexShader::OutputVertex& v2) | 115 | * culling via recursion. |
| 116 | */ | ||
| 117 | static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||
| 118 | const VertexShader::OutputVertex& v1, | ||
| 119 | const VertexShader::OutputVertex& v2, | ||
| 120 | bool reversed = false) | ||
| 96 | { | 121 | { |
| 97 | // vertex positions in rasterizer coordinates | 122 | // vertex positions in rasterizer coordinates |
| 98 | auto FloatToFix = [](float24 flt) { | 123 | static auto FloatToFix = [](float24 flt) { |
| 99 | return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f)); | 124 | // TODO: Rounding here is necessary to prevent garbage pixels at |
| 100 | }; | 125 | // triangle borders. Is it that the correct solution, though? |
| 101 | auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { | 126 | return Fix12P4(static_cast<unsigned short>(round(flt.ToFloat32() * 16.0f))); |
| 102 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | 127 | }; |
| 103 | }; | 128 | static auto ScreenToRasterizerCoordinates = [](const Math::Vec3<float24>& vec) { |
| 129 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | ||
| 130 | }; | ||
| 104 | 131 | ||
| 105 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | 132 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), |
| 106 | ScreenToRasterizerCoordinates(v1.screenpos), | 133 | ScreenToRasterizerCoordinates(v1.screenpos), |
| 107 | ScreenToRasterizerCoordinates(v2.screenpos) }; | 134 | ScreenToRasterizerCoordinates(v2.screenpos) }; |
| 108 | 135 | ||
| 109 | if (registers.cull_mode == Regs::CullMode::KeepClockWise) { | 136 | if (registers.cull_mode == Regs::CullMode::KeepAll) { |
| 110 | // Reverse vertex order and use the CCW code path. | 137 | // Make sure we always end up with a triangle wound counter-clockwise |
| 111 | std::swap(vtxpos[1], vtxpos[2]); | 138 | if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { |
| 112 | } | 139 | ProcessTriangleInternal(v0, v2, v1, true); |
| 140 | return; | ||
| 141 | } | ||
| 142 | } else { | ||
| 143 | if (!reversed && registers.cull_mode == Regs::CullMode::KeepClockWise) { | ||
| 144 | // Reverse vertex order and use the CCW code path. | ||
| 145 | ProcessTriangleInternal(v0, v2, v1, true); | ||
| 146 | return; | ||
| 147 | } | ||
| 113 | 148 | ||
| 114 | if (registers.cull_mode != Regs::CullMode::KeepAll) { | ||
| 115 | // Cull away triangles which are wound clockwise. | 149 | // Cull away triangles which are wound clockwise. |
| 116 | // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll | ||
| 117 | if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) | 150 | if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) |
| 118 | return; | 151 | return; |
| 119 | } | 152 | } |
| @@ -155,9 +188,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 155 | auto textures = registers.GetTextures(); | 188 | auto textures = registers.GetTextures(); |
| 156 | auto tev_stages = registers.GetTevStages(); | 189 | auto tev_stages = registers.GetTevStages(); |
| 157 | 190 | ||
| 191 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. | ||
| 158 | // TODO: Not sure if looping through x first might be faster | 192 | // TODO: Not sure if looping through x first might be faster |
| 159 | for (u16 y = min_y; y < max_y; y += 0x10) { | 193 | for (u16 y = min_y + 8; y < max_y; y += 0x10) { |
| 160 | for (u16 x = min_x; x < max_x; x += 0x10) { | 194 | for (u16 x = min_x + 8; x < max_x; x += 0x10) { |
| 161 | 195 | ||
| 162 | // Calculate the barycentric coordinates w0, w1 and w2 | 196 | // Calculate the barycentric coordinates w0, w1 and w2 |
| 163 | int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | 197 | int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); |
| @@ -220,7 +254,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 220 | 254 | ||
| 221 | int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); | 255 | int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); |
| 222 | int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); | 256 | int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); |
| 223 | auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { | 257 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { |
| 224 | switch (mode) { | 258 | switch (mode) { |
| 225 | case Regs::TextureConfig::ClampToEdge: | 259 | case Regs::TextureConfig::ClampToEdge: |
| 226 | val = std::max(val, 0); | 260 | val = std::max(val, 0); |
| @@ -228,7 +262,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 228 | return val; | 262 | return val; |
| 229 | 263 | ||
| 230 | case Regs::TextureConfig::Repeat: | 264 | case Regs::TextureConfig::Repeat: |
| 231 | return (int)(((unsigned)val) % size); | 265 | return (int)((unsigned)val % size); |
| 266 | |||
| 267 | case Regs::TextureConfig::MirroredRepeat: | ||
| 268 | { | ||
| 269 | int val = (int)((unsigned)val % (2 * size)); | ||
| 270 | if (val >= size) | ||
| 271 | val = 2 * size - 1 - val; | ||
| 272 | return val; | ||
| 273 | } | ||
| 232 | 274 | ||
| 233 | default: | 275 | default: |
| 234 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode); | 276 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode); |
| @@ -236,6 +278,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 236 | return 0; | 278 | return 0; |
| 237 | } | 279 | } |
| 238 | }; | 280 | }; |
| 281 | |||
| 282 | // Textures are laid out from bottom to top, hence we invert the t coordinate. | ||
| 283 | // NOTE: This may not be the right place for the inversion. | ||
| 284 | // TODO: Check if this applies to ETC textures, too. | ||
| 239 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); | 285 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); |
| 240 | t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | 286 | t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); |
| 241 | 287 | ||
| @@ -262,7 +308,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 262 | 308 | ||
| 263 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { | 309 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { |
| 264 | switch (source) { | 310 | switch (source) { |
| 311 | // TODO: What's the difference between these two? | ||
| 265 | case Source::PrimaryColor: | 312 | case Source::PrimaryColor: |
| 313 | case Source::PrimaryFragmentColor: | ||
| 266 | return primary_color; | 314 | return primary_color; |
| 267 | 315 | ||
| 268 | case Source::Texture0: | 316 | case Source::Texture0: |
| @@ -378,6 +426,25 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 378 | return result.Cast<u8>(); | 426 | return result.Cast<u8>(); |
| 379 | } | 427 | } |
| 380 | 428 | ||
| 429 | case Operation::MultiplyThenAdd: | ||
| 430 | { | ||
| 431 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | ||
| 432 | result.r() = std::min(255, result.r()); | ||
| 433 | result.g() = std::min(255, result.g()); | ||
| 434 | result.b() = std::min(255, result.b()); | ||
| 435 | return result.Cast<u8>(); | ||
| 436 | } | ||
| 437 | |||
| 438 | case Operation::AddThenMultiply: | ||
| 439 | { | ||
| 440 | auto result = input[0] + input[1]; | ||
| 441 | result.r() = std::min(255, result.r()); | ||
| 442 | result.g() = std::min(255, result.g()); | ||
| 443 | result.b() = std::min(255, result.b()); | ||
| 444 | result = (result * input[2].Cast<int>()) / 255; | ||
| 445 | return result.Cast<u8>(); | ||
| 446 | } | ||
| 447 | |||
| 381 | default: | 448 | default: |
| 382 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); | 449 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); |
| 383 | UNIMPLEMENTED(); | 450 | UNIMPLEMENTED(); |
| @@ -402,6 +469,12 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 402 | case Operation::Subtract: | 469 | case Operation::Subtract: |
| 403 | return std::max(0, (int)input[0] - (int)input[1]); | 470 | return std::max(0, (int)input[0] - (int)input[1]); |
| 404 | 471 | ||
| 472 | case Operation::MultiplyThenAdd: | ||
| 473 | return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); | ||
| 474 | |||
| 475 | case Operation::AddThenMultiply: | ||
| 476 | return (std::min(255, (input[0] + input[1])) * input[2]) / 255; | ||
| 477 | |||
| 405 | default: | 478 | default: |
| 406 | LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); | 479 | LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); |
| 407 | UNIMPLEMENTED(); | 480 | UNIMPLEMENTED(); |
| @@ -475,7 +548,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 475 | 548 | ||
| 476 | // TODO: Does depth indeed only get written even if depth testing is enabled? | 549 | // TODO: Does depth indeed only get written even if depth testing is enabled? |
| 477 | if (registers.output_merger.depth_test_enable) { | 550 | if (registers.output_merger.depth_test_enable) { |
| 478 | u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 + | 551 | u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + |
| 479 | v1.screenpos[2].ToFloat32() * w1 + | 552 | v1.screenpos[2].ToFloat32() * w1 + |
| 480 | v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); | 553 | v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); |
| 481 | u16 ref_z = GetDepth(x >> 4, y >> 4); | 554 | u16 ref_z = GetDepth(x >> 4, y >> 4); |
| @@ -524,6 +597,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 524 | } | 597 | } |
| 525 | 598 | ||
| 526 | auto dest = GetPixel(x >> 4, y >> 4); | 599 | auto dest = GetPixel(x >> 4, y >> 4); |
| 600 | Math::Vec4<u8> blend_output = combiner_output; | ||
| 527 | 601 | ||
| 528 | if (registers.output_merger.alphablend_enable) { | 602 | if (registers.output_merger.alphablend_enable) { |
| 529 | auto params = registers.output_merger.alpha_blending; | 603 | auto params = registers.output_merger.alpha_blending; |
| @@ -574,7 +648,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 574 | 648 | ||
| 575 | default: | 649 | default: |
| 576 | LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); | 650 | LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); |
| 577 | exit(0); | 651 | UNIMPLEMENTED(); |
| 578 | break; | 652 | break; |
| 579 | } | 653 | } |
| 580 | }; | 654 | }; |
| @@ -607,86 +681,78 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 607 | 681 | ||
| 608 | default: | 682 | default: |
| 609 | LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); | 683 | LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); |
| 610 | exit(0); | 684 | UNIMPLEMENTED(); |
| 685 | break; | ||
| 686 | } | ||
| 687 | }; | ||
| 688 | |||
| 689 | using BlendEquation = decltype(params)::BlendEquation; | ||
| 690 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | ||
| 691 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | ||
| 692 | BlendEquation equation) { | ||
| 693 | Math::Vec4<int> result; | ||
| 694 | |||
| 695 | auto src_result = (src * srcfactor).Cast<int>(); | ||
| 696 | auto dst_result = (dest * destfactor).Cast<int>(); | ||
| 697 | |||
| 698 | switch (equation) { | ||
| 699 | case BlendEquation::Add: | ||
| 700 | result = (src_result + dst_result) / 255; | ||
| 611 | break; | 701 | break; |
| 702 | |||
| 703 | case BlendEquation::Subtract: | ||
| 704 | result = (src_result - dst_result) / 255; | ||
| 705 | break; | ||
| 706 | |||
| 707 | case BlendEquation::ReverseSubtract: | ||
| 708 | result = (dst_result - src_result) / 255; | ||
| 709 | break; | ||
| 710 | |||
| 711 | // TODO: How do these two actually work? | ||
| 712 | // OpenGL doesn't include the blend factors in the min/max computations, | ||
| 713 | // but is this what the 3DS actually does? | ||
| 714 | case BlendEquation::Min: | ||
| 715 | result.r() = std::min(src.r(), dest.r()); | ||
| 716 | result.g() = std::min(src.g(), dest.g()); | ||
| 717 | result.b() = std::min(src.b(), dest.b()); | ||
| 718 | result.a() = std::min(src.a(), dest.a()); | ||
| 719 | break; | ||
| 720 | |||
| 721 | case BlendEquation::Max: | ||
| 722 | result.r() = std::max(src.r(), dest.r()); | ||
| 723 | result.g() = std::max(src.g(), dest.g()); | ||
| 724 | result.b() = std::max(src.b(), dest.b()); | ||
| 725 | result.a() = std::max(src.a(), dest.a()); | ||
| 726 | break; | ||
| 727 | |||
| 728 | default: | ||
| 729 | LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); | ||
| 730 | UNIMPLEMENTED(); | ||
| 612 | } | 731 | } |
| 732 | |||
| 733 | return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), | ||
| 734 | MathUtil::Clamp(result.g(), 0, 255), | ||
| 735 | MathUtil::Clamp(result.b(), 0, 255), | ||
| 736 | MathUtil::Clamp(result.a(), 0, 255)); | ||
| 613 | }; | 737 | }; |
| 614 | 738 | ||
| 615 | auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), | 739 | auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), |
| 616 | LookupFactorA(params.factor_source_a)); | 740 | LookupFactorA(params.factor_source_a)); |
| 617 | auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), | 741 | auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), |
| 618 | LookupFactorA(params.factor_dest_a)); | 742 | LookupFactorA(params.factor_dest_a)); |
| 619 | |||
| 620 | auto src_result = (combiner_output * srcfactor).Cast<int>(); | ||
| 621 | auto dst_result = (dest * dstfactor).Cast<int>(); | ||
| 622 | |||
| 623 | switch (params.blend_equation_rgb) { | ||
| 624 | case params.Add: | ||
| 625 | { | ||
| 626 | auto result = (src_result + dst_result) / 255; | ||
| 627 | result.r() = std::min(255, result.r()); | ||
| 628 | result.g() = std::min(255, result.g()); | ||
| 629 | result.b() = std::min(255, result.b()); | ||
| 630 | combiner_output = result.Cast<u8>(); | ||
| 631 | break; | ||
| 632 | } | ||
| 633 | |||
| 634 | case params.Subtract: | ||
| 635 | { | ||
| 636 | auto result = (src_result - dst_result) / 255; | ||
| 637 | result.r() = std::max(0, result.r()); | ||
| 638 | result.g() = std::max(0, result.g()); | ||
| 639 | result.b() = std::max(0, result.b()); | ||
| 640 | combiner_output = result.Cast<u8>(); | ||
| 641 | break; | ||
| 642 | } | ||
| 643 | |||
| 644 | case params.ReverseSubtract: | ||
| 645 | { | ||
| 646 | auto result = (dst_result - src_result) / 255; | ||
| 647 | result.r() = std::max(0, result.r()); | ||
| 648 | result.g() = std::max(0, result.g()); | ||
| 649 | result.b() = std::max(0, result.b()); | ||
| 650 | combiner_output = result.Cast<u8>(); | ||
| 651 | break; | ||
| 652 | } | ||
| 653 | |||
| 654 | case params.Min: | ||
| 655 | { | ||
| 656 | // TODO: GL spec says to do it without the factors, but is this what the 3DS does? | ||
| 657 | Math::Vec4<int> result; | ||
| 658 | result.r() = std::min(combiner_output.r(),dest.r()); | ||
| 659 | result.g() = std::min(combiner_output.g(),dest.g()); | ||
| 660 | result.b() = std::min(combiner_output.b(),dest.b()); | ||
| 661 | combiner_output = result.Cast<u8>(); | ||
| 662 | break; | ||
| 663 | } | ||
| 664 | |||
| 665 | case params.Max: | ||
| 666 | { | ||
| 667 | // TODO: GL spec says to do it without the factors, but is this what the 3DS does? | ||
| 668 | Math::Vec4<int> result; | ||
| 669 | result.r() = std::max(combiner_output.r(),dest.r()); | ||
| 670 | result.g() = std::max(combiner_output.g(),dest.g()); | ||
| 671 | result.b() = std::max(combiner_output.b(),dest.b()); | ||
| 672 | combiner_output = result.Cast<u8>(); | ||
| 673 | break; | ||
| 674 | } | ||
| 675 | 743 | ||
| 676 | default: | 744 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); |
| 677 | LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value()); | 745 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); |
| 678 | exit(0); | ||
| 679 | } | ||
| 680 | } else { | 746 | } else { |
| 681 | LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); | 747 | LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); |
| 682 | exit(0); | 748 | UNIMPLEMENTED(); |
| 683 | } | 749 | } |
| 684 | 750 | ||
| 685 | const Math::Vec4<u8> result = { | 751 | const Math::Vec4<u8> result = { |
| 686 | registers.output_merger.red_enable ? combiner_output.r() : dest.r(), | 752 | registers.output_merger.red_enable ? blend_output.r() : dest.r(), |
| 687 | registers.output_merger.green_enable ? combiner_output.g() : dest.g(), | 753 | registers.output_merger.green_enable ? blend_output.g() : dest.g(), |
| 688 | registers.output_merger.blue_enable ? combiner_output.b() : dest.b(), | 754 | registers.output_merger.blue_enable ? blend_output.b() : dest.b(), |
| 689 | registers.output_merger.alpha_enable ? combiner_output.a() : dest.a() | 755 | registers.output_merger.alpha_enable ? blend_output.a() : dest.a() |
| 690 | }; | 756 | }; |
| 691 | 757 | ||
| 692 | DrawPixel(x >> 4, y >> 4, result); | 758 | DrawPixel(x >> 4, y >> 4, result); |
| @@ -694,6 +760,12 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 694 | } | 760 | } |
| 695 | } | 761 | } |
| 696 | 762 | ||
| 763 | void ProcessTriangle(const VertexShader::OutputVertex& v0, | ||
| 764 | const VertexShader::OutputVertex& v1, | ||
| 765 | const VertexShader::OutputVertex& v2) { | ||
| 766 | ProcessTriangleInternal(v0, v1, v2); | ||
| 767 | } | ||
| 768 | |||
| 697 | } // namespace Rasterizer | 769 | } // namespace Rasterizer |
| 698 | 770 | ||
| 699 | } // namespace Pica | 771 | } // namespace Pica |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 80935a50a..def868ac7 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -85,8 +85,11 @@ struct VertexShaderState { | |||
| 85 | }; | 85 | }; |
| 86 | 86 | ||
| 87 | struct CallStackElement { | 87 | struct CallStackElement { |
| 88 | u32 final_address; | 88 | u32 final_address; // Address upon which we jump to return_address |
| 89 | u32 return_address; | 89 | u32 return_address; // Where to jump when leaving scope |
| 90 | u8 repeat_counter; // How often to repeat until this call stack element is removed | ||
| 91 | u8 loop_increment; // Which value to add to the loop counter after an iteration | ||
| 92 | // TODO: Should this be a signed value? Does it even matter? | ||
| 90 | }; | 93 | }; |
| 91 | 94 | ||
| 92 | // TODO: Is there a maximal size for this? | 95 | // TODO: Is there a maximal size for this? |
| @@ -105,9 +108,14 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 105 | 108 | ||
| 106 | while (true) { | 109 | while (true) { |
| 107 | if (!state.call_stack.empty()) { | 110 | if (!state.call_stack.empty()) { |
| 108 | if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { | 111 | auto& top = state.call_stack.top(); |
| 109 | state.program_counter = &shader_memory[state.call_stack.top().return_address]; | 112 | if (state.program_counter - shader_memory.data() == top.final_address) { |
| 110 | state.call_stack.pop(); | 113 | state.address_registers[2] += top.loop_increment; |
| 114 | |||
| 115 | if (top.repeat_counter-- == 0) { | ||
| 116 | state.program_counter = &shader_memory[top.return_address]; | ||
| 117 | state.call_stack.pop(); | ||
| 118 | } | ||
| 111 | 119 | ||
| 112 | // TODO: Is "trying again" accurate to hardware? | 120 | // TODO: Is "trying again" accurate to hardware? |
| 113 | continue; | 121 | continue; |
| @@ -118,9 +126,10 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 118 | const Instruction& instr = *(const Instruction*)state.program_counter; | 126 | const Instruction& instr = *(const Instruction*)state.program_counter; |
| 119 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | 127 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; |
| 120 | 128 | ||
| 121 | auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset) { | 129 | static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions, |
| 130 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | ||
| 122 | state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 131 | state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 123 | state.call_stack.push({ offset + num_instructions, return_offset }); | 132 | state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment }); |
| 124 | }; | 133 | }; |
| 125 | u32 binary_offset = state.program_counter - shader_memory.data(); | 134 | u32 binary_offset = state.program_counter - shader_memory.data(); |
| 126 | 135 | ||
| @@ -457,7 +466,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 457 | call(state, | 466 | call(state, |
| 458 | instr.flow_control.dest_offset, | 467 | instr.flow_control.dest_offset, |
| 459 | instr.flow_control.num_instructions, | 468 | instr.flow_control.num_instructions, |
| 460 | binary_offset + 1); | 469 | binary_offset + 1, 0, 0); |
| 461 | break; | 470 | break; |
| 462 | 471 | ||
| 463 | case Instruction::OpCode::CALLU: | 472 | case Instruction::OpCode::CALLU: |
| @@ -465,7 +474,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 465 | call(state, | 474 | call(state, |
| 466 | instr.flow_control.dest_offset, | 475 | instr.flow_control.dest_offset, |
| 467 | instr.flow_control.num_instructions, | 476 | instr.flow_control.num_instructions, |
| 468 | binary_offset + 1); | 477 | binary_offset + 1, 0, 0); |
| 469 | } | 478 | } |
| 470 | break; | 479 | break; |
| 471 | 480 | ||
| @@ -474,7 +483,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 474 | call(state, | 483 | call(state, |
| 475 | instr.flow_control.dest_offset, | 484 | instr.flow_control.dest_offset, |
| 476 | instr.flow_control.num_instructions, | 485 | instr.flow_control.num_instructions, |
| 477 | binary_offset + 1); | 486 | binary_offset + 1, 0, 0); |
| 478 | } | 487 | } |
| 479 | break; | 488 | break; |
| 480 | 489 | ||
| @@ -486,12 +495,12 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 486 | call(state, | 495 | call(state, |
| 487 | binary_offset + 1, | 496 | binary_offset + 1, |
| 488 | instr.flow_control.dest_offset - binary_offset - 1, | 497 | instr.flow_control.dest_offset - binary_offset - 1, |
| 489 | instr.flow_control.dest_offset + instr.flow_control.num_instructions); | 498 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 490 | } else { | 499 | } else { |
| 491 | call(state, | 500 | call(state, |
| 492 | instr.flow_control.dest_offset, | 501 | instr.flow_control.dest_offset, |
| 493 | instr.flow_control.num_instructions, | 502 | instr.flow_control.num_instructions, |
| 494 | instr.flow_control.dest_offset + instr.flow_control.num_instructions); | 503 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 495 | } | 504 | } |
| 496 | 505 | ||
| 497 | break; | 506 | break; |
| @@ -504,17 +513,30 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 504 | call(state, | 513 | call(state, |
| 505 | binary_offset + 1, | 514 | binary_offset + 1, |
| 506 | instr.flow_control.dest_offset - binary_offset - 1, | 515 | instr.flow_control.dest_offset - binary_offset - 1, |
| 507 | instr.flow_control.dest_offset + instr.flow_control.num_instructions); | 516 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 508 | } else { | 517 | } else { |
| 509 | call(state, | 518 | call(state, |
| 510 | instr.flow_control.dest_offset, | 519 | instr.flow_control.dest_offset, |
| 511 | instr.flow_control.num_instructions, | 520 | instr.flow_control.num_instructions, |
| 512 | instr.flow_control.dest_offset + instr.flow_control.num_instructions); | 521 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 513 | } | 522 | } |
| 514 | 523 | ||
| 515 | break; | 524 | break; |
| 516 | } | 525 | } |
| 517 | 526 | ||
| 527 | case Instruction::OpCode::LOOP: | ||
| 528 | { | ||
| 529 | state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y; | ||
| 530 | |||
| 531 | call(state, | ||
| 532 | binary_offset + 1, | ||
| 533 | instr.flow_control.dest_offset - binary_offset + 1, | ||
| 534 | instr.flow_control.dest_offset + 1, | ||
| 535 | shader_uniforms.i[instr.flow_control.int_uniform_id].x, | ||
| 536 | shader_uniforms.i[instr.flow_control.int_uniform_id].z); | ||
| 537 | break; | ||
| 538 | } | ||
| 539 | |||
| 518 | default: | 540 | default: |
| 519 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | 541 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |
| 520 | (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); | 542 | (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); |