summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/citra_qt/debugger/graphics_cmdlists.cpp4
-rw-r--r--src/common/common.vcxproj1
-rw-r--r--src/common/common.vcxproj.filters1
-rw-r--r--src/common/register_set.h163
-rw-r--r--src/core/hle/service/gsp.cpp55
-rw-r--r--src/core/hw/gpu.cpp143
-rw-r--r--src/core/hw/gpu.h363
-rw-r--r--src/core/mem_map.cpp4
-rw-r--r--src/core/mem_map.h22
-rw-r--r--src/core/mem_map_funcs.cpp68
-rw-r--r--src/video_core/CMakeLists.txt16
-rw-r--r--src/video_core/clipper.cpp179
-rw-r--r--src/video_core/clipper.h21
-rw-r--r--src/video_core/command_processor.cpp238
-rw-r--r--src/video_core/command_processor.h31
-rw-r--r--src/video_core/gpu_debugger.h8
-rw-r--r--src/video_core/math.h578
-rw-r--r--src/video_core/pica.h633
-rw-r--r--src/video_core/primitive_assembly.cpp51
-rw-r--r--src/video_core/primitive_assembly.h21
-rw-r--r--src/video_core/rasterizer.cpp180
-rw-r--r--src/video_core/rasterizer.h21
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp18
-rw-r--r--src/video_core/vertex_shader.cpp270
-rw-r--r--src/video_core/vertex_shader.h211
-rw-r--r--src/video_core/video_core.vcxproj11
-rw-r--r--src/video_core/video_core.vcxproj.filters11
27 files changed, 2695 insertions, 627 deletions
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index 195197ef5..e98560a19 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -78,12 +78,12 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const
78 // index refers to a specific command 78 // index refers to a specific command
79 const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->parent->index].second; 79 const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->parent->index].second;
80 const GraphicsDebugger::PicaCommand& cmd = cmdlist[item->index]; 80 const GraphicsDebugger::PicaCommand& cmd = cmdlist[item->index];
81 const Pica::CommandHeader& header = cmd.GetHeader(); 81 const Pica::CommandProcessor::CommandHeader& header = cmd.GetHeader();
82 82
83 if (role == Qt::DisplayRole) { 83 if (role == Qt::DisplayRole) {
84 QString content; 84 QString content;
85 if (index.column() == 0) { 85 if (index.column() == 0) {
86 content = Pica::command_names[header.cmd_id]; 86 content = QString::fromLatin1(Pica::Regs::GetCommandName(header.cmd_id).c_str());
87 content.append(" "); 87 content.append(" ");
88 } else if (index.column() == 1) { 88 } else if (index.column() == 1) {
89 for (int j = 0; j < cmd.size(); ++j) 89 for (int j = 0; j < cmd.size(); ++j)
diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj
index 1f5c714c3..341d3a813 100644
--- a/src/common/common.vcxproj
+++ b/src/common/common.vcxproj
@@ -182,7 +182,6 @@
182 <ClInclude Include="mem_arena.h" /> 182 <ClInclude Include="mem_arena.h" />
183 <ClInclude Include="msg_handler.h" /> 183 <ClInclude Include="msg_handler.h" />
184 <ClInclude Include="platform.h" /> 184 <ClInclude Include="platform.h" />
185 <ClInclude Include="register_set.h" />
186 <ClInclude Include="scm_rev.h" /> 185 <ClInclude Include="scm_rev.h" />
187 <ClInclude Include="std_condition_variable.h" /> 186 <ClInclude Include="std_condition_variable.h" />
188 <ClInclude Include="std_mutex.h" /> 187 <ClInclude Include="std_mutex.h" />
diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters
index e8c4ce360..59268ce5a 100644
--- a/src/common/common.vcxproj.filters
+++ b/src/common/common.vcxproj.filters
@@ -29,7 +29,6 @@
29 <ClInclude Include="memory_util.h" /> 29 <ClInclude Include="memory_util.h" />
30 <ClInclude Include="msg_handler.h" /> 30 <ClInclude Include="msg_handler.h" />
31 <ClInclude Include="platform.h" /> 31 <ClInclude Include="platform.h" />
32 <ClInclude Include="register_set.h" />
33 <ClInclude Include="std_condition_variable.h" /> 32 <ClInclude Include="std_condition_variable.h" />
34 <ClInclude Include="std_mutex.h" /> 33 <ClInclude Include="std_mutex.h" />
35 <ClInclude Include="std_thread.h" /> 34 <ClInclude Include="std_thread.h" />
diff --git a/src/common/register_set.h b/src/common/register_set.h
deleted file mode 100644
index ba19a2614..000000000
--- a/src/common/register_set.h
+++ /dev/null
@@ -1,163 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
7// Copyright 2014 Tony Wasserka
8// All rights reserved.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are met:
12//
13// * Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18// * Neither the name of the owner nor the names of its contributors may
19// be used to endorse or promote products derived from this software
20// without specific prior written permission.
21//
22// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34/*
35 * Standardized way to define a group of registers and corresponding data structures. To define
36 * a new register set, first define a struct containing an enumeration called "Id" containing
37 * all register IDs and a template struct called "Struct". Specialize the Struct struct for any
38 * register ID which needs to be accessed in a specialized way. You can then declare the object
39 * containing all register values using the RegisterSet<BaseType, DefiningStruct> type, where
40 * BaseType is the underlying type of each register (e.g. u32).
41 * Of course, you'll usually want to implement the Struct template such that they are of the same
42 * size as BaseType. However, it's also possible to make it larger, e.g. when you want to describe
43 * multiple registers with the same structure.
44 *
45 * Example:
46 *
47 * struct Regs {
48 * enum Id : u32 {
49 * Value1 = 0,
50 * Value2 = 1,
51 * Value3 = 2,
52 * NumIds = 3
53 * };
54 *
55 * // declare register definition structures
56 * template<Id id>
57 * struct Struct;
58 * };
59 *
60 * // Define register set object
61 * RegisterSet<u32, Regs> registers;
62 *
63 * // define register definition structures
64 * template<>
65 * struct Regs::Struct<Regs::Value1> {
66 * union {
67 * BitField<0, 4, u32> some_field;
68 * BitField<4, 3, u32> some_other_field;
69 * };
70 * };
71 *
72 * Usage in external code (within SomeNamespace scope):
73 *
74 * For a register which maps to a single index:
75 * registers.Get<Regs::Value1>().some_field = some_value;
76 *
77 * For a register which maps to different indices, e.g. a group of similar registers
78 * registers.Get<Regs::Value1>(index).some_field = some_value;
79 *
80 *
81 * @tparam BaseType Base type used for storing individual registers, e.g. u32
82 * @tparam RegDefinition Class defining an enumeration called "Id" and a template<Id id> struct, as described above.
83 * @note RegDefinition::Id needs to have an enum value called NumIds defining the number of registers to be allocated.
84 */
85template<typename BaseType, typename RegDefinition>
86struct RegisterSet {
87 // Register IDs
88 using Id = typename RegDefinition::Id;
89
90 // type used for *this
91 using ThisType = RegisterSet<BaseType, RegDefinition>;
92
93 // Register definition structs, defined in RegDefinition
94 template<Id id>
95 using Struct = typename RegDefinition::template Struct<id>;
96
97
98 /*
99 * Lookup register with the given id and return it as the corresponding structure type.
100 * @note This just forwards the arguments to Get(Id).
101 */
102 template<Id id>
103 const Struct<id>& Get() const {
104 return Get<id>(id);
105 }
106
107 /*
108 * Lookup register with the given id and return it as the corresponding structure type.
109 * @note This just forwards the arguments to Get(Id).
110 */
111 template<Id id>
112 Struct<id>& Get() {
113 return Get<id>(id);
114 }
115
116 /*
117 * Lookup register with the given index and return it as the corresponding structure type.
118 * @todo Is this portable with regards to structures larger than BaseType?
119 * @note if index==id, you don't need to specify the function parameter.
120 */
121 template<Id id>
122 const Struct<id>& Get(const Id& index) const {
123 const int idx = static_cast<size_t>(index);
124 return *reinterpret_cast<const Struct<id>*>(&raw[idx]);
125 }
126
127 /*
128 * Lookup register with the given index and return it as the corresponding structure type.
129 * @note This just forwards the arguments to the const version of Get(Id).
130 * @note if index==id, you don't need to specify the function parameter.
131 */
132 template<Id id>
133 Struct<id>& Get(const Id& index) {
134 return const_cast<Struct<id>&>(GetThis().Get<id>(index));
135 }
136
137 /*
138 * Plain array access.
139 * @note If you want to have this cast to a register definition struct, use Get() instead.
140 */
141 const BaseType& operator[] (const Id& id) const {
142 return raw[static_cast<size_t>(id)];
143 }
144
145 /*
146 * Plain array access.
147 * @note If you want to have this cast to a register definition struct, use Get() instead.
148 * @note This operator just forwards its argument to the const version.
149 */
150 BaseType& operator[] (const Id& id) {
151 return const_cast<BaseType&>(GetThis()[id]);
152 }
153
154private:
155 /*
156 * Returns a const reference to "this".
157 */
158 const ThisType& GetThis() const {
159 return static_cast<const ThisType&>(*this);
160 }
161
162 BaseType raw[Id::NumIds];
163};
diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp
index e241b31c8..635f50a53 100644
--- a/src/core/hle/service/gsp.cpp
+++ b/src/core/hle/service/gsp.cpp
@@ -32,7 +32,7 @@ static inline u8* GetCommandBuffer(u32 thread_id) {
32 if (0 == g_shared_memory) 32 if (0 == g_shared_memory)
33 return nullptr; 33 return nullptr;
34 34
35 return Kernel::GetSharedMemoryPointer(g_shared_memory, 35 return Kernel::GetSharedMemoryPointer(g_shared_memory,
36 0x800 + (thread_id * sizeof(CommandBuffer))); 36 0x800 + (thread_id * sizeof(CommandBuffer)));
37} 37}
38 38
@@ -173,11 +173,11 @@ void ExecuteCommand(const Command& command) {
173 case CommandId::SET_COMMAND_LIST_LAST: 173 case CommandId::SET_COMMAND_LIST_LAST:
174 { 174 {
175 auto& params = command.set_command_list_last; 175 auto& params = command.set_command_list_last;
176 WriteGPURegister(GPU::Regs::CommandProcessor + 2, params.address >> 3); 176 WriteGPURegister(GPU_REG_INDEX(command_processor_config.address), Memory::VirtualToPhysicalAddress(params.address) >> 3);
177 WriteGPURegister(GPU::Regs::CommandProcessor, params.size >> 3); 177 WriteGPURegister(GPU_REG_INDEX(command_processor_config.size), params.size >> 3);
178 178
179 // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though 179 // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though
180 WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1); 180 WriteGPURegister(GPU_REG_INDEX(command_processor_config.trigger), 1);
181 181
182 // TODO: Move this to GPU 182 // TODO: Move this to GPU
183 // TODO: Not sure what units the size is measured in 183 // TODO: Not sure what units the size is measured in
@@ -193,20 +193,28 @@ void ExecuteCommand(const Command& command) {
193 case CommandId::SET_MEMORY_FILL: 193 case CommandId::SET_MEMORY_FILL:
194 { 194 {
195 auto& params = command.memory_fill; 195 auto& params = command.memory_fill;
196 WriteGPURegister(GPU::Regs::MemoryFill, params.start1 >> 3); 196 WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_start), Memory::VirtualToPhysicalAddress(params.start1) >> 3);
197 WriteGPURegister(GPU::Regs::MemoryFill + 1, params.end1 >> 3); 197 WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_end), Memory::VirtualToPhysicalAddress(params.end1) >> 3);
198 WriteGPURegister(GPU::Regs::MemoryFill + 2, params.end1 - params.start1); 198 WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].size), params.end1 - params.start1);
199 WriteGPURegister(GPU::Regs::MemoryFill + 3, params.value1); 199 WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].value), params.value1);
200 200
201 WriteGPURegister(GPU::Regs::MemoryFill + 4, params.start2 >> 3); 201 WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_start), Memory::VirtualToPhysicalAddress(params.start2) >> 3);
202 WriteGPURegister(GPU::Regs::MemoryFill + 5, params.end2 >> 3); 202 WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), Memory::VirtualToPhysicalAddress(params.end2) >> 3);
203 WriteGPURegister(GPU::Regs::MemoryFill + 6, params.end2 - params.start2); 203 WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].size), params.end2 - params.start2);
204 WriteGPURegister(GPU::Regs::MemoryFill + 7, params.value2); 204 WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].value), params.value2);
205 break; 205 break;
206 } 206 }
207 207
208 // TODO: Check if texture copies are implemented correctly..
209 case CommandId::SET_DISPLAY_TRANSFER: 208 case CommandId::SET_DISPLAY_TRANSFER:
209 {
210 auto& params = command.image_copy;
211 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
212 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
213 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size);
214 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size);
215 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags);
216 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1);
217
210 // TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to 218 // TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to
211 // work well enough for running demos. Need to figure out how these all work and trigger 219 // work well enough for running demos. Need to figure out how these all work and trigger
212 // them correctly. 220 // them correctly.
@@ -216,19 +224,20 @@ void ExecuteCommand(const Command& command) {
216 SignalInterrupt(InterruptId::P3D); 224 SignalInterrupt(InterruptId::P3D);
217 SignalInterrupt(InterruptId::DMA); 225 SignalInterrupt(InterruptId::DMA);
218 break; 226 break;
227 }
219 228
229 // TODO: Check if texture copies are implemented correctly..
220 case CommandId::SET_TEXTURE_COPY: 230 case CommandId::SET_TEXTURE_COPY:
221 { 231 {
222 auto& params = command.image_copy; 232 auto& params = command.image_copy;
223 WriteGPURegister(GPU::Regs::DisplayTransfer, params.in_buffer_address >> 3); 233 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
224 WriteGPURegister(GPU::Regs::DisplayTransfer + 1, params.out_buffer_address >> 3); 234 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
225 WriteGPURegister(GPU::Regs::DisplayTransfer + 3, params.in_buffer_size); 235 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size);
226 WriteGPURegister(GPU::Regs::DisplayTransfer + 2, params.out_buffer_size); 236 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size);
227 WriteGPURegister(GPU::Regs::DisplayTransfer + 4, params.flags); 237 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags);
228 238
229 // TODO: Should this only be ORed with 1 for texture copies? 239 // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1?
230 // trigger transfer 240 WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1);
231 WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1);
232 break; 241 break;
233 } 242 }
234 243
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index d94c2329b..87cf93bac 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -14,106 +14,29 @@
14 14
15#include "core/hw/gpu.h" 15#include "core/hw/gpu.h"
16 16
17#include "video_core/command_processor.h"
17#include "video_core/video_core.h" 18#include "video_core/video_core.h"
18 19
19 20
20namespace GPU { 21namespace GPU {
21 22
22RegisterSet<u32, Regs> g_regs; 23Regs g_regs;
23 24
24u32 g_cur_line = 0; ///< Current vertical screen line 25u32 g_cur_line = 0; ///< Current vertical screen line
25u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line 26u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line
26 27
27/**
28 * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM
29 * @param
30 */
31void SetFramebufferLocation(const FramebufferLocation mode) {
32 switch (mode) {
33 case FRAMEBUFFER_LOCATION_FCRAM:
34 {
35 auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
36 auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
37
38 framebuffer_top.address_left1 = PADDR_TOP_LEFT_FRAME1;
39 framebuffer_top.address_left2 = PADDR_TOP_LEFT_FRAME2;
40 framebuffer_top.address_right1 = PADDR_TOP_RIGHT_FRAME1;
41 framebuffer_top.address_right2 = PADDR_TOP_RIGHT_FRAME2;
42 framebuffer_sub.address_left1 = PADDR_SUB_FRAME1;
43 //framebuffer_sub.address_left2 = unknown;
44 framebuffer_sub.address_right1 = PADDR_SUB_FRAME2;
45 //framebuffer_sub.address_right2 = unknown;
46 break;
47 }
48
49 case FRAMEBUFFER_LOCATION_VRAM:
50 {
51 auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
52 auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
53
54 framebuffer_top.address_left1 = PADDR_VRAM_TOP_LEFT_FRAME1;
55 framebuffer_top.address_left2 = PADDR_VRAM_TOP_LEFT_FRAME2;
56 framebuffer_top.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1;
57 framebuffer_top.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2;
58 framebuffer_sub.address_left1 = PADDR_VRAM_SUB_FRAME1;
59 //framebuffer_sub.address_left2 = unknown;
60 framebuffer_sub.address_right1 = PADDR_VRAM_SUB_FRAME2;
61 //framebuffer_sub.address_right2 = unknown;
62 break;
63 }
64 }
65}
66
67/**
68 * Gets the location of the framebuffers
69 * @return Location of framebuffers as FramebufferLocation enum
70 */
71FramebufferLocation GetFramebufferLocation(u32 address) {
72 if ((address & ~Memory::VRAM_MASK) == Memory::VRAM_PADDR) {
73 return FRAMEBUFFER_LOCATION_VRAM;
74 } else if ((address & ~Memory::FCRAM_MASK) == Memory::FCRAM_PADDR) {
75 return FRAMEBUFFER_LOCATION_FCRAM;
76 } else {
77 ERROR_LOG(GPU, "unknown framebuffer location!");
78 }
79 return FRAMEBUFFER_LOCATION_UNKNOWN;
80}
81
82u32 GetFramebufferAddr(const u32 address) {
83 switch (GetFramebufferLocation(address)) {
84 case FRAMEBUFFER_LOCATION_FCRAM:
85 return Memory::VirtualAddressFromPhysical_FCRAM(address);
86 case FRAMEBUFFER_LOCATION_VRAM:
87 return Memory::VirtualAddressFromPhysical_VRAM(address);
88 default:
89 ERROR_LOG(GPU, "unknown framebuffer location");
90 }
91 return 0;
92}
93
94/**
95 * Gets a read-only pointer to a framebuffer in memory
96 * @param address Physical address of framebuffer
97 * @return Returns const pointer to raw framebuffer
98 */
99const u8* GetFramebufferPointer(const u32 address) {
100 u32 addr = GetFramebufferAddr(address);
101 return (addr != 0) ? Memory::GetPointer(addr) : nullptr;
102}
103
104template <typename T> 28template <typename T>
105inline void Read(T &var, const u32 raw_addr) { 29inline void Read(T &var, const u32 raw_addr) {
106 u32 addr = raw_addr - 0x1EF00000; 30 u32 addr = raw_addr - 0x1EF00000;
107 int index = addr / 4; 31 int index = addr / 4;
108 32
109 // Reads other than u32 are untested, so I'd rather have them abort than silently fail 33 // Reads other than u32 are untested, so I'd rather have them abort than silently fail
110 if (index >= Regs::NumIds || !std::is_same<T,u32>::value) 34 if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) {
111 {
112 ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr); 35 ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr);
113 return; 36 return;
114 } 37 }
115 38
116 var = g_regs[static_cast<Regs::Id>(addr / 4)]; 39 var = g_regs[addr / 4];
117} 40}
118 41
119template <typename T> 42template <typename T>
@@ -122,28 +45,28 @@ inline void Write(u32 addr, const T data) {
122 int index = addr / 4; 45 int index = addr / 4;
123 46
124 // Writes other than u32 are untested, so I'd rather have them abort than silently fail 47 // Writes other than u32 are untested, so I'd rather have them abort than silently fail
125 if (index >= Regs::NumIds || !std::is_same<T,u32>::value) 48 if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) {
126 {
127 ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr); 49 ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr);
128 return; 50 return;
129 } 51 }
130 52
131 g_regs[static_cast<Regs::Id>(index)] = data; 53 g_regs[index] = data;
132 54
133 switch (static_cast<Regs::Id>(index)) { 55 switch (index) {
134 56
135 // Memory fills are triggered once the fill value is written. 57 // Memory fills are triggered once the fill value is written.
136 // NOTE: This is not verified. 58 // NOTE: This is not verified.
137 case Regs::MemoryFill + 3: 59 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].value, 0x00004 + 0x3):
138 case Regs::MemoryFill + 7: 60 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].value, 0x00008 + 0x3):
139 { 61 {
140 const auto& config = g_regs.Get<Regs::MemoryFill>(static_cast<Regs::Id>(index - 3)); 62 const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].value));
63 const auto& config = g_regs.memory_fill_config[is_second_filler];
141 64
142 // TODO: Not sure if this check should be done at GSP level instead 65 // TODO: Not sure if this check should be done at GSP level instead
143 if (config.address_start) { 66 if (config.address_start) {
144 // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all 67 // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all
145 u32* start = (u32*)Memory::GetPointer(config.GetStartAddress()); 68 u32* start = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress()));
146 u32* end = (u32*)Memory::GetPointer(config.GetEndAddress()); 69 u32* end = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress()));
147 for (u32* ptr = start; ptr < end; ++ptr) 70 for (u32* ptr = start; ptr < end; ++ptr)
148 *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation 71 *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation
149 72
@@ -152,12 +75,12 @@ inline void Write(u32 addr, const T data) {
152 break; 75 break;
153 } 76 }
154 77
155 case Regs::DisplayTransfer + 6: 78 case GPU_REG_INDEX(display_transfer_config.trigger):
156 { 79 {
157 const auto& config = g_regs.Get<Regs::DisplayTransfer>(); 80 const auto& config = g_regs.display_transfer_config;
158 if (config.trigger & 1) { 81 if (config.trigger & 1) {
159 u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress()); 82 u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress()));
160 u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress()); 83 u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress()));
161 84
162 for (int y = 0; y < config.output_height; ++y) { 85 for (int y = 0; y < config.output_height; ++y) {
163 // TODO: Why does the register seem to hold twice the framebuffer width? 86 // TODO: Why does the register seem to hold twice the framebuffer width?
@@ -221,14 +144,15 @@ inline void Write(u32 addr, const T data) {
221 break; 144 break;
222 } 145 }
223 146
224 case Regs::CommandProcessor + 4: 147 // Seems like writing to this register triggers processing
148 case GPU_REG_INDEX(command_processor_config.trigger):
225 { 149 {
226 const auto& config = g_regs.Get<Regs::CommandProcessor>(); 150 const auto& config = g_regs.command_processor_config;
227 if (config.trigger & 1) 151 if (config.trigger & 1)
228 { 152 {
229 // u32* buffer = (u32*)Memory::GetPointer(config.address << 3); 153 u32* buffer = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalAddress()));
230 ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.address << 3); 154 u32 size = config.size << 3;
231 // TODO: Process command list! 155 Pica::CommandProcessor::ProcessCommandList(buffer, size);
232 } 156 }
233 break; 157 break;
234 } 158 }
@@ -252,7 +176,7 @@ template void Write<u8>(u32 addr, const u8 data);
252 176
253/// Update hardware 177/// Update hardware
254void Update() { 178void Update() {
255 auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); 179 auto& framebuffer_top = g_regs.framebuffer_config[0];
256 u64 current_ticks = Core::g_app_core->GetTicks(); 180 u64 current_ticks = Core::g_app_core->GetTicks();
257 181
258 // Synchronize line... 182 // Synchronize line...
@@ -277,11 +201,22 @@ void Init() {
277 g_cur_line = 0; 201 g_cur_line = 0;
278 g_last_line_ticks = Core::g_app_core->GetTicks(); 202 g_last_line_ticks = Core::g_app_core->GetTicks();
279 203
280// SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); 204 auto& framebuffer_top = g_regs.framebuffer_config[0];
281 SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM); 205 auto& framebuffer_sub = g_regs.framebuffer_config[1];
206
207 // Setup default framebuffer addresses (located in VRAM)
208 // .. or at least these are the ones used by system applets.
209 // There's probably a smarter way to come up with addresses
210 // like this which does not require hardcoding.
211 framebuffer_top.address_left1 = 0x181E6000;
212 framebuffer_top.address_left2 = 0x1822C800;
213 framebuffer_top.address_right1 = 0x18273000;
214 framebuffer_top.address_right2 = 0x182B9800;
215 framebuffer_sub.address_left1 = 0x1848F000;
216 //framebuffer_sub.address_left2 = unknown;
217 framebuffer_sub.address_right1 = 0x184C7800;
218 //framebuffer_sub.address_right2 = unknown;
282 219
283 auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
284 auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
285 // TODO: Width should be 240 instead? 220 // TODO: Width should be 240 instead?
286 framebuffer_top.width = 480; 221 framebuffer_top.width = 480;
287 framebuffer_top.height = 400; 222 framebuffer_top.height = 400;
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 42f18a0e7..d20311a00 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -4,32 +4,57 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
8
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "common/bit_field.h" 10#include "common/bit_field.h"
9#include "common/register_set.h"
10 11
11namespace GPU { 12namespace GPU {
12 13
13static const u32 kFrameCycles = 268123480 / 60; ///< 268MHz / 60 frames per second 14static const u32 kFrameCycles = 268123480 / 60; ///< 268MHz / 60 frames per second
14static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of instructions/frame 15static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of instructions/frame
15 16
17// Returns index corresponding to the Regs member labeled by field_name
18// TODO: Due to Visual Studio bug 209229, offsetof does not return constant expressions
19// when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])).
20// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
21// Hopefully, this will be fixed sometime in the future.
22// For lack of better alternatives, we currently hardcode the offsets when constant
23// expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
24// will then make sure the offsets indeed match the automatically calculated ones).
25#define GPU_REG_INDEX(field_name) (offsetof(GPU::Regs, field_name) / sizeof(u32))
26#if defined(_MSC_VER)
27#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index)
28#else
29// NOTE: Yeah, hacking in a static_assert here just to work around the lacking MSVC compiler
30// really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX
31// and then performs a (no-op) cast to size_t iff the second argument matches the expected
32// field offset. Otherwise, the compiler will fail to compile this code.
33#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
34 ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name))
35#endif
36
16// MMIO region 0x1EFxxxxx 37// MMIO region 0x1EFxxxxx
17struct Regs { 38struct Regs {
18 enum Id : u32 {
19 MemoryFill = 0x00004, // + 5,6,7; second block at 8-11
20
21 FramebufferTop = 0x00117, // + 11a,11b,11c,11d(?),11e...126
22 FramebufferBottom = 0x00157, // + 15a,15b,15c,15d(?),15e...166
23 39
24 DisplayTransfer = 0x00300, // + 301,302,303,304,305,306 40// helper macro to properly align structure members.
25 41// Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121",
26 CommandProcessor = 0x00638, // + 63a,63c 42// depending on the current source line to make sure variable names are unique.
27 43#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y
28 NumIds = 0x01000 44#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y)
29 }; 45#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)];
30 46
31 template<Id id> 47// helper macro to make sure the defined structures are of the expected size.
32 struct Struct; 48#if defined(_MSC_VER)
49// TODO: MSVC does not support using sizeof() on non-static data members even though this
50// is technically allowed since C++11. This macro should be enabled once MSVC adds
51// support for that.
52#define ASSERT_MEMBER_SIZE(name, size_in_bytes)
53#else
54#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \
55 static_assert(sizeof(name) == size_in_bytes, \
56 "Structure size and register block length don't match");
57#endif
33 58
34 enum class FramebufferFormat : u32 { 59 enum class FramebufferFormat : u32 {
35 RGBA8 = 0, 60 RGBA8 = 0,
@@ -38,201 +63,191 @@ struct Regs {
38 RGB5A1 = 3, 63 RGB5A1 = 3,
39 RGBA4 = 4, 64 RGBA4 = 4,
40 }; 65 };
41};
42 66
43template<> 67 INSERT_PADDING_WORDS(0x4);
44struct Regs::Struct<Regs::MemoryFill> {
45 u32 address_start;
46 u32 address_end; // ?
47 u32 size;
48 u32 value; // ?
49 68
50 inline u32 GetStartAddress() const { 69 struct {
51 return address_start * 8; 70 u32 address_start;
52 } 71 u32 address_end; // ?
72 u32 size;
73 u32 value; // ?
53 74
54 inline u32 GetEndAddress() const { 75 inline u32 GetStartAddress() const {
55 return address_end * 8; 76 return DecodeAddressRegister(address_start);
56 } 77 }
57};
58static_assert(sizeof(Regs::Struct<Regs::MemoryFill>) == 0x10, "Structure size and register block length don't match");
59 78
60template<> 79 inline u32 GetEndAddress() const {
61struct Regs::Struct<Regs::FramebufferTop> { 80 return DecodeAddressRegister(address_end);
62 using Format = Regs::FramebufferFormat; 81 }
82 } memory_fill_config[2];
83 ASSERT_MEMBER_SIZE(memory_fill_config[0], 0x10);
63 84
64 union { 85 INSERT_PADDING_WORDS(0x10b);
65 u32 size;
66 86
67 BitField< 0, 16, u32> width; 87 struct {
68 BitField<16, 16, u32> height; 88 using Format = Regs::FramebufferFormat;
69 };
70 89
71 u32 pad0[2]; 90 union {
91 u32 size;
72 92
73 u32 address_left1; 93 BitField< 0, 16, u32> width;
74 u32 address_left2; 94 BitField<16, 16, u32> height;
95 };
75 96
76 union { 97 INSERT_PADDING_WORDS(0x2);
77 u32 format;
78 98
79 BitField< 0, 3, Format> color_format; 99 u32 address_left1;
80 }; 100 u32 address_left2;
81 101
82 u32 pad1; 102 union {
103 u32 format;
83 104
84 union { 105 BitField< 0, 3, Format> color_format;
85 u32 active_fb; 106 };
86 107
87 // 0: Use parameters ending with "1" 108 INSERT_PADDING_WORDS(0x1);
88 // 1: Use parameters ending with "2"
89 BitField<0, 1, u32> second_fb_active;
90 };
91 109
92 u32 pad2[5]; 110 union {
111 u32 active_fb;
93 112
94 // Distance between two pixel rows, in bytes 113 // 0: Use parameters ending with "1"
95 u32 stride; 114 // 1: Use parameters ending with "2"
115 BitField<0, 1, u32> second_fb_active;
116 };
96 117
97 u32 address_right1; 118 INSERT_PADDING_WORDS(0x5);
98 u32 address_right2;
99};
100 119
101template<> 120 // Distance between two pixel rows, in bytes
102struct Regs::Struct<Regs::FramebufferBottom> : public Regs::Struct<Regs::FramebufferTop> { 121 u32 stride;
103};
104static_assert(sizeof(Regs::Struct<Regs::FramebufferTop>) == 0x40, "Structure size and register block length don't match");
105 122
106template<> 123 u32 address_right1;
107struct Regs::Struct<Regs::DisplayTransfer> { 124 u32 address_right2;
108 using Format = Regs::FramebufferFormat;
109 125
110 u32 input_address; 126 INSERT_PADDING_WORDS(0x30);
111 u32 output_address; 127 } framebuffer_config[2];
128 ASSERT_MEMBER_SIZE(framebuffer_config[0], 0x100);
112 129
113 inline u32 GetPhysicalInputAddress() const { 130 INSERT_PADDING_WORDS(0x169);
114 return input_address * 8;
115 }
116 131
117 inline u32 GetPhysicalOutputAddress() const { 132 struct {
118 return output_address * 8; 133 using Format = Regs::FramebufferFormat;
119 }
120 134
121 union { 135 u32 input_address;
122 u32 output_size; 136 u32 output_address;
123 137
124 BitField< 0, 16, u32> output_width; 138 inline u32 GetPhysicalInputAddress() const {
125 BitField<16, 16, u32> output_height; 139 return DecodeAddressRegister(input_address);
126 }; 140 }
127 141
128 union { 142 inline u32 GetPhysicalOutputAddress() const {
129 u32 input_size; 143 return DecodeAddressRegister(output_address);
144 }
130 145
131 BitField< 0, 16, u32> input_width; 146 union {
132 BitField<16, 16, u32> input_height; 147 u32 output_size;
133 };
134 148
135 union { 149 BitField< 0, 16, u32> output_width;
136 u32 flags; 150 BitField<16, 16, u32> output_height;
151 };
137 152
138 BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true 153 union {
139 BitField< 8, 3, Format> input_format; 154 u32 input_size;
140 BitField<12, 3, Format> output_format;
141 BitField<16, 1, u32> output_tiled; // stores output in a tiled format
142 };
143 155
144 u32 unknown; 156 BitField< 0, 16, u32> input_width;
157 BitField<16, 16, u32> input_height;
158 };
145 159
146 // it seems that writing to this field triggers the display transfer 160 union {
147 u32 trigger; 161 u32 flags;
148};
149static_assert(sizeof(Regs::Struct<Regs::DisplayTransfer>) == 0x1C, "Structure size and register block length don't match");
150 162
151template<> 163 BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true
152struct Regs::Struct<Regs::CommandProcessor> { 164 BitField< 8, 3, Format> input_format;
153 // command list size 165 BitField<12, 3, Format> output_format;
154 u32 size; 166 BitField<16, 1, u32> output_tiled; // stores output in a tiled format
167 };
155 168
156 u32 pad0; 169 INSERT_PADDING_WORDS(0x1);
157 170
158 // command list address 171 // it seems that writing to this field triggers the display transfer
159 u32 address; 172 u32 trigger;
173 } display_transfer_config;
174 ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c);
160 175
161 u32 pad1; 176 INSERT_PADDING_WORDS(0x331);
162 177
163 // it seems that writing to this field triggers command list processing 178 struct {
164 u32 trigger; 179 // command list size
165}; 180 u32 size;
166static_assert(sizeof(Regs::Struct<Regs::CommandProcessor>) == 0x14, "Structure size and register block length don't match");
167
168
169extern RegisterSet<u32, Regs> g_regs;
170
171enum {
172 TOP_ASPECT_X = 0x5,
173 TOP_ASPECT_Y = 0x3,
174
175 TOP_HEIGHT = 240,
176 TOP_WIDTH = 400,
177 BOTTOM_WIDTH = 320,
178
179 // Physical addresses in FCRAM (chosen arbitrarily)
180 PADDR_TOP_LEFT_FRAME1 = 0x201D4C00,
181 PADDR_TOP_LEFT_FRAME2 = 0x202D4C00,
182 PADDR_TOP_RIGHT_FRAME1 = 0x203D4C00,
183 PADDR_TOP_RIGHT_FRAME2 = 0x204D4C00,
184 PADDR_SUB_FRAME1 = 0x205D4C00,
185 PADDR_SUB_FRAME2 = 0x206D4C00,
186 // Physical addresses in FCRAM used by ARM9 applications
187/* PADDR_TOP_LEFT_FRAME1 = 0x20184E60,
188 PADDR_TOP_LEFT_FRAME2 = 0x201CB370,
189 PADDR_TOP_RIGHT_FRAME1 = 0x20282160,
190 PADDR_TOP_RIGHT_FRAME2 = 0x202C8670,
191 PADDR_SUB_FRAME1 = 0x202118E0,
192 PADDR_SUB_FRAME2 = 0x20249CF0,*/
193
194 // Physical addresses in VRAM
195 // TODO: These should just be deduced from the ones above
196 PADDR_VRAM_TOP_LEFT_FRAME1 = 0x181D4C00,
197 PADDR_VRAM_TOP_LEFT_FRAME2 = 0x182D4C00,
198 PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x183D4C00,
199 PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x184D4C00,
200 PADDR_VRAM_SUB_FRAME1 = 0x185D4C00,
201 PADDR_VRAM_SUB_FRAME2 = 0x186D4C00,
202 // Physical addresses in VRAM used by ARM9 applications
203/* PADDR_VRAM_TOP_LEFT_FRAME2 = 0x181CB370,
204 PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x18282160,
205 PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x182C8670,
206 PADDR_VRAM_SUB_FRAME1 = 0x182118E0,
207 PADDR_VRAM_SUB_FRAME2 = 0x18249CF0,*/
208};
209 181
210/// Framebuffer location 182 INSERT_PADDING_WORDS(0x1);
211enum FramebufferLocation { 183
212 FRAMEBUFFER_LOCATION_UNKNOWN, ///< Framebuffer location is unknown 184 // command list address
213 FRAMEBUFFER_LOCATION_FCRAM, ///< Framebuffer is in the GSP heap 185 u32 address;
214 FRAMEBUFFER_LOCATION_VRAM, ///< Framebuffer is in VRAM 186
215}; 187 INSERT_PADDING_WORDS(0x1);
188
189 // it seems that writing to this field triggers command list processing
190 u32 trigger;
191
192 inline u32 GetPhysicalAddress() const {
193 return DecodeAddressRegister(address);
194 }
195 } command_processor_config;
196 ASSERT_MEMBER_SIZE(command_processor_config, 0x14);
216 197
217/** 198 INSERT_PADDING_WORDS(0x9c3);
218 * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM 199
219 * @param 200#undef INSERT_PADDING_WORDS_HELPER1
220 */ 201#undef INSERT_PADDING_WORDS_HELPER2
221void SetFramebufferLocation(const FramebufferLocation mode); 202#undef INSERT_PADDING_WORDS
222 203
223/** 204 static inline int NumIds() {
224 * Gets a read-only pointer to a framebuffer in memory 205 return sizeof(Regs) / sizeof(u32);
225 * @param address Physical address of framebuffer 206 }
226 * @return Returns const pointer to raw framebuffer 207
227 */ 208 u32& operator [] (int index) const {
228const u8* GetFramebufferPointer(const u32 address); 209 u32* content = (u32*)this;
229 210 return content[index];
230u32 GetFramebufferAddr(const u32 address); 211 }
231 212
232/** 213 u32& operator [] (int index) {
233 * Gets the location of the framebuffers 214 u32* content = (u32*)this;
234 */ 215 return content[index];
235FramebufferLocation GetFramebufferLocation(u32 address); 216 }
217
218private:
219 /*
220 * Most physical addresses which GPU registers refer to are 8-byte aligned.
221 * This function should be used to get the address from a raw register value.
222 */
223 static inline u32 DecodeAddressRegister(u32 register_value) {
224 return register_value * 8;
225 }
226};
227static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
228
229// TODO: MSVC does not support using offsetof() on non-static data members even though this
230// is technically allowed since C++11. This macro should be enabled once MSVC adds
231// support for that.
232#ifndef _MSC_VER
233#define ASSERT_REG_POSITION(field_name, position) \
234 static_assert(offsetof(Regs, field_name) == position * 4, \
235 "Field "#field_name" has invalid position")
236
237ASSERT_REG_POSITION(memory_fill_config[0], 0x00004);
238ASSERT_REG_POSITION(memory_fill_config[1], 0x00008);
239ASSERT_REG_POSITION(framebuffer_config[0], 0x00117);
240ASSERT_REG_POSITION(framebuffer_config[1], 0x00157);
241ASSERT_REG_POSITION(display_transfer_config, 0x00300);
242ASSERT_REG_POSITION(command_processor_config, 0x00638);
243
244#undef ASSERT_REG_POSITION
245#endif // !defined(_MSC_VER)
246
247// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
248static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set");
249
250extern Regs g_regs;
236 251
237template <typename T> 252template <typename T>
238void Read(T &var, const u32 addr); 253void Read(T &var, const u32 addr);
diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp
index c45746be9..14fc01471 100644
--- a/src/core/mem_map.cpp
+++ b/src/core/mem_map.cpp
@@ -72,14 +72,14 @@ void Init() {
72 72
73 g_base = MemoryMap_Setup(g_views, kNumMemViews, flags, &g_arena); 73 g_base = MemoryMap_Setup(g_views, kNumMemViews, flags, &g_arena);
74 74
75 NOTICE_LOG(MEMMAP, "initialized OK, RAM at %p (mirror at 0 @ %p)", g_heap, 75 NOTICE_LOG(MEMMAP, "initialized OK, RAM at %p (mirror at 0 @ %p)", g_heap,
76 g_physical_fcram); 76 g_physical_fcram);
77} 77}
78 78
79void Shutdown() { 79void Shutdown() {
80 u32 flags = 0; 80 u32 flags = 0;
81 MemoryMap_Shutdown(g_views, kNumMemViews, flags, &g_arena); 81 MemoryMap_Shutdown(g_views, kNumMemViews, flags, &g_arena);
82 82
83 g_arena.ReleaseSpace(); 83 g_arena.ReleaseSpace();
84 g_base = NULL; 84 g_base = NULL;
85 85
diff --git a/src/core/mem_map.h b/src/core/mem_map.h
index 12941f558..3c7810573 100644
--- a/src/core/mem_map.h
+++ b/src/core/mem_map.h
@@ -14,7 +14,6 @@ namespace Memory {
14enum { 14enum {
15 BOOTROM_SIZE = 0x00010000, ///< Bootrom (super secret code/data @ 0x8000) size 15 BOOTROM_SIZE = 0x00010000, ///< Bootrom (super secret code/data @ 0x8000) size
16 MPCORE_PRIV_SIZE = 0x00002000, ///< MPCore private memory region size 16 MPCORE_PRIV_SIZE = 0x00002000, ///< MPCore private memory region size
17 VRAM_SIZE = 0x00600000, ///< VRAM size
18 DSP_SIZE = 0x00080000, ///< DSP memory size 17 DSP_SIZE = 0x00080000, ///< DSP memory size
19 AXI_WRAM_SIZE = 0x00080000, ///< AXI WRAM size 18 AXI_WRAM_SIZE = 0x00080000, ///< AXI WRAM size
20 19
@@ -23,8 +22,6 @@ enum {
23 FCRAM_PADDR_END = (FCRAM_PADDR + FCRAM_SIZE), ///< FCRAM end of physical space 22 FCRAM_PADDR_END = (FCRAM_PADDR + FCRAM_SIZE), ///< FCRAM end of physical space
24 FCRAM_VADDR = 0x08000000, ///< FCRAM virtual address 23 FCRAM_VADDR = 0x08000000, ///< FCRAM virtual address
25 FCRAM_VADDR_END = (FCRAM_VADDR + FCRAM_SIZE), ///< FCRAM end of virtual space 24 FCRAM_VADDR_END = (FCRAM_VADDR + FCRAM_SIZE), ///< FCRAM end of virtual space
26 FCRAM_VADDR_FW0B = 0xF0000000, ///< FCRAM adress for firmare FW0B
27 FCRAM_VADDR_FW0B_END = (FCRAM_VADDR_FW0B + FCRAM_SIZE), ///< FCRAM adress end for FW0B
28 FCRAM_MASK = (FCRAM_SIZE - 1), ///< FCRAM mask 25 FCRAM_MASK = (FCRAM_SIZE - 1), ///< FCRAM mask
29 26
30 SHARED_MEMORY_SIZE = 0x04000000, ///< Shared memory size 27 SHARED_MEMORY_SIZE = 0x04000000, ///< Shared memory size
@@ -73,6 +70,7 @@ enum {
73 HARDWARE_IO_PADDR_END = (HARDWARE_IO_PADDR + HARDWARE_IO_SIZE), 70 HARDWARE_IO_PADDR_END = (HARDWARE_IO_PADDR + HARDWARE_IO_SIZE),
74 HARDWARE_IO_VADDR_END = (HARDWARE_IO_VADDR + HARDWARE_IO_SIZE), 71 HARDWARE_IO_VADDR_END = (HARDWARE_IO_VADDR + HARDWARE_IO_SIZE),
75 72
73 VRAM_SIZE = 0x00600000,
76 VRAM_PADDR = 0x18000000, 74 VRAM_PADDR = 0x18000000,
77 VRAM_VADDR = 0x1F000000, 75 VRAM_VADDR = 0x1F000000,
78 VRAM_PADDR_END = (VRAM_PADDR + VRAM_SIZE), 76 VRAM_PADDR_END = (VRAM_PADDR + VRAM_SIZE),
@@ -112,7 +110,7 @@ struct MemoryBlock {
112 110
113// In 64-bit, this might point to "high memory" (above the 32-bit limit), 111// In 64-bit, this might point to "high memory" (above the 32-bit limit),
114// so be sure to load it into a 64-bit register. 112// so be sure to load it into a 64-bit register.
115extern u8 *g_base; 113extern u8 *g_base;
116 114
117// These are guaranteed to point to "low memory" addresses (sub-32-bit). 115// These are guaranteed to point to "low memory" addresses (sub-32-bit).
118// 64-bit: Pointers to low-mem (sub-0x10000000) mirror 116// 64-bit: Pointers to low-mem (sub-0x10000000) mirror
@@ -147,7 +145,7 @@ void Write32(const u32 addr, const u32 data);
147 145
148void WriteBlock(const u32 addr, const u8* data, const int size); 146void WriteBlock(const u32 addr, const u8* data, const int size);
149 147
150u8* GetPointer(const u32 Address); 148u8* GetPointer(const u32 virtual_address);
151 149
152/** 150/**
153 * Maps a block of memory on the heap 151 * Maps a block of memory on the heap
@@ -169,16 +167,10 @@ inline const char* GetCharPointer(const u32 address) {
169 return (const char *)GetPointer(address); 167 return (const char *)GetPointer(address);
170} 168}
171 169
172inline const u32 VirtualAddressFromPhysical_FCRAM(const u32 address) { 170/// Converts a physical address to virtual address
173 return ((address & FCRAM_MASK) | FCRAM_VADDR); 171u32 PhysicalToVirtualAddress(const u32 addr);
174}
175
176inline const u32 VirtualAddressFromPhysical_IO(const u32 address) {
177 return (address + 0x0EB00000);
178}
179 172
180inline const u32 VirtualAddressFromPhysical_VRAM(const u32 address) { 173/// Converts a virtual address to physical address
181 return (address + 0x07000000); 174u32 VirtualToPhysicalAddress(const u32 addr);
182}
183 175
184} // namespace 176} // namespace
diff --git a/src/core/mem_map_funcs.cpp b/src/core/mem_map_funcs.cpp
index 305be8468..5772cca52 100644
--- a/src/core/mem_map_funcs.cpp
+++ b/src/core/mem_map_funcs.cpp
@@ -17,37 +17,44 @@ std::map<u32, MemoryBlock> g_heap_map;
17std::map<u32, MemoryBlock> g_heap_gsp_map; 17std::map<u32, MemoryBlock> g_heap_gsp_map;
18std::map<u32, MemoryBlock> g_shared_map; 18std::map<u32, MemoryBlock> g_shared_map;
19 19
20/// Convert a physical address (or firmware-specific virtual address) to primary virtual address 20/// Convert a physical address to virtual address
21u32 _VirtualAddress(const u32 addr) { 21u32 PhysicalToVirtualAddress(const u32 addr) {
22 // Our memory interface read/write functions assume virtual addresses. Put any physical address 22 // Our memory interface read/write functions assume virtual addresses. Put any physical address
23 // to virtual address translations here. This is obviously quite hacky... But we're not doing 23 // to virtual address translations here. This is quite hacky, but necessary until we implement
24 // any MMU emulation yet or anything 24 // proper MMU emulation.
25 if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) { 25 // TODO: Screw it, I'll let bunnei figure out how to do this properly.
26 return VirtualAddressFromPhysical_FCRAM(addr); 26 if ((addr >= VRAM_PADDR) && (addr < VRAM_PADDR_END)) {
27 27 return addr - VRAM_PADDR + VRAM_VADDR;
28 // Virtual address mapping FW0B 28 }else if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) {
29 } else if ((addr >= FCRAM_VADDR_FW0B) && (addr < FCRAM_VADDR_FW0B_END)) { 29 return addr - FCRAM_PADDR + FCRAM_VADDR;
30 return VirtualAddressFromPhysical_FCRAM(addr); 30 }
31 31
32 // Hardware IO 32 ERROR_LOG(MEMMAP, "Unknown physical address @ 0x%08x", addr);
33 // TODO(bunnei): FixMe 33 return addr;
34 // This isn't going to work... The physical address of HARDWARE_IO conflicts with the virtual 34}
35 // address of shared memory.
36 //} else if ((addr >= HARDWARE_IO_PADDR) && (addr < HARDWARE_IO_PADDR_END)) {
37 // return (addr + 0x0EB00000);
38 35
 36/// Convert a virtual address to physical address
37u32 VirtualToPhysicalAddress(const u32 addr) {
 38 // Our memory interface read/write functions assume virtual addresses. Put any virtual address
 39 // to physical address translations here. This is quite hacky, but necessary until we implement
40 // proper MMU emulation.
41 // TODO: Screw it, I'll let bunnei figure out how to do this properly.
42 if ((addr >= VRAM_VADDR) && (addr < VRAM_VADDR_END)) {
43 return addr - 0x07000000;
44 } else if ((addr >= FCRAM_VADDR) && (addr < FCRAM_VADDR_END)) {
45 return addr - FCRAM_VADDR + FCRAM_PADDR;
39 } 46 }
47
48 ERROR_LOG(MEMMAP, "Unknown virtual address @ 0x%08x", addr);
40 return addr; 49 return addr;
41} 50}
42 51
43template <typename T> 52template <typename T>
44inline void Read(T &var, const u32 addr) { 53inline void Read(T &var, const u32 vaddr) {
45 // TODO: Figure out the fastest order of tests for both read and write (they are probably different). 54 // TODO: Figure out the fastest order of tests for both read and write (they are probably different).
46 // TODO: Make sure this represents the mirrors in a correct way. 55 // TODO: Make sure this represents the mirrors in a correct way.
47 // Could just do a base-relative read, too.... TODO 56 // Could just do a base-relative read, too.... TODO
48 57
49 const u32 vaddr = _VirtualAddress(addr);
50
51 // Kernel memory command buffer 58 // Kernel memory command buffer
52 if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { 59 if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
53 var = *((const T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK]); 60 var = *((const T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK]);
@@ -91,9 +98,8 @@ inline void Read(T &var, const u32 addr) {
91} 98}
92 99
93template <typename T> 100template <typename T>
94inline void Write(u32 addr, const T data) { 101inline void Write(u32 vaddr, const T data) {
95 u32 vaddr = _VirtualAddress(addr); 102
96
97 // Kernel memory command buffer 103 // Kernel memory command buffer
98 if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { 104 if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
99 *(T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK] = data; 105 *(T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK] = data;
@@ -133,16 +139,14 @@ inline void Write(u32 addr, const T data) {
133 // _assert_msg_(MEMMAP, false, "umimplemented write to Configuration Memory"); 139 // _assert_msg_(MEMMAP, false, "umimplemented write to Configuration Memory");
134 //} else if ((vaddr & 0xFFFFF000) == 0x1FF81000) { 140 //} else if ((vaddr & 0xFFFFF000) == 0x1FF81000) {
135 // _assert_msg_(MEMMAP, false, "umimplemented write to shared page"); 141 // _assert_msg_(MEMMAP, false, "umimplemented write to shared page");
136 142
137 // Error out... 143 // Error out...
138 } else { 144 } else {
139 ERROR_LOG(MEMMAP, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, vaddr); 145 ERROR_LOG(MEMMAP, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, vaddr);
140 } 146 }
141} 147}
142 148
143u8 *GetPointer(const u32 addr) { 149u8 *GetPointer(const u32 vaddr) {
144 const u32 vaddr = _VirtualAddress(addr);
145
146 // Kernel memory command buffer 150 // Kernel memory command buffer
147 if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { 151 if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
148 return g_kernel_mem + (vaddr & KERNEL_MEMORY_MASK); 152 return g_kernel_mem + (vaddr & KERNEL_MEMORY_MASK);
@@ -185,12 +189,12 @@ u8 *GetPointer(const u32 addr) {
185 */ 189 */
186u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) { 190u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) {
187 MemoryBlock block; 191 MemoryBlock block;
188 192
189 block.base_address = HEAP_VADDR; 193 block.base_address = HEAP_VADDR;
190 block.size = size; 194 block.size = size;
191 block.operation = operation; 195 block.operation = operation;
192 block.permissions = permissions; 196 block.permissions = permissions;
193 197
194 if (g_heap_map.size() > 0) { 198 if (g_heap_map.size() > 0) {
195 const MemoryBlock last_block = g_heap_map.rbegin()->second; 199 const MemoryBlock last_block = g_heap_map.rbegin()->second;
196 block.address = last_block.address + last_block.size; 200 block.address = last_block.address + last_block.size;
@@ -208,12 +212,12 @@ u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) {
208 */ 212 */
209u32 MapBlock_HeapGSP(u32 size, u32 operation, u32 permissions) { 213u32 MapBlock_HeapGSP(u32 size, u32 operation, u32 permissions) {
210 MemoryBlock block; 214 MemoryBlock block;
211 215
212 block.base_address = HEAP_GSP_VADDR; 216 block.base_address = HEAP_GSP_VADDR;
213 block.size = size; 217 block.size = size;
214 block.operation = operation; 218 block.operation = operation;
215 block.permissions = permissions; 219 block.permissions = permissions;
216 220
217 if (g_heap_gsp_map.size() > 0) { 221 if (g_heap_gsp_map.size() > 0) {
218 const MemoryBlock last_block = g_heap_gsp_map.rbegin()->second; 222 const MemoryBlock last_block = g_heap_gsp_map.rbegin()->second;
219 block.address = last_block.address + last_block.size; 223 block.address = last_block.address + last_block.size;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e43e6e1bb..8e7b93acb 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,10 +1,22 @@
1set(SRCS video_core.cpp 1set(SRCS clipper.cpp
2 command_processor.cpp
3 primitive_assembly.cpp
4 rasterizer.cpp
2 utils.cpp 5 utils.cpp
6 vertex_shader.cpp
7 video_core.cpp
3 renderer_opengl/renderer_opengl.cpp) 8 renderer_opengl/renderer_opengl.cpp)
4 9
5set(HEADERS video_core.h 10set(HEADERS clipper.h
11 command_processor.h
12 math.h
13 primitive_assembly.h
14 rasterizer.h
6 utils.h 15 utils.h
16 video_core.h
7 renderer_base.h 17 renderer_base.h
18 vertex_shader.h
19 video_core.h
8 renderer_opengl/renderer_opengl.h) 20 renderer_opengl/renderer_opengl.h)
9 21
10add_library(video_core STATIC ${SRCS} ${HEADERS}) 22add_library(video_core STATIC ${SRCS} ${HEADERS})
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
new file mode 100644
index 000000000..b7180328c
--- /dev/null
+++ b/src/video_core/clipper.cpp
@@ -0,0 +1,179 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "clipper.h"
8#include "pica.h"
9#include "rasterizer.h"
10#include "vertex_shader.h"
11
12namespace Pica {
13
14namespace Clipper {
15
16struct ClippingEdge {
17public:
18 enum Type {
19 POS_X = 0,
20 NEG_X = 1,
21 POS_Y = 2,
22 NEG_Y = 3,
23 POS_Z = 4,
24 NEG_Z = 5,
25 };
26
27 ClippingEdge(Type type, float24 position) : type(type), pos(position) {}
28
29 bool IsInside(const OutputVertex& vertex) const {
30 switch (type) {
31 case POS_X: return vertex.pos.x <= pos * vertex.pos.w;
32 case NEG_X: return vertex.pos.x >= pos * vertex.pos.w;
33 case POS_Y: return vertex.pos.y <= pos * vertex.pos.w;
34 case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w;
35
36 // TODO: Check z compares ... should be 0..1 instead?
37 case POS_Z: return vertex.pos.z <= pos * vertex.pos.w;
38
39 default:
40 case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w;
41 }
42 }
43
44 bool IsOutSide(const OutputVertex& vertex) const {
45 return !IsInside(vertex);
46 }
47
48 OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
49 auto dotpr = [this](const OutputVertex& vtx) {
50 switch (type) {
51 case POS_X: return vtx.pos.x - vtx.pos.w;
52 case NEG_X: return -vtx.pos.x - vtx.pos.w;
53 case POS_Y: return vtx.pos.y - vtx.pos.w;
54 case NEG_Y: return -vtx.pos.y - vtx.pos.w;
55
56 // TODO: Verify z clipping
57 case POS_Z: return vtx.pos.z - vtx.pos.w;
58
59 default:
60 case NEG_Z: return -vtx.pos.w;
61 }
62 };
63
64 float24 dp = dotpr(v0);
65 float24 dp_prev = dotpr(v1);
66 float24 factor = dp_prev / (dp_prev - dp);
67
68 return OutputVertex::Lerp(factor, v0, v1);
69 }
70
71private:
72 Type type;
73 float24 pos;
74};
75
76static void InitScreenCoordinates(OutputVertex& vtx)
77{
78 struct {
79 float24 halfsize_x;
80 float24 offset_x;
81 float24 halfsize_y;
82 float24 offset_y;
83 float24 zscale;
84 float24 offset_z;
85 } viewport;
86
87 viewport.halfsize_x = float24::FromRawFloat24(registers.viewport_size_x);
88 viewport.halfsize_y = float24::FromRawFloat24(registers.viewport_size_y);
89 viewport.offset_x = float24::FromFloat32(registers.viewport_corner.x);
90 viewport.offset_y = float24::FromFloat32(registers.viewport_corner.y);
91 viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range);
92 viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane);
93
94 // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
95 vtx.screenpos[0] = (vtx.pos.x / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_x / float24::FromFloat32(2.0) + viewport.offset_x;
96 vtx.screenpos[1] = (vtx.pos.y / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
97 vtx.screenpos[2] = viewport.offset_z - vtx.pos.z / vtx.pos.w * viewport.zscale;
98}
99
100void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
101
102 // TODO (neobrain):
103 // The list of output vertices has some fixed maximum size,
104 // however I haven't taken the time to figure out what it is exactly.
105 // For now, we hence just assume a maximal size of 1000 vertices.
106 const size_t max_vertices = 1000;
107 std::vector<OutputVertex> buffer_vertices;
108 std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 };
109
110 // Make sure to reserve space for all vertices.
111 // Without this, buffer reallocation would invalidate references.
112 buffer_vertices.reserve(max_vertices);
113
114 // Simple implementation of the Sutherland-Hodgman clipping algorithm.
115 // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
116 for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)),
117 ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)),
118 ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)),
119 ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)),
120 ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
121 ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {
122
123 const std::vector<OutputVertex*> input_list = output_list;
124 output_list.clear();
125
126 const OutputVertex* reference_vertex = input_list.back();
127
128 for (const auto& vertex : input_list) {
129 // NOTE: This algorithm changes vertex order in some cases!
130 if (edge.IsInside(*vertex)) {
131 if (edge.IsOutSide(*reference_vertex)) {
132 buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex));
133 output_list.push_back(&(buffer_vertices.back()));
134 }
135
136 output_list.push_back(vertex);
137 } else if (edge.IsInside(*reference_vertex)) {
138 buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex));
139 output_list.push_back(&(buffer_vertices.back()));
140 }
141
142 reference_vertex = vertex;
143 }
144
145 // Need to have at least a full triangle to continue...
146 if (output_list.size() < 3)
147 return;
148 }
149
150 InitScreenCoordinates(*(output_list[0]));
151 InitScreenCoordinates(*(output_list[1]));
152
153 for (int i = 0; i < output_list.size() - 2; i ++) {
154 OutputVertex& vtx0 = *(output_list[0]);
155 OutputVertex& vtx1 = *(output_list[i+1]);
156 OutputVertex& vtx2 = *(output_list[i+2]);
157
158 InitScreenCoordinates(vtx2);
159
160 DEBUG_LOG(GPU,
161 "Triangle %d/%d (%d buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), "
162 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
163 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)",
164 i,output_list.size(), buffer_vertices.size(),
165 vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(),output_list.size(),
166 vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
167 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(),
168 vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(),
169 vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(),
170 vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32());
171
172 Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2);
173 }
174}
175
176
177} // namespace
178
179} // namespace
diff --git a/src/video_core/clipper.h b/src/video_core/clipper.h
new file mode 100644
index 000000000..14d31ca1e
--- /dev/null
+++ b/src/video_core/clipper.h
@@ -0,0 +1,21 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
namespace Pica {

// Forward declaration only; this header does not need the full vertex definition.
namespace VertexShader {
struct OutputVertex;
} // namespace VertexShader

namespace Clipper {

using VertexShader::OutputVertex;

/// Clips the triangle formed by the three given vertices and forwards the result to the rasterizer.
void ProcessTriangle(OutputVertex& v0, OutputVertex& v1, OutputVertex& v2);

} // namespace Clipper

} // namespace Pica
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
new file mode 100644
index 000000000..020a4da3f
--- /dev/null
+++ b/src/video_core/command_processor.cpp
@@ -0,0 +1,238 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include "command_processor.h"
6#include "math.h"
7#include "pica.h"
8#include "primitive_assembly.h"
9#include "vertex_shader.h"
10
11
12namespace Pica {
13
14Regs registers;
15
16namespace CommandProcessor {
17
18static int float_regs_counter = 0;
19
20static u32 uniform_write_buffer[4];
21
22// Used for VSLoadProgramData and VSLoadSwizzleData
23static u32 vs_binary_write_offset = 0;
24static u32 vs_swizzle_write_offset = 0;
25
26static inline void WritePicaReg(u32 id, u32 value) {
27 u32 old_value = registers[id];
28 registers[id] = value;
29
30 switch(id) {
31 // It seems like these trigger vertex rendering
32 case PICA_REG_INDEX(trigger_draw):
33 case PICA_REG_INDEX(trigger_draw_indexed):
34 {
35 const auto& attribute_config = registers.vertex_attributes;
36 const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress());
37
38 // Information about internal vertex attributes
39 const u8* vertex_attribute_sources[16];
40 u32 vertex_attribute_strides[16];
41 u32 vertex_attribute_formats[16];
42 u32 vertex_attribute_elements[16];
43 u32 vertex_attribute_element_size[16];
44
45 // Setup attribute data from loaders
46 for (int loader = 0; loader < 12; ++loader) {
47 const auto& loader_config = attribute_config.attribute_loaders[loader];
48
49 const u8* load_address = base_address + loader_config.data_offset;
50
51 // TODO: What happens if a loader overwrites a previous one's data?
52 for (int component = 0; component < loader_config.component_count; ++component) {
53 u32 attribute_index = loader_config.GetComponent(component);
54 vertex_attribute_sources[attribute_index] = load_address;
55 vertex_attribute_strides[attribute_index] = loader_config.byte_count;
56 vertex_attribute_formats[attribute_index] = (u32)attribute_config.GetFormat(attribute_index);
57 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
58 vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index);
59 load_address += attribute_config.GetStride(attribute_index);
60 }
61 }
62
63 // Load vertices
64 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
65
66 const auto& index_info = registers.index_array;
67 const u8* index_address_8 = (u8*)base_address + index_info.offset;
68 const u16* index_address_16 = (u16*)index_address_8;
69 bool index_u16 = (bool)index_info.format;
70
71 for (int index = 0; index < registers.num_vertices; ++index)
72 {
73 int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;
74
75 if (is_indexed) {
76 // TODO: Implement some sort of vertex cache!
77 }
78
79 // Initialize data for the current vertex
80 VertexShader::InputVertex input;
81
82 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
83 for (int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
84 const u8* srcdata = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
85 const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata :
86 (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata :
87 (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata :
88 *(float*)srcdata;
89 input.attr[i][comp] = float24::FromFloat32(srcval);
90 DEBUG_LOG(GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
91 comp, i, vertex, index,
92 attribute_config.GetBaseAddress(),
93 vertex_attribute_sources[i] - base_address,
94 srcdata - vertex_attribute_sources[i],
95 input.attr[i][comp].ToFloat32());
96 }
97 }
98 VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes());
99
100 if (is_indexed) {
101 // TODO: Add processed vertex to vertex cache!
102 }
103
104 PrimitiveAssembly::SubmitVertex(output);
105 }
106 break;
107 }
108
109 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1):
110 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2):
111 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3):
112 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4):
113 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5):
114 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6):
115 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7):
116 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8):
117 {
118 auto& uniform_setup = registers.vs_uniform_setup;
119
120 // TODO: Does actual hardware indeed keep an intermediate buffer or does
121 // it directly write the values?
122 uniform_write_buffer[float_regs_counter++] = value;
123
124 // Uniforms are written in a packed format such that 4 float24 values are encoded in
125 // three 32-bit numbers. We write to internal memory once a full such vector is
126 // written.
127 if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
128 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
129 float_regs_counter = 0;
130
131 auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index);
132
133 if (uniform_setup.index > 95) {
134 ERROR_LOG(GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
135 break;
136 }
137
138 // NOTE: The destination component order indeed is "backwards"
139 if (uniform_setup.IsFloat32()) {
140 for (auto i : {0,1,2,3})
141 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
142 } else {
143 // TODO: Untested
144 uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8);
145 uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
146 uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
147 uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF);
148 }
149
150 DEBUG_LOG(GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
151 uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
152 uniform.w.ToFloat32());
153
154 // TODO: Verify that this actually modifies the register!
155 uniform_setup.index = uniform_setup.index + 1;
156 }
157 break;
158 }
159
160 // Seems to be used to reset the write pointer for VSLoadProgramData
161 case PICA_REG_INDEX(vs_program.begin_load):
162 vs_binary_write_offset = 0;
163 break;
164
165 // Load shader program code
166 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc):
167 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd):
168 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce):
169 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf):
170 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0):
171 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1):
172 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2):
173 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3):
174 {
175 VertexShader::SubmitShaderMemoryChange(vs_binary_write_offset, value);
176 vs_binary_write_offset++;
177 break;
178 }
179
180 // Seems to be used to reset the write pointer for VSLoadSwizzleData
181 case PICA_REG_INDEX(vs_swizzle_patterns.begin_load):
182 vs_swizzle_write_offset = 0;
183 break;
184
185 // Load swizzle pattern data
186 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6):
187 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7):
188 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8):
189 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9):
190 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da):
191 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db):
192 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc):
193 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd):
194 {
195 VertexShader::SubmitSwizzleDataChange(vs_swizzle_write_offset, value);
196 vs_swizzle_write_offset++;
197 break;
198 }
199
200 default:
201 break;
202 }
203}
204
205static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) {
206 const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]);
207
208 u32* read_pointer = (u32*)first_command_word;
209
210 // TODO: Take parameter mask into consideration!
211
212 WritePicaReg(header.cmd_id, *read_pointer);
213 read_pointer += 2;
214
215 for (int i = 1; i < 1+header.extra_data_length; ++i) {
216 u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0);
217 WritePicaReg(cmd, *read_pointer);
218 ++read_pointer;
219 }
220
221 // align read pointer to 8 bytes
222 if ((first_command_word - read_pointer) % 2)
223 ++read_pointer;
224
225 return read_pointer - first_command_word;
226}
227
228void ProcessCommandList(const u32* list, u32 size) {
229 u32* read_pointer = (u32*)list;
230
231 while (read_pointer < list + size) {
232 read_pointer += ExecuteCommandBlock(read_pointer);
233 }
234}
235
236} // namespace
237
238} // namespace
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
new file mode 100644
index 000000000..6b6241a25
--- /dev/null
+++ b/src/video_core/command_processor.h
@@ -0,0 +1,31 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
#include <type_traits>

#include "common/bit_field.h"
#include "common/common_types.h"

#include "pica.h"
11
12namespace Pica {
13
14namespace CommandProcessor {
15
16union CommandHeader {
17 u32 hex;
18
19 BitField< 0, 16, u32> cmd_id;
20 BitField<16, 4, u32> parameter_mask;
21 BitField<20, 11, u32> extra_data_length;
22 BitField<31, 1, u32> group_commands;
23};
24static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout");
25static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
26
27void ProcessCommandList(const u32* list, u32 size);
28
29} // namespace
30
31} // namespace
diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h
index 5d85f90b9..2ba873457 100644
--- a/src/video_core/gpu_debugger.h
+++ b/src/video_core/gpu_debugger.h
@@ -11,6 +11,8 @@
11#include "common/log.h" 11#include "common/log.h"
12 12
13#include "core/hle/service/gsp.h" 13#include "core/hle/service/gsp.h"
14
15#include "command_processor.h"
14#include "pica.h" 16#include "pica.h"
15 17
16class GraphicsDebugger 18class GraphicsDebugger
@@ -20,10 +22,10 @@ public:
20 // A vector of commands represented by their raw byte sequence 22 // A vector of commands represented by their raw byte sequence
21 struct PicaCommand : public std::vector<u32> 23 struct PicaCommand : public std::vector<u32>
22 { 24 {
23 const Pica::CommandHeader& GetHeader() const 25 const Pica::CommandProcessor::CommandHeader& GetHeader() const
24 { 26 {
25 const u32& val = at(1); 27 const u32& val = at(1);
26 return *(Pica::CommandHeader*)&val; 28 return *(Pica::CommandProcessor::CommandHeader*)&val;
27 } 29 }
28 }; 30 };
29 31
@@ -99,7 +101,7 @@ public:
99 PicaCommandList cmdlist; 101 PicaCommandList cmdlist;
100 for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) 102 for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;)
101 { 103 {
102 const Pica::CommandHeader header = static_cast<Pica::CommandHeader>(parse_pointer[1]); 104 const Pica::CommandProcessor::CommandHeader& header = *(Pica::CommandProcessor::CommandHeader*)(&parse_pointer[1]);
103 105
104 cmdlist.push_back(PicaCommand()); 106 cmdlist.push_back(PicaCommand());
105 auto& cmd = cmdlist.back(); 107 auto& cmd = cmdlist.back();
diff --git a/src/video_core/math.h b/src/video_core/math.h
new file mode 100644
index 000000000..7030f2cfb
--- /dev/null
+++ b/src/video_core/math.h
@@ -0,0 +1,578 @@
1// Licensed under GPLv2
2// Refer to the license.txt file included.
3
4
5// Copyright 2014 Tony Wasserka
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are met:
10//
11// * Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// * Redistributions in binary form must reproduce the above copyright
14// notice, this list of conditions and the following disclaimer in the
15// documentation and/or other materials provided with the distribution.
16// * Neither the name of the owner nor the names of its contributors may
17// be used to endorse or promote products derived from this software
18// without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32#pragma once
33
34#include <cmath>
35
namespace Math {

template<typename T> class Vec2;
template<typename T> class Vec3;
template<typename T> class Vec4;


// Vector with two components of type T.
// operator[] and AsArray() address the components relative to &x, so the
// declaration order of the members must not be changed.
template<typename T>
class Vec2 {
public:
    T x, y;

    T* AsArray() { return &x; }

    Vec2() = default;
    Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
    Vec2(const T& _x, const T& _y) : x(_x), y(_y) {}

    // Converts every component to T2 with a C-style cast
    template<typename T2>
    Vec2<T2> Cast() const {
        return Vec2<T2>((T2)x, (T2)y);
    }

    // Creates a vector with all components set to f
    static Vec2 AssignToAll(const T& f)
    {
        return Vec2<T>(f, f);
    }

    // Copies the components into the given array
    void Write(T a[2]) const
    {
        a[0] = x; a[1] = y;
    }

    Vec2 operator +(const Vec2& other) const
    {
        return Vec2(x+other.x, y+other.y);
    }
    void operator += (const Vec2 &other)
    {
        x+=other.x; y+=other.y;
    }
    Vec2 operator -(const Vec2& other) const
    {
        return Vec2(x-other.x, y-other.y);
    }
    void operator -= (const Vec2& other)
    {
        x-=other.x; y-=other.y;
    }
    Vec2 operator -() const
    {
        return Vec2(-x,-y);
    }
    // Component-wise product
    Vec2 operator * (const Vec2& other) const
    {
        return Vec2(x*other.x, y*other.y);
    }
    template<typename V>
    Vec2 operator * (const V& f) const
    {
        return Vec2(x*f,y*f);
    }
    template<typename V>
    void operator *= (const V& f)
    {
        x*=f; y*=f;
    }
    template<typename V>
    Vec2 operator / (const V& f) const
    {
        return Vec2(x/f,y/f);
    }
    template<typename V>
    void operator /= (const V& f)
    {
        *this = *this / f;
    }

    // Squared length
    T Length2() const
    {
        return x*x + y*y;
    }

    // Only implemented for T=float
    float Length() const;
    void SetLength(const float l);
    Vec2 WithLength(const float l) const;
    float Distance2To(Vec2 &other);
    Vec2 Normalized() const;
    float Normalize(); // returns the previous length, which is often useful

    T& operator [] (int i) //allow vector[1] = 3  (vector.y=3)
    {
        return *((&x) + i);
    }
    T operator [] (const int i) const
    {
        return *((&x) + i);
    }

    void SetZero()
    {
        x=0; y=0;
    }

    // Common aliases: UV (texel coordinates), ST (texture coordinates)
    T& u() { return x; }
    T& v() { return y; }
    T& s() { return x; }
    T& t() { return y; }

    const T& u() const { return x; }
    const T& v() const { return y; }
    const T& s() const { return x; }
    const T& t() const { return y; }

    // swizzlers - create a subvector of specific components
    Vec2 yx() const { return Vec2(y, x); }
    Vec2 vu() const { return Vec2(y, x); }
    Vec2 ts() const { return Vec2(y, x); }

    // Inserters to add new elements to effectively create larger vectors containing this Vec2.
    // They do not modify this vector and hence are usable on const objects.
    Vec3<T> InsertBeforeX(const T& value) const {
        return Vec3<T>(value, x, y);
    }
    Vec3<T> InsertBeforeY(const T& value) const {
        return Vec3<T>(x, value, y);
    }
    Vec3<T> Append(const T& value) const {
        return Vec3<T>(x, y, value);
    }
};

// Scalar * vector, with the scalar on the left-hand side
template<typename T, typename V>
Vec2<T> operator * (const V& f, const Vec2<T>& vec)
{
    return Vec2<T>(f*vec.x,f*vec.y);
}

typedef Vec2<float> Vec2f;

// Vector with three components of type T.
// operator[] and AsArray() address the components relative to &x, so the
// declaration order of the members must not be changed.
template<typename T>
class Vec3
{
public:
    T x, y, z;

    T* AsArray() { return &x; }

    Vec3() = default;
    Vec3(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {}
    Vec3(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {}

    // Converts every component to T2 with a C-style cast
    template<typename T2>
    Vec3<T2> Cast() const {
        return Vec3<T2>((T2)x, (T2)y, (T2)z);
    }

    // Only implemented for T=int and T=float
    static Vec3 FromRGB(unsigned int rgb);
    unsigned int ToRGB() const; // alpha bits set to zero

    // Creates a vector with all components set to f
    static Vec3 AssignToAll(const T& f)
    {
        return Vec3<T>(f, f, f);
    }

    // Copies the components into the given array
    void Write(T a[3]) const
    {
        a[0] = x; a[1] = y; a[2] = z;
    }

    Vec3 operator +(const Vec3 &other) const
    {
        return Vec3(x+other.x, y+other.y, z+other.z);
    }
    void operator += (const Vec3 &other)
    {
        x+=other.x; y+=other.y; z+=other.z;
    }
    Vec3 operator -(const Vec3 &other) const
    {
        return Vec3(x-other.x, y-other.y, z-other.z);
    }
    void operator -= (const Vec3 &other)
    {
        x-=other.x; y-=other.y; z-=other.z;
    }
    Vec3 operator -() const
    {
        return Vec3(-x,-y,-z);
    }
    // Component-wise product
    Vec3 operator * (const Vec3 &other) const
    {
        return Vec3(x*other.x, y*other.y, z*other.z);
    }
    template<typename V>
    Vec3 operator * (const V& f) const
    {
        return Vec3(x*f,y*f,z*f);
    }
    template<typename V>
    void operator *= (const V& f)
    {
        x*=f; y*=f; z*=f;
    }
    template<typename V>
    Vec3 operator / (const V& f) const
    {
        return Vec3(x/f,y/f,z/f);
    }
    template<typename V>
    void operator /= (const V& f)
    {
        *this = *this / f;
    }

    // Squared length
    T Length2() const
    {
        return x*x + y*y + z*z;
    }

    // Only implemented for T=float
    float Length() const;
    void SetLength(const float l);
    Vec3 WithLength(const float l) const;
    float Distance2To(Vec3 &other);
    Vec3 Normalized() const;
    float Normalize(); // returns the previous length, which is often useful

    T& operator [] (int i) //allow vector[2] = 3   (vector.z=3)
    {
        return *((&x) + i);
    }
    T operator [] (const int i) const
    {
        return *((&x) + i);
    }

    void SetZero()
    {
        x=0; y=0; z=0;
    }

    // Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates)
    T& u() { return x; }
    T& v() { return y; }
    T& w() { return z; }

    T& r() { return x; }
    T& g() { return y; }
    T& b() { return z; }

    T& s() { return x; }
    T& t() { return y; }
    T& q() { return z; }

    const T& u() const { return x; }
    const T& v() const { return y; }
    const T& w() const { return z; }

    const T& r() const { return x; }
    const T& g() const { return y; }
    const T& b() const { return z; }

    const T& s() const { return x; }
    const T& t() const { return y; }
    const T& q() const { return z; }

    // swizzlers - create a subvector of specific components
    // e.g. Vec2 uv() { return Vec2(x,y); }
    // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); }
#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
    _DEFINE_SWIZZLER2(a, b, a##b); \
    _DEFINE_SWIZZLER2(a, b, a2##b2); \
    _DEFINE_SWIZZLER2(a, b, a3##b3); \
    _DEFINE_SWIZZLER2(a, b, a4##b4); \
    _DEFINE_SWIZZLER2(b, a, b##a); \
    _DEFINE_SWIZZLER2(b, a, b2##a2); \
    _DEFINE_SWIZZLER2(b, a, b3##a3); \
    _DEFINE_SWIZZLER2(b, a, b4##a4);

    DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
    DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
    DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
#undef DEFINE_SWIZZLER2
#undef _DEFINE_SWIZZLER2

    // Inserters to add new elements to effectively create larger vectors containing this Vec3.
    // They do not modify this vector and hence are usable on const objects.
    Vec4<T> InsertBeforeX(const T& value) const {
        return Vec4<T>(value, x, y, z);
    }
    Vec4<T> InsertBeforeY(const T& value) const {
        return Vec4<T>(x, value, y, z);
    }
    Vec4<T> InsertBeforeZ(const T& value) const {
        return Vec4<T>(x, y, value, z);
    }
    Vec4<T> Append(const T& value) const {
        return Vec4<T>(x, y, z, value);
    }
};

// Scalar * vector, with the scalar on the left-hand side
template<typename T, typename V>
Vec3<T> operator * (const V& f, const Vec3<T>& vec)
{
    return Vec3<T>(f*vec.x,f*vec.y,f*vec.z);
}

typedef Vec3<float> Vec3f;

// Vector with four components of type T.
// operator[] and AsArray() address the components relative to &x, so the
// declaration order of the members must not be changed.
template<typename T>
class Vec4
{
public:
    T x, y, z, w;

    T* AsArray() { return &x; }

    Vec4() = default;
    Vec4(const T a[4]) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
    Vec4(const T& _x, const T& _y, const T& _z, const T& _w) : x(_x), y(_y), z(_z), w(_w) {}

    // Converts every component to T2 with a C-style cast
    template<typename T2>
    Vec4<T2> Cast() const {
        return Vec4<T2>((T2)x, (T2)y, (T2)z, (T2)w);
    }

    // Only implemented for T=int and T=float
    static Vec4 FromRGBA(unsigned int rgba);
    unsigned int ToRGBA() const;

    // Creates a vector with all components set to f
    static Vec4 AssignToAll(const T& f) {
        return Vec4<T>(f, f, f, f);
    }

    // Copies the components into the given array
    void Write(T a[4]) const
    {
        a[0] = x; a[1] = y; a[2] = z; a[3] = w;
    }

    Vec4 operator +(const Vec4& other) const
    {
        return Vec4(x+other.x, y+other.y, z+other.z, w+other.w);
    }
    void operator += (const Vec4& other)
    {
        x+=other.x; y+=other.y; z+=other.z; w+=other.w;
    }
    Vec4 operator -(const Vec4 &other) const
    {
        return Vec4(x-other.x, y-other.y, z-other.z, w-other.w);
    }
    void operator -= (const Vec4 &other)
    {
        x-=other.x; y-=other.y; z-=other.z; w-=other.w;
    }
    Vec4 operator -() const
    {
        return Vec4(-x,-y,-z,-w);
    }
    // Component-wise product
    Vec4 operator * (const Vec4 &other) const
    {
        return Vec4(x*other.x, y*other.y, z*other.z, w*other.w);
    }
    template<typename V>
    Vec4 operator * (const V& f) const
    {
        return Vec4(x*f,y*f,z*f,w*f);
    }
    template<typename V>
    void operator *= (const V& f)
    {
        x*=f; y*=f; z*=f; w*=f;
    }
    template<typename V>
    Vec4 operator / (const V& f) const
    {
        return Vec4(x/f,y/f,z/f,w/f);
    }
    template<typename V>
    void operator /= (const V& f)
    {
        *this = *this / f;
    }

    // Squared length
    T Length2() const
    {
        return x*x + y*y + z*z + w*w;
    }

    // Only implemented for T=float
    float Length() const;
    void SetLength(const float l);
    Vec4 WithLength(const float l) const;
    float Distance2To(Vec4 &other);
    Vec4 Normalized() const;
    float Normalize(); // returns the previous length, which is often useful

    T& operator [] (int i) //allow vector[2] = 3   (vector.z=3)
    {
        return *((&x) + i);
    }
    T operator [] (const int i) const
    {
        return *((&x) + i);
    }

    // Resets all four components, including w
    void SetZero()
    {
        x=0; y=0; z=0; w=0;
    }

    // Common alias: RGBA (colors)
    T& r() { return x; }
    T& g() { return y; }
    T& b() { return z; }
    T& a() { return w; }

    const T& r() const { return x; }
    const T& g() const { return y; }
    const T& b() const { return z; }
    const T& a() const { return w; }

    // swizzlers - create a subvector of specific components
    // e.g. Vec2 uv() { return Vec2(x,y); }
    // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); }
#define DEFINE_SWIZZLER2(a, b, a2, b2) \
    _DEFINE_SWIZZLER2(a, b, a##b); \
    _DEFINE_SWIZZLER2(a, b, a2##b2); \
    _DEFINE_SWIZZLER2(b, a, b##a); \
    _DEFINE_SWIZZLER2(b, a, b2##a2);

    DEFINE_SWIZZLER2(x, y, r, g);
    DEFINE_SWIZZLER2(x, z, r, b);
    DEFINE_SWIZZLER2(x, w, r, a);
    DEFINE_SWIZZLER2(y, z, g, b);
    DEFINE_SWIZZLER2(y, w, g, a);
    DEFINE_SWIZZLER2(z, w, b, a);
#undef DEFINE_SWIZZLER2
#undef _DEFINE_SWIZZLER2

    // Same scheme for three-component subvectors: every permutation of the
    // given components, once with xyzw names and once with rgba names
#define _DEFINE_SWIZZLER3(a, b, c, name) Vec3<T> name() const { return Vec3<T>(a, b, c); }
#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \
    _DEFINE_SWIZZLER3(a, b, c, a##b##c); \
    _DEFINE_SWIZZLER3(a, c, b, a##c##b); \
    _DEFINE_SWIZZLER3(b, a, c, b##a##c); \
    _DEFINE_SWIZZLER3(b, c, a, b##c##a); \
    _DEFINE_SWIZZLER3(c, a, b, c##a##b); \
    _DEFINE_SWIZZLER3(c, b, a, c##b##a); \
    _DEFINE_SWIZZLER3(a, b, c, a2##b2##c2); \
    _DEFINE_SWIZZLER3(a, c, b, a2##c2##b2); \
    _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \
    _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \
    _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \
    _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2);

    DEFINE_SWIZZLER3(x, y, z, r, g, b);
    DEFINE_SWIZZLER3(x, y, w, r, g, a);
    DEFINE_SWIZZLER3(x, z, w, r, b, a);
    DEFINE_SWIZZLER3(y, z, w, g, b, a);
#undef DEFINE_SWIZZLER3
#undef _DEFINE_SWIZZLER3
};


// Scalar * vector, with the scalar on the left-hand side
template<typename T, typename V>
Vec4<T> operator * (const V& f, const Vec4<T>& vec)
{
    return Vec4<T>(f*vec.x,f*vec.y,f*vec.z,f*vec.w);
}

typedef Vec4<float> Vec4f;


// Dot products: sum of the component-wise products
template<typename T>
static inline T Dot(const Vec2<T>& a, const Vec2<T>& b)
{
    return a.x*b.x + a.y*b.y;
}

template<typename T>
static inline T Dot(const Vec3<T>& a, const Vec3<T>& b)
{
    return a.x*b.x + a.y*b.y + a.z*b.z;
}

template<typename T>
static inline T Dot(const Vec4<T>& a, const Vec4<T>& b)
{
    return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
}

// Cross product a x b
template<typename T>
static inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b)
{
    return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
}

// linear interpolation via float: 0.0=begin, 1.0=end
template<typename X>
static inline X Lerp(const X& begin, const X& end, const float t)
{
    return begin*(1.f-t) + end*t;
}

// linear interpolation via int: 0=begin, base=end
template<typename X, int base>
static inline X LerpInt(const X& begin, const X& end, const int t)
{
    return (begin*(base-t) + end*t) / base;
}

// Utility vector factories
template<typename T>
static inline Vec2<T> MakeVec2(const T& x, const T& y)
{
    return Vec2<T>{x, y};
}

template<typename T>
static inline Vec3<T> MakeVec3(const T& x, const T& y, const T& z)
{
    return Vec3<T>{x, y, z};
}

template<typename T>
static inline Vec4<T> MakeVec4(const T& x, const T& y, const T& z, const T& w)
{
    return Vec4<T>{x, y, z, w};
}

} // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index f0fa3aba9..81af57336 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -4,126 +4,567 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
7#include <initializer_list> 8#include <initializer_list>
8#include <map> 9#include <map>
9 10
10#include "common/bit_field.h" 11#include "common/bit_field.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
12#include "common/register_set.h" 13
14#include "core/mem_map.h"
13 15
14namespace Pica { 16namespace Pica {
15 17
18// Returns index corresponding to the Regs member labeled by field_name
19// TODO: Due to Visual Studio bug 209229, offsetof does not return constant expressions
20// when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])).
21// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
22// Hopefully, this will be fixed sometime in the future.
23// For lack of better alternatives, we currently hardcode the offsets when constant
24// expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
25// will then make sure the offsets indeed match the automatically calculated ones).
26#define PICA_REG_INDEX(field_name) (offsetof(Pica::Regs, field_name) / sizeof(u32))
27#if defined(_MSC_VER)
28#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index)
29#else
30// NOTE: Yeah, hacking in a static_assert here just to work around the lacking MSVC compiler
31// really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX
32// and then performs a (no-op) cast to size_t iff the second argument matches the expected
33// field offset. Otherwise, the compiler will fail to compile this code.
34#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
35 ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), size_t>::type)PICA_REG_INDEX(field_name))
36#endif // _MSC_VER
37
16struct Regs { 38struct Regs {
17 enum Id : u32 { 39
18 ViewportSizeX = 0x41, 40// helper macro to properly align structure members.
19 ViewportInvSizeX = 0x42, 41// Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121",
20 ViewportSizeY = 0x43, 42// depending on the current source line to make sure variable names are unique.
21 ViewportInvSizeY = 0x44, 43#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y
22 ViewportCorner = 0x68, 44#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y)
23 DepthBufferFormat = 0x116, 45#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)];
24 ColorBufferFormat = 0x117, 46
25 DepthBufferAddress = 0x11C, 47 INSERT_PADDING_WORDS(0x41);
26 ColorBufferAddress = 0x11D, 48
27 ColorBufferSize = 0x11E, 49 BitField<0, 24, u32> viewport_size_x;
28 50 INSERT_PADDING_WORDS(0x1);
29 VertexArrayBaseAddr = 0x200, 51 BitField<0, 24, u32> viewport_size_y;
30 VertexDescriptor = 0x201, // 0x202 52
31 VertexAttributeOffset = 0x203, // 0x206,0x209,0x20C,0x20F,0x212,0x215,0x218,0x21B,0x21E,0x221,0x224 53 INSERT_PADDING_WORDS(0x9);
32 VertexAttributeInfo0 = 0x204, // 0x207,0x20A,0x20D,0x210,0x213,0x216,0x219,0x21C,0x21F,0x222,0x225 54
33 VertexAttributeInfo1 = 0x205, // 0x208,0x20B,0x20E,0x211,0x214,0x217,0x21A,0x21D,0x220,0x223,0x226 55 BitField<0, 24, u32> viewport_depth_range; // float24
34 56 BitField<0, 24, u32> viewport_depth_far_plane; // float24
35 NumIds = 0x300, 57
58 INSERT_PADDING_WORDS(0x1);
59
60 union {
61 // Maps components of output vertex attributes to semantics
62 enum Semantic : u32
63 {
64 POSITION_X = 0,
65 POSITION_Y = 1,
66 POSITION_Z = 2,
67 POSITION_W = 3,
68
69 COLOR_R = 8,
70 COLOR_G = 9,
71 COLOR_B = 10,
72 COLOR_A = 11,
73
74 TEXCOORD0_U = 12,
75 TEXCOORD0_V = 13,
76 TEXCOORD1_U = 14,
77 TEXCOORD1_V = 15,
78 TEXCOORD2_U = 22,
79 TEXCOORD2_V = 23,
80
81 INVALID = 31,
82 };
83
84 BitField< 0, 5, Semantic> map_x;
85 BitField< 8, 5, Semantic> map_y;
86 BitField<16, 5, Semantic> map_z;
87 BitField<24, 5, Semantic> map_w;
88 } vs_output_attributes[7];
89
90 INSERT_PADDING_WORDS(0x11);
91
92 union {
93 BitField< 0, 16, u32> x;
94 BitField<16, 16, u32> y;
95 } viewport_corner;
96
97 INSERT_PADDING_WORDS(0xa7);
98
99 struct {
100 enum ColorFormat : u32 {
101 RGBA8 = 0,
102 RGB8 = 1,
103 RGBA5551 = 2,
104 RGB565 = 3,
105 RGBA4 = 4,
106 };
107
108 INSERT_PADDING_WORDS(0x6);
109
110 u32 depth_format;
111 u32 color_format;
112
113 INSERT_PADDING_WORDS(0x4);
114
115 u32 depth_buffer_address;
116 u32 color_buffer_address;
117
118 union {
119 // Apparently, the framebuffer width is stored as expected,
120 // while the height is stored as the actual height minus one.
121 // Hence, don't access these fields directly but use the accessors
122 // GetWidth() and GetHeight() instead.
123 BitField< 0, 11, u32> width;
124 BitField<12, 10, u32> height;
125 };
126
127 INSERT_PADDING_WORDS(0x1);
128
129 inline u32 GetColorBufferAddress() const {
130 return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(color_buffer_address));
131 }
132 inline u32 GetDepthBufferAddress() const {
133 return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(depth_buffer_address));
134 }
135
136 inline u32 GetWidth() const {
137 return width;
138 }
139
140 inline u32 GetHeight() const {
141 return height + 1;
142 }
143 } framebuffer;
144
145 INSERT_PADDING_WORDS(0xe0);
146
147 struct {
148 enum class Format : u64 {
149 BYTE = 0,
150 UBYTE = 1,
151 SHORT = 2,
152 FLOAT = 3,
153 };
154
155 BitField<0, 29, u32> base_address;
156
157 inline u32 GetBaseAddress() const {
158 // TODO: Ugly, should fix PhysicalToVirtualAddress instead
159 return DecodeAddressRegister(base_address) - Memory::FCRAM_PADDR + Memory::HEAP_GSP_VADDR;
160 }
161
162 // Descriptor for internal vertex attributes
163 union {
164 BitField< 0, 2, Format> format0; // size of one element
165 BitField< 2, 2, u64> size0; // number of elements minus 1
166 BitField< 4, 2, Format> format1;
167 BitField< 6, 2, u64> size1;
168 BitField< 8, 2, Format> format2;
169 BitField<10, 2, u64> size2;
170 BitField<12, 2, Format> format3;
171 BitField<14, 2, u64> size3;
172 BitField<16, 2, Format> format4;
173 BitField<18, 2, u64> size4;
174 BitField<20, 2, Format> format5;
175 BitField<22, 2, u64> size5;
176 BitField<24, 2, Format> format6;
177 BitField<26, 2, u64> size6;
178 BitField<28, 2, Format> format7;
179 BitField<30, 2, u64> size7;
180 BitField<32, 2, Format> format8;
181 BitField<34, 2, u64> size8;
182 BitField<36, 2, Format> format9;
183 BitField<38, 2, u64> size9;
184 BitField<40, 2, Format> format10;
185 BitField<42, 2, u64> size10;
186 BitField<44, 2, Format> format11;
187 BitField<46, 2, u64> size11;
188
189 BitField<48, 12, u64> attribute_mask;
190
191 // number of total attributes minus 1
192 BitField<60, 4, u64> num_extra_attributes;
193 };
194
195 inline Format GetFormat(int n) const {
196 Format formats[] = {
197 format0, format1, format2, format3,
198 format4, format5, format6, format7,
199 format8, format9, format10, format11
200 };
201 return formats[n];
202 }
203
204 inline int GetNumElements(int n) const {
205 u64 sizes[] = {
206 size0, size1, size2, size3,
207 size4, size5, size6, size7,
208 size8, size9, size10, size11
209 };
210 return (int)sizes[n]+1;
211 }
212
213 inline int GetElementSizeInBytes(int n) const {
214 return (GetFormat(n) == Format::FLOAT) ? 4 :
215 (GetFormat(n) == Format::SHORT) ? 2 : 1;
216 }
217
218 inline int GetStride(int n) const {
219 return GetNumElements(n) * GetElementSizeInBytes(n);
220 }
221
222 inline int GetNumTotalAttributes() const {
223 return (int)num_extra_attributes+1;
224 }
225
226 // Attribute loaders map the source vertex data to input attributes
227 // This e.g. allows to load different attributes from different memory locations
228 struct {
229 // Source attribute data offset from the base address
230 u32 data_offset;
231
232 union {
233 BitField< 0, 4, u64> comp0;
234 BitField< 4, 4, u64> comp1;
235 BitField< 8, 4, u64> comp2;
236 BitField<12, 4, u64> comp3;
237 BitField<16, 4, u64> comp4;
238 BitField<20, 4, u64> comp5;
239 BitField<24, 4, u64> comp6;
240 BitField<28, 4, u64> comp7;
241 BitField<32, 4, u64> comp8;
242 BitField<36, 4, u64> comp9;
243 BitField<40, 4, u64> comp10;
244 BitField<44, 4, u64> comp11;
245
246 // bytes for a single vertex in this loader
247 BitField<48, 8, u64> byte_count;
248
249 BitField<60, 4, u64> component_count;
250 };
251
252 inline int GetComponent(int n) const {
253 u64 components[] = {
254 comp0, comp1, comp2, comp3,
255 comp4, comp5, comp6, comp7,
256 comp8, comp9, comp10, comp11
257 };
258 return (int)components[n];
259 }
260 } attribute_loaders[12];
261 } vertex_attributes;
262
263 struct {
264 enum IndexFormat : u32 {
265 BYTE = 0,
266 SHORT = 1,
267 };
268
269 union {
270 BitField<0, 31, u32> offset; // relative to base attribute address
271 BitField<31, 1, IndexFormat> format;
272 };
273 } index_array;
274
275 // Number of vertices to render
276 u32 num_vertices;
277
278 INSERT_PADDING_WORDS(0x5);
279
280 // These two trigger rendering of triangles
281 u32 trigger_draw;
282 u32 trigger_draw_indexed;
283
284 INSERT_PADDING_WORDS(0x2e);
285
286 enum class TriangleTopology : u32 {
287 List = 0,
288 Strip = 1,
289 Fan = 2,
290 ListIndexed = 3, // TODO: No idea if this is correct
36 }; 291 };
37 292
38 template<Id id> 293 BitField<8, 2, TriangleTopology> triangle_topology;
39 union Struct;
40};
41 294
42static inline Regs::Id VertexAttributeOffset(int n) 295 INSERT_PADDING_WORDS(0x5b);
43{
44 return static_cast<Regs::Id>(0x203 + 3*n);
45}
46 296
47static inline Regs::Id VertexAttributeInfo0(int n) 297 // Offset to shader program entry point (in words)
48{ 298 BitField<0, 16, u32> vs_main_offset;
49 return static_cast<Regs::Id>(0x204 + 3*n);
50}
51 299
52static inline Regs::Id VertexAttributeInfo1(int n) 300 union {
53{ 301 BitField< 0, 4, u64> attribute0_register;
54 return static_cast<Regs::Id>(0x205 + 3*n); 302 BitField< 4, 4, u64> attribute1_register;
55} 303 BitField< 8, 4, u64> attribute2_register;
304 BitField<12, 4, u64> attribute3_register;
305 BitField<16, 4, u64> attribute4_register;
306 BitField<20, 4, u64> attribute5_register;
307 BitField<24, 4, u64> attribute6_register;
308 BitField<28, 4, u64> attribute7_register;
309 BitField<32, 4, u64> attribute8_register;
310 BitField<36, 4, u64> attribute9_register;
311 BitField<40, 4, u64> attribute10_register;
312 BitField<44, 4, u64> attribute11_register;
313 BitField<48, 4, u64> attribute12_register;
314 BitField<52, 4, u64> attribute13_register;
315 BitField<56, 4, u64> attribute14_register;
316 BitField<60, 4, u64> attribute15_register;
56 317
57union CommandHeader { 318 int GetRegisterForAttribute(int attribute_index) {
58 CommandHeader(u32 h) : hex(h) {} 319 u64 fields[] = {
320 attribute0_register, attribute1_register, attribute2_register, attribute3_register,
321 attribute4_register, attribute5_register, attribute6_register, attribute7_register,
322 attribute8_register, attribute9_register, attribute10_register, attribute11_register,
323 attribute12_register, attribute13_register, attribute14_register, attribute15_register,
324 };
325 return (int)fields[attribute_index];
326 }
327 } vs_input_register_map;
59 328
60 u32 hex; 329 INSERT_PADDING_WORDS(0x3);
61 330
62 BitField< 0, 16, Regs::Id> cmd_id; 331 struct {
63 BitField<16, 4, u32> parameter_mask; 332 enum Format : u32
64 BitField<20, 11, u32> extra_data_length; 333 {
65 BitField<31, 1, u32> group_commands; 334 FLOAT24 = 0,
66}; 335 FLOAT32 = 1
336 };
67 337
68static std::map<Regs::Id, const char*> command_names = { 338 bool IsFloat32() const {
69 {Regs::ViewportSizeX, "ViewportSizeX" }, 339 return format == FLOAT32;
70 {Regs::ViewportInvSizeX, "ViewportInvSizeX" }, 340 }
71 {Regs::ViewportSizeY, "ViewportSizeY" }, 341
72 {Regs::ViewportInvSizeY, "ViewportInvSizeY" }, 342 union {
73 {Regs::ViewportCorner, "ViewportCorner" }, 343 // Index of the next uniform to write to
74 {Regs::DepthBufferFormat, "DepthBufferFormat" }, 344 // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
75 {Regs::ColorBufferFormat, "ColorBufferFormat" }, 345 BitField<0, 7, u32> index;
76 {Regs::DepthBufferAddress, "DepthBufferAddress" }, 346
77 {Regs::ColorBufferAddress, "ColorBufferAddress" }, 347 BitField<31, 1, Format> format;
78 {Regs::ColorBufferSize, "ColorBufferSize" }, 348 };
79}; 349
350 // Writing to these registers sets the "current" uniform.
351 // TODO: It's not clear how the hardware stores what the "current" uniform is.
352 u32 set_value[8];
353
354 } vs_uniform_setup;
355
356 INSERT_PADDING_WORDS(0x2);
357
358 struct {
359 u32 begin_load;
360
361 // Writing to these registers sets the "current" word in the shader program.
362 // TODO: It's not clear how the hardware stores what the "current" word is.
363 u32 set_word[8];
364 } vs_program;
365
366 INSERT_PADDING_WORDS(0x1);
80 367
81template<> 368 // This register group is used to load an internal table of swizzling patterns,
82union Regs::Struct<Regs::ViewportSizeX> { 369 // which are indexed by each shader instruction to specify vector component swizzling.
83 BitField<0, 24, u32> value; 370 struct {
371 u32 begin_load;
372
373 // Writing to these registers sets the "current" swizzle pattern in the table.
374 // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is.
375 u32 set_word[8];
376 } vs_swizzle_patterns;
377
378 INSERT_PADDING_WORDS(0x22);
379
380#undef INSERT_PADDING_WORDS_HELPER1
381#undef INSERT_PADDING_WORDS_HELPER2
382#undef INSERT_PADDING_WORDS
383
384 // Map register indices to names readable by humans
385 // Used for debugging purposes, so performance is not an issue here
386 static std::string GetCommandName(int index) {
387 std::map<u32, std::string> map;
388 Regs regs;
389
390 // TODO: MSVC does not support using offsetof() on non-static data members even though this
391 // is technically allowed since C++11. Hence, this functionality is disabled until
392 // MSVC properly supports it.
393 #ifndef _MSC_VER
394 #define ADD_FIELD(name) \
395 do { \
396 map.insert({PICA_REG_INDEX(name), #name}); \
397 for (u32 i = PICA_REG_INDEX(name) + 1; i < PICA_REG_INDEX(name) + sizeof(regs.name) / 4; ++i) \
398 map.insert({i, #name + std::string("+") + std::to_string(i-PICA_REG_INDEX(name))}); \
399 } while(false)
400
401 ADD_FIELD(viewport_size_x);
402 ADD_FIELD(viewport_size_y);
403 ADD_FIELD(viewport_depth_range);
404 ADD_FIELD(viewport_depth_far_plane);
405 ADD_FIELD(viewport_corner);
406 ADD_FIELD(framebuffer);
407 ADD_FIELD(vertex_attributes);
408 ADD_FIELD(index_array);
409 ADD_FIELD(num_vertices);
410 ADD_FIELD(trigger_draw);
411 ADD_FIELD(trigger_draw_indexed);
412 ADD_FIELD(triangle_topology);
413 ADD_FIELD(vs_main_offset);
414 ADD_FIELD(vs_input_register_map);
415 ADD_FIELD(vs_uniform_setup);
416 ADD_FIELD(vs_program);
417 ADD_FIELD(vs_swizzle_patterns);
418
419 #undef ADD_FIELD
420 #endif // _MSC_VER
421
422 // Return empty string if no match is found
423 return map[index];
424 }
425
426 static inline int NumIds() {
427 return sizeof(Regs) / sizeof(u32);
428 }
429
430 u32& operator [] (int index) const {
431 u32* content = (u32*)this;
432 return content[index];
433 }
434
435 u32& operator [] (int index) {
436 u32* content = (u32*)this;
437 return content[index];
438 }
439
440private:
/*
 * Most physical addresses which Pica registers refer to are 8-byte aligned.
 * This function should be used to get the address from a raw register value:
 * the raw value is scaled by 8 (with the usual 32-bit unsigned wrap-around).
 */
static inline u32 DecodeAddressRegister(u32 register_value) {
    const u32 address = register_value << 3;
    return address;
}
84}; 448};
85 449
86template<> 450// TODO: MSVC does not support using offsetof() on non-static data members even though this
87union Regs::Struct<Regs::ViewportSizeY> { 451// is technically allowed since C++11. This macro should be enabled once MSVC adds
88 BitField<0, 24, u32> value; 452// support for that.
453#ifndef _MSC_VER
454#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position")
455
456ASSERT_REG_POSITION(viewport_size_x, 0x41);
457ASSERT_REG_POSITION(viewport_size_y, 0x43);
458ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
459ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e);
460ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
461ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
462ASSERT_REG_POSITION(viewport_corner, 0x68);
463ASSERT_REG_POSITION(framebuffer, 0x110);
464ASSERT_REG_POSITION(vertex_attributes, 0x200);
465ASSERT_REG_POSITION(index_array, 0x227);
466ASSERT_REG_POSITION(num_vertices, 0x228);
467ASSERT_REG_POSITION(trigger_draw, 0x22e);
468ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
469ASSERT_REG_POSITION(triangle_topology, 0x25e);
470ASSERT_REG_POSITION(vs_main_offset, 0x2ba);
471ASSERT_REG_POSITION(vs_input_register_map, 0x2bb);
472ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0);
473ASSERT_REG_POSITION(vs_program, 0x2cb);
474ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5);
475
476#undef ASSERT_REG_POSITION
477#endif // !defined(_MSC_VER)
478
479// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
480static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
481static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
482
483extern Regs registers; // TODO: Not sure if we want to have one global instance for this
484
485
// Value type standing in for the 24-bit floating point numbers used by Pica registers.
// Instances can only be created through the FromFloat32/FromRawFloat24 factories. The value is
// held in a regular 32-bit float, so the arithmetic and comparison operators below are plain
// float operations on the stored value.
struct float24 {
    // Wraps a host float without any conversion.
    static float24 FromFloat32(float val) {
        float24 ret;
        ret.value = val;
        return ret;
    }

    // Decodes a raw 24-bit register value.
    // Layout (as decoded below): bit 23 = sign, bits 16-22 = exponent (bias 63),
    // bits 0-15 = mantissa with an implicit leading one.
    // Only the low 24 bits of hex are interpreted; anything above bit 23 is ignored, so stray
    // upper bits can no longer be mistaken for the sign.
    // TODO: No idea if this works as intended
    static float24 FromRawFloat24(u32 hex) {
        hex &= 0xFFFFFF;

        float24 ret;
        if (hex == 0) {
            ret.value = 0;
        } else {
            u32 mantissa = hex & 0xFFFF;
            u32 exponent = (hex >> 16) & 0x7F;
            u32 sign = hex >> 23;
            ret.value = powf(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * powf(2.0f, -16.f));
            if (sign)
                ret.value = -ret.value;
        }
        return ret;
    }

    // Not recommended for anything but logging
    float ToFloat32() const {
        return value;
    }

    float24 operator * (const float24& flt) const {
        return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
    }

    float24 operator / (const float24& flt) const {
        return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
    }

    float24 operator + (const float24& flt) const {
        return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
    }

    float24 operator - (const float24& flt) const {
        return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
    }

    float24 operator - () const {
        return float24::FromFloat32(-ToFloat32());
    }

    bool operator < (const float24& flt) const {
        return ToFloat32() < flt.ToFloat32();
    }

    bool operator > (const float24& flt) const {
        return ToFloat32() > flt.ToFloat32();
    }

    bool operator >= (const float24& flt) const {
        return ToFloat32() >= flt.ToFloat32();
    }

    bool operator <= (const float24& flt) const {
        return ToFloat32() <= flt.ToFloat32();
    }

private:
    // Default construction leaves `value` uninitialized; the factories assign it right away.
    float24() = default;

    // Stored as a regular float, merely for convenience
    // TODO: Perform proper arithmetic on this!
    float value;
};
90 558
91template<> 559union CommandHeader {
92union Regs::Struct<Regs::VertexDescriptor> { 560 CommandHeader(u32 h) : hex(h) {}
93 enum class Format : u64 { 561
94 BYTE = 0, 562 u32 hex;
95 UBYTE = 1,
96 SHORT = 2,
97 FLOAT = 3,
98 };
99 563
100 BitField< 0, 2, Format> format0; 564 BitField< 0, 16, u32> cmd_id;
101 BitField< 2, 2, u64> size0; // number of elements minus 1 565 BitField<16, 4, u32> parameter_mask;
102 BitField< 4, 2, Format> format1; 566 BitField<20, 11, u32> extra_data_length;
103 BitField< 6, 2, u64> size1; 567 BitField<31, 1, u32> group_commands;
104 BitField< 8, 2, Format> format2;
105 BitField<10, 2, u64> size2;
106 BitField<12, 2, Format> format3;
107 BitField<14, 2, u64> size3;
108 BitField<16, 2, Format> format4;
109 BitField<18, 2, u64> size4;
110 BitField<20, 2, Format> format5;
111 BitField<22, 2, u64> size5;
112 BitField<24, 2, Format> format6;
113 BitField<26, 2, u64> size6;
114 BitField<28, 2, Format> format7;
115 BitField<30, 2, u64> size7;
116 BitField<32, 2, Format> format8;
117 BitField<34, 2, u64> size8;
118 BitField<36, 2, Format> format9;
119 BitField<38, 2, u64> size9;
120 BitField<40, 2, Format> format10;
121 BitField<42, 2, u64> size10;
122 BitField<44, 2, Format> format11;
123 BitField<46, 2, u64> size11;
124
125 BitField<48, 12, u64> attribute_mask;
126 BitField<60, 4, u64> num_attributes; // number of total attributes minus 1
127}; 568};
128 569
129 570
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
new file mode 100644
index 000000000..2354ffb99
--- /dev/null
+++ b/src/video_core/primitive_assembly.cpp
@@ -0,0 +1,51 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include "clipper.h"
6#include "pica.h"
7#include "primitive_assembly.h"
8#include "vertex_shader.h"
9
10namespace Pica {
11
12namespace PrimitiveAssembly {
13
14static OutputVertex buffer[2];
15static int buffer_index = 0; // TODO: reset this on emulation restart
16
17void SubmitVertex(OutputVertex& vtx)
18{
19 switch (registers.triangle_topology) {
20 case Regs::TriangleTopology::List:
21 case Regs::TriangleTopology::ListIndexed:
22 if (buffer_index < 2) {
23 buffer[buffer_index++] = vtx;
24 } else {
25 buffer_index = 0;
26
27 Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
28 }
29 break;
30
31 case Regs::TriangleTopology::Fan:
32 if (buffer_index == 2) {
33 buffer_index = 0;
34
35 Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
36
37 buffer[1] = vtx;
38 } else {
39 buffer[buffer_index++] = vtx;
40 }
41 break;
42
43 default:
44 ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value());
45 break;
46 }
47}
48
49} // namespace
50
51} // namespace
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h
new file mode 100644
index 000000000..2a2b0c170
--- /dev/null
+++ b/src/video_core/primitive_assembly.h
@@ -0,0 +1,21 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Pica {
8
9namespace VertexShader {
10 struct OutputVertex;
11}
12
13namespace PrimitiveAssembly {
14
15using VertexShader::OutputVertex;
16
17void SubmitVertex(OutputVertex& vtx);
18
19} // namespace
20
21} // namespace
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
new file mode 100644
index 000000000..a7c1bab3e
--- /dev/null
+++ b/src/video_core/rasterizer.cpp
@@ -0,0 +1,180 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "common/common_types.h"
8
9#include "math.h"
10#include "pica.h"
11#include "rasterizer.h"
12#include "vertex_shader.h"
13
14namespace Pica {
15
16namespace Rasterizer {
17
18static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
19 u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress());
20 u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
21
22 // Assuming RGBA8 format until actual framebuffer format handling is implemented
23 *(color_buffer + x + y * registers.framebuffer.GetWidth() / 2) = value;
24}
25
26static u32 GetDepth(int x, int y) {
27 u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
28
29 // Assuming 16-bit depth buffer format until actual format handling is implemented
30 return *(depth_buffer + x + y * registers.framebuffer.GetWidth() / 2);
31}
32
33static void SetDepth(int x, int y, u16 value) {
34 u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
35
36 // Assuming 16-bit depth buffer format until actual format handling is implemented
37 *(depth_buffer + x + y * registers.framebuffer.GetWidth() / 2) = value;
38}
39
40void ProcessTriangle(const VertexShader::OutputVertex& v0,
41 const VertexShader::OutputVertex& v1,
42 const VertexShader::OutputVertex& v2)
43{
44 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
45 struct Fix12P4 {
46 Fix12P4() {}
47 Fix12P4(u16 val) : val(val) {}
48
49 static u16 FracMask() { return 0xF; }
50 static u16 IntMask() { return (u16)~0xF; }
51
52 operator u16() const {
53 return val;
54 }
55
56 bool operator < (const Fix12P4& oth) const {
57 return (u16)*this < (u16)oth;
58 }
59
60 private:
61 u16 val;
62 };
63
64 // vertex positions in rasterizer coordinates
65 auto FloatToFix = [](float24 flt) {
66 return Fix12P4(flt.ToFloat32() * 16.0f);
67 };
68 auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
69 return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
70 };
71 Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
72 ScreenToRasterizerCoordinates(v1.screenpos),
73 ScreenToRasterizerCoordinates(v2.screenpos) };
74
75 // TODO: Proper scissor rect test!
76 u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
77 u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
78 u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
79 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
80
81 min_x = min_x & Fix12P4::IntMask();
82 min_y = min_y & Fix12P4::IntMask();
83 max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask();
84 max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask();
85
86 // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
87 // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
88 // values which are added to the barycentric coordinates w0, w1 and w2, respectively.
89 // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones...
90 auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx,
91 const Math::Vec2<Fix12P4>& line1,
92 const Math::Vec2<Fix12P4>& line2)
93 {
94 if (line1.y == line2.y) {
95 // just check if vertex is above us => bottom line parallel to x-axis
96 return vtx.y < line1.y;
97 } else {
98 // check if vertex is on our left => right side
99 // TODO: Not sure how likely this is to overflow
100 return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y);
101 }
102 };
103 int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0;
104 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
105 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
106
107 // TODO: Not sure if looping through x first might be faster
108 for (u16 y = min_y; y < max_y; y += 0x10) {
109 for (u16 x = min_x; x < max_x; x += 0x10) {
110
111 // Calculate the barycentric coordinates w0, w1 and w2
112 auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
113 const Math::Vec2<Fix12P4>& vtx2,
114 const Math::Vec2<Fix12P4>& vtx3) {
115 const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0);
116 const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0);
117 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
118 return Cross(vec1, vec2).z;
119 };
120
121 int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
122 int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
123 int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
124 int wsum = w0 + w1 + w2;
125
126 // If current pixel is not covered by the current primitive
127 if (w0 < 0 || w1 < 0 || w2 < 0)
128 continue;
129
130 // Perspective correct attribute interpolation:
131 // Attribute values cannot be calculated by simple linear interpolation since
132 // they are not linear in screen space. For example, when interpolating a
133 // texture coordinate across two vertices, something simple like
134 // u = (u0*w0 + u1*w1)/(w0+w1)
135 // will not work. However, the attribute value divided by the
136 // clipspace w-coordinate (u/w) and and the inverse w-coordinate (1/w) are linear
137 // in screenspace. Hence, we can linearly interpolate these two independently and
138 // calculate the interpolated attribute by dividing the results.
139 // I.e.
140 // u_over_w = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1)
141 // one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
142 // u = u_over_w / one_over_w
143 //
144 // The generalization to three vertices is straightforward in baricentric coordinates.
145 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
146 auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w,
147 attr1 / v1.pos.w,
148 attr2 / v2.pos.w);
149 auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w,
150 float24::FromFloat32(1.f) / v1.pos.w,
151 float24::FromFloat32(1.f) / v2.pos.w);
152 auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0),
153 float24::FromFloat32(w1),
154 float24::FromFloat32(w2));
155
156 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
157 float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates);
158 return interpolated_attr_over_w / interpolated_w_inverse;
159 };
160
161 Math::Vec4<u8> primary_color{
162 (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255),
163 (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255),
164 (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255),
165 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
166 };
167
168 u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
169 (float)v1.screenpos[2].ToFloat32() * w1 +
170 (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536?
171 SetDepth(x >> 4, y >> 4, z);
172
173 DrawPixel(x >> 4, y >> 4, primary_color);
174 }
175 }
176}
177
178} // namespace Rasterizer
179
180} // namespace Pica
diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h
new file mode 100644
index 000000000..500be9462
--- /dev/null
+++ b/src/video_core/rasterizer.h
@@ -0,0 +1,21 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Pica {
8
9namespace VertexShader {
10 struct OutputVertex;
11}
12
13namespace Rasterizer {
14
15void ProcessTriangle(const VertexShader::OutputVertex& v0,
16 const VertexShader::OutputVertex& v1,
17 const VertexShader::OutputVertex& v2);
18
19} // namespace Rasterizer
20
21} // namespace Pica
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index d0a8ec1da..f11a64fad 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -78,23 +78,23 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) {
78 */ 78 */
79void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { 79void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) {
80 80
81 const auto& framebuffer_top = GPU::g_regs.Get<GPU::Regs::FramebufferTop>(); 81 const auto& framebuffer_top = GPU::g_regs.framebuffer_config[0];
82 const auto& framebuffer_sub = GPU::g_regs.Get<GPU::Regs::FramebufferBottom>(); 82 const auto& framebuffer_sub = GPU::g_regs.framebuffer_config[1];
83 const u32 active_fb_top = (framebuffer_top.active_fb == 1) 83 const u32 active_fb_top = (framebuffer_top.active_fb == 1)
84 ? framebuffer_top.address_left2 84 ? Memory::PhysicalToVirtualAddress(framebuffer_top.address_left2)
85 : framebuffer_top.address_left1; 85 : Memory::PhysicalToVirtualAddress(framebuffer_top.address_left1);
86 const u32 active_fb_sub = (framebuffer_sub.active_fb == 1) 86 const u32 active_fb_sub = (framebuffer_sub.active_fb == 1)
87 ? framebuffer_sub.address_left2 87 ? Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left2)
88 : framebuffer_sub.address_left1; 88 : Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left1);
89 89
90 DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x", 90 DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x",
91 framebuffer_top.stride * framebuffer_top.height, 91 framebuffer_top.stride * framebuffer_top.height,
92 GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.width, 92 active_fb_top, (int)framebuffer_top.width,
93 (int)framebuffer_top.height, (int)framebuffer_top.format); 93 (int)framebuffer_top.height, (int)framebuffer_top.format);
94 94
95 // TODO: This should consider the GPU registers for framebuffer width, height and stride. 95 // TODO: This should consider the GPU registers for framebuffer width, height and stride.
96 FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped); 96 FlipFramebuffer(Memory::GetPointer(active_fb_top), m_xfb_top_flipped);
97 FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_sub), m_xfb_bottom_flipped); 97 FlipFramebuffer(Memory::GetPointer(active_fb_sub), m_xfb_bottom_flipped);
98 98
99 // Blit the top framebuffer 99 // Blit the top framebuffer
100 // ------------------------ 100 // ------------------------
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
new file mode 100644
index 000000000..93830a96a
--- /dev/null
+++ b/src/video_core/vertex_shader.cpp
@@ -0,0 +1,270 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include "pica.h"
6#include "vertex_shader.h"
7#include <core/mem_map.h>
8#include <common/file_util.h>
9
10namespace Pica {
11
12namespace VertexShader {
13
// Floating point uniform registers. GetFloatUniform() hands out references into this
// array, and ProcessShaderCode() reads entry (src1 - 0x20) for src1 indices 0x20..0x7F.
14static struct {
15 Math::Vec4<float24> f[96];
16} shader_uniforms;
17
18
19// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
20// For now, we just keep these local arrays around.
// shader_memory holds the program as 32-bit instruction words; it is written by
// SubmitShaderMemoryChange() and executed by ProcessShaderCode().
21static u32 shader_memory[1024];
// swizzle_data holds the operand descriptor words; it is written by
// SubmitSwizzleDataChange() and indexed by an instruction's operand_desc_id field.
22static u32 swizzle_data[1024];
23
24void SubmitShaderMemoryChange(u32 addr, u32 value)
25{
26 shader_memory[addr] = value;
27}
28
29void SubmitSwizzleDataChange(u32 addr, u32 value)
30{
31 swizzle_data[addr] = value;
32}
33
34Math::Vec4<float24>& GetFloatUniform(u32 index)
35{
36 return shader_uniforms.f[index];
37}
38
// Execution state of one vertex shader invocation. RunShader() fills it in and
// ProcessShaderCode() advances it.
39struct VertexShaderState {
    // Instruction word that is executed next; RunShader() points it into shader_memory.
40 u32* program_counter;
41
    // One entry per input register, pointing at the first component of its source data.
42 const float24* input_register_table[16];
    // One entry per output float (7 output registers with 4 components each).
43 float24* output_register_table[7*4];
44
45 Math::Vec4<float24> temporary_registers[16];
    // Cleared by RunShader(); not read by the interpreter code in this file.
46 bool status_registers[2];
47
    // Marks an unused call stack slot. A RET that finds this value on top of the
    // stack terminates the shader.
48 enum {
49 INVALID_ADDRESS = 0xFFFFFFFF
50 };
    // Return addresses of active CALLs, stored as word offsets into shader_memory.
51 u32 call_stack[8]; // TODO: What is the maximal call stack depth?
    // Points at the current top entry of call_stack.
52 u32* call_stack_pointer;
53};
54
55static void ProcessShaderCode(VertexShaderState& state) {
56 while (true) {
57 bool increment_pc = true;
58 bool exit_loop = false;
59 const Instruction& instr = *(const Instruction*)state.program_counter;
60
61 const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
62 : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
63 : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x
64 : nullptr;
65 const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
66 : &state.temporary_registers[instr.common.src2-0x10].x;
67 // TODO: Unsure about the limit values
68 float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest]
69 : (instr.common.dest <= 0x3C) ? nullptr
70 : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
71 : nullptr;
72
73 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
74
75 const float24 src1[4] = {
76 src1_[(int)swizzle.GetSelectorSrc1(0)],
77 src1_[(int)swizzle.GetSelectorSrc1(1)],
78 src1_[(int)swizzle.GetSelectorSrc1(2)],
79 src1_[(int)swizzle.GetSelectorSrc1(3)],
80 };
81 const float24 src2[4] = {
82 src2_[(int)swizzle.GetSelectorSrc2(0)],
83 src2_[(int)swizzle.GetSelectorSrc2(1)],
84 src2_[(int)swizzle.GetSelectorSrc2(2)],
85 src2_[(int)swizzle.GetSelectorSrc2(3)],
86 };
87
88 switch (instr.opcode) {
89 case Instruction::OpCode::ADD:
90 {
91 for (int i = 0; i < 4; ++i) {
92 if (!swizzle.DestComponentEnabled(i))
93 continue;
94
95 dest[i] = src1[i] + src2[i];
96 }
97
98 break;
99 }
100
101 case Instruction::OpCode::MUL:
102 {
103 for (int i = 0; i < 4; ++i) {
104 if (!swizzle.DestComponentEnabled(i))
105 continue;
106
107 dest[i] = src1[i] * src2[i];
108 }
109
110 break;
111 }
112
113 case Instruction::OpCode::DP3:
114 case Instruction::OpCode::DP4:
115 {
116 float24 dot = float24::FromFloat32(0.f);
117 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
118 for (int i = 0; i < num_components; ++i)
119 dot = dot + src1[i] * src2[i];
120
121 for (int i = 0; i < num_components; ++i) {
122 if (!swizzle.DestComponentEnabled(i))
123 continue;
124
125 dest[i] = dot;
126 }
127 break;
128 }
129
130 // Reciprocal
131 case Instruction::OpCode::RCP:
132 {
133 for (int i = 0; i < 4; ++i) {
134 if (!swizzle.DestComponentEnabled(i))
135 continue;
136
137 // TODO: Be stable against division by zero!
138 // TODO: I think this might be wrong... we should only use one component here
139 dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32());
140 }
141
142 break;
143 }
144
145 // Reciprocal Square Root
146 case Instruction::OpCode::RSQ:
147 {
148 for (int i = 0; i < 4; ++i) {
149 if (!swizzle.DestComponentEnabled(i))
150 continue;
151
152 // TODO: Be stable against division by zero!
153 // TODO: I think this might be wrong... we should only use one component here
154 dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32()));
155 }
156
157 break;
158 }
159
160 case Instruction::OpCode::MOV:
161 {
162 for (int i = 0; i < 4; ++i) {
163 if (!swizzle.DestComponentEnabled(i))
164 continue;
165
166 dest[i] = src1[i];
167 }
168 break;
169 }
170
171 case Instruction::OpCode::RET:
172 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
173 exit_loop = true;
174 } else {
175 state.program_counter = &shader_memory[*state.call_stack_pointer--];
176 *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS;
177 }
178
179 break;
180
181 case Instruction::OpCode::CALL:
182 increment_pc = false;
183
184 _dbg_assert_(GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack));
185
186 *++state.call_stack_pointer = state.program_counter - shader_memory;
187 // TODO: Does this offset refer to the beginning of shader memory?
188 state.program_counter = &shader_memory[instr.flow_control.offset_words];
189 break;
190
191 case Instruction::OpCode::FLS:
192 // TODO: Do whatever needs to be done here?
193 break;
194
195 default:
196 ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
197 (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex);
198 break;
199 }
200
201 if (increment_pc)
202 ++state.program_counter;
203
204 if (exit_loop)
205 break;
206 }
207}
208
209OutputVertex RunShader(const InputVertex& input, int num_attributes)
210{
211 VertexShaderState state;
212
213 const u32* main = &shader_memory[registers.vs_main_offset];
214 state.program_counter = (u32*)main;
215
216 // Setup input register table
217 const auto& attribute_register_map = registers.vs_input_register_map;
218 float24 dummy_register;
219 std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register);
220 if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
221 if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
222 if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
223 if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
224 if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
225 if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
226 if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
227 if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
228 if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
229 if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
230 if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
231 if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
232 if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
233 if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
234 if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
235 if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
236
237 // Setup output register table
238 OutputVertex ret;
239 for (int i = 0; i < 7; ++i) {
240 const auto& output_register_map = registers.vs_output_attributes[i];
241
242 u32 semantics[4] = {
243 output_register_map.map_x, output_register_map.map_y,
244 output_register_map.map_z, output_register_map.map_w
245 };
246
247 for (int comp = 0; comp < 4; ++comp)
248 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
249 }
250
251 state.status_registers[0] = false;
252 state.status_registers[1] = false;
253 std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]),
254 VertexShaderState::INVALID_ADDRESS);
255 state.call_stack_pointer = &state.call_stack[0];
256
257 ProcessShaderCode(state);
258
259 DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
260 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
261 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
262 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
263
264 return ret;
265}
266
267
268} // namespace
269
270} // namespace
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h
new file mode 100644
index 000000000..1b71e367b
--- /dev/null
+++ b/src/video_core/vertex_shader.h
@@ -0,0 +1,211 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <initializer_list>
8
9#include <common/common_types.h>
10
11#include "math.h"
12#include "pica.h"
13
14namespace Pica {
15
16namespace VertexShader {
17
// Input of one vertex shader invocation: up to 16 four-component attributes.
// RunShader() maps the first num_attributes entries onto shader input registers.
18struct InputVertex {
19 Math::Vec4<float24> attr[16];
20};
21
// Result of one vertex shader invocation plus fields filled in by later stages.
// RunShader() addresses the members as one flat array of floats, so member order and
// padding matter; the static_assert below requires the structure to stay POD.
22struct OutputVertex {
23 OutputVertex() = default;
24
25 // VS output attributes
26 Math::Vec4<float24> pos;
27 Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
28 Math::Vec4<float24> color;
29 Math::Vec2<float24> tc0;
30 float24 tc0_v;
31
32 // Padding for optimal alignment
33 float24 pad[14];
34
35 // Attributes used to store intermediate results
36
37 // position after perspective divide
38 Math::Vec3<float24> screenpos;
39
40 // Linear interpolation (in place): this = this * factor + vtx * (1 - factor)
41 // factor: 1=this, 0=vtx
 // Only pos, tc0, screenpos and color are interpolated; dummy, tc0_v and pad are left untouched.
42 void Lerp(float24 factor, const OutputVertex& vtx) {
43 pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
44
45 // TODO: Should perform perspective correct interpolation here...
46 tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
47
48 screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
49
50 color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
51 }
52
53 // Linear interpolation: returns v0 * factor + v1 * (1 - factor)
54 // factor: 1=v0, 0=v1
55 static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {
56 OutputVertex ret = v0;
57 ret.Lerp(factor, v1);
58 return ret;
59 }
60};
61static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
62
63union Instruction {
64 enum class OpCode : u32 {
65 ADD = 0x0,
66 DP3 = 0x1,
67 DP4 = 0x2,
68
69 MUL = 0x8,
70
71 MAX = 0xC,
72 MIN = 0xD,
73 RCP = 0xE,
74 RSQ = 0xF,
75
76 MOV = 0x13,
77
78 RET = 0x21,
79 FLS = 0x22, // Flush
80 CALL = 0x24,
81 };
82
83 std::string GetOpCodeName() const {
84 std::map<OpCode, std::string> map = {
85 { OpCode::ADD, "ADD" },
86 { OpCode::DP3, "DP3" },
87 { OpCode::DP4, "DP4" },
88 { OpCode::MUL, "MUL" },
89 { OpCode::MAX, "MAX" },
90 { OpCode::MIN, "MIN" },
91 { OpCode::RCP, "RCP" },
92 { OpCode::RSQ, "RSQ" },
93 { OpCode::MOV, "MOV" },
94 { OpCode::RET, "RET" },
95 { OpCode::FLS, "FLS" },
96 };
97 auto it = map.find(opcode);
98 if (it == map.end())
99 return "UNK";
100 else
101 return it->second;
102 }
103
104 u32 hex;
105
106 BitField<0x1a, 0x6, OpCode> opcode;
107
108 // General notes:
109 //
110 // When two input registers are used, one of them uses a 5-bit index while the other
111 // one uses a 7-bit index. This is because at most one floating point uniform may be used
112 // as an input.
113
114
115 // Format used e.g. by arithmetic instructions and comparisons
116 // "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats),
117 // while "dest" addresses individual floats.
118 union {
119 BitField<0x00, 0x5, u32> operand_desc_id;
120 BitField<0x07, 0x5, u32> src2;
121 BitField<0x0c, 0x7, u32> src1;
122 BitField<0x13, 0x7, u32> dest;
123 } common;
124
125 // Format used for flow control instructions ("if")
126 union {
127 BitField<0x00, 0x8, u32> num_instructions;
128 BitField<0x0a, 0xc, u32> offset_words;
129 } flow_control;
130};
131
132union SwizzlePattern {
133 u32 hex;
134
135 enum class Selector : u32 {
136 x = 0,
137 y = 1,
138 z = 2,
139 w = 3
140 };
141
142 Selector GetSelectorSrc1(int comp) const {
143 Selector selectors[] = {
144 src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3
145 };
146 return selectors[comp];
147 }
148
149 Selector GetSelectorSrc2(int comp) const {
150 Selector selectors[] = {
151 src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3
152 };
153 return selectors[comp];
154 }
155
156 bool DestComponentEnabled(int i) const {
157 return (dest_mask & (0x8 >> i));
158 }
159
160 std::string SelectorToString(bool src2) const {
161 std::map<Selector, std::string> map = {
162 { Selector::x, "x" },
163 { Selector::y, "y" },
164 { Selector::z, "z" },
165 { Selector::w, "w" }
166 };
167 std::string ret;
168 for (int i = 0; i < 4; ++i) {
169 ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i));
170 }
171 return ret;
172 }
173
174 std::string DestMaskToString() const {
175 std::string ret;
176 for (int i = 0; i < 4; ++i) {
177 if (!DestComponentEnabled(i))
178 ret += "_";
179 else
180 ret += "xyzw"[i];
181 }
182 return ret;
183 }
184
185 // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
186 BitField< 0, 4, u32> dest_mask;
187
188 BitField< 5, 2, Selector> src1_selector_3;
189 BitField< 7, 2, Selector> src1_selector_2;
190 BitField< 9, 2, Selector> src1_selector_1;
191 BitField<11, 2, Selector> src1_selector_0;
192
193 BitField<14, 2, Selector> src2_selector_3;
194 BitField<16, 2, Selector> src2_selector_2;
195 BitField<18, 2, Selector> src2_selector_1;
196 BitField<20, 2, Selector> src2_selector_0;
197
198 BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign?
199};
200
// Stores "value" at word index "addr" of the shader program memory.
201void SubmitShaderMemoryChange(u32 addr, u32 value);
// Stores "value" at word index "addr" of the swizzle pattern (operand descriptor) data.
202void SubmitSwizzleDataChange(u32 addr, u32 value);
203
// Executes the loaded shader program for one vertex. The first "num_attributes" entries
// of input.attr are mapped to shader input registers; returns the resulting output vertex.
204OutputVertex RunShader(const InputVertex& input, int num_attributes);
205
// Returns a writable reference to floating point uniform register "index".
206Math::Vec4<float24>& GetFloatUniform(u32 index);
207
208} // namespace
209
210} // namespace
211
diff --git a/src/video_core/video_core.vcxproj b/src/video_core/video_core.vcxproj
index d77be2bef..48d77cdc4 100644
--- a/src/video_core/video_core.vcxproj
+++ b/src/video_core/video_core.vcxproj
@@ -20,14 +20,25 @@
20 </ItemGroup> 20 </ItemGroup>
21 <ItemGroup> 21 <ItemGroup>
22 <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> 22 <ClCompile Include="renderer_opengl\renderer_opengl.cpp" />
23 <ClCompile Include="clipper.cpp" />
24 <ClCompile Include="command_processor.cpp" />
25 <ClCompile Include="primitive_assembly.cpp" />
26 <ClCompile Include="rasterizer.cpp" />
23 <ClCompile Include="utils.cpp" /> 27 <ClCompile Include="utils.cpp" />
28 <ClCompile Include="vertex_shader.cpp" />
24 <ClCompile Include="video_core.cpp" /> 29 <ClCompile Include="video_core.cpp" />
25 </ItemGroup> 30 </ItemGroup>
26 <ItemGroup> 31 <ItemGroup>
32 <ClInclude Include="clipper.h" />
33 <ClInclude Include="command_processor.h" />
27 <ClInclude Include="gpu_debugger.h" /> 34 <ClInclude Include="gpu_debugger.h" />
35 <ClInclude Include="math.h" />
28 <ClInclude Include="pica.h" /> 36 <ClInclude Include="pica.h" />
37 <ClInclude Include="primitive_assembly.h" />
38 <ClInclude Include="rasterizer.h" />
29 <ClInclude Include="renderer_base.h" /> 39 <ClInclude Include="renderer_base.h" />
30 <ClInclude Include="utils.h" /> 40 <ClInclude Include="utils.h" />
41 <ClInclude Include="vertex_shader.h" />
31 <ClInclude Include="video_core.h" /> 42 <ClInclude Include="video_core.h" />
32 <ClInclude Include="renderer_opengl\renderer_opengl.h" /> 43 <ClInclude Include="renderer_opengl\renderer_opengl.h" />
33 </ItemGroup> 44 </ItemGroup>
diff --git a/src/video_core/video_core.vcxproj.filters b/src/video_core/video_core.vcxproj.filters
index b89ac1ac4..31af4f1df 100644
--- a/src/video_core/video_core.vcxproj.filters
+++ b/src/video_core/video_core.vcxproj.filters
@@ -9,17 +9,28 @@
9 <ClCompile Include="renderer_opengl\renderer_opengl.cpp"> 9 <ClCompile Include="renderer_opengl\renderer_opengl.cpp">
10 <Filter>renderer_opengl</Filter> 10 <Filter>renderer_opengl</Filter>
11 </ClCompile> 11 </ClCompile>
12 <ClCompile Include="clipper.cpp" />
13 <ClCompile Include="command_processor.cpp" />
14 <ClCompile Include="primitive_assembly.cpp" />
15 <ClCompile Include="rasterizer.cpp" />
12 <ClCompile Include="utils.cpp" /> 16 <ClCompile Include="utils.cpp" />
17 <ClCompile Include="vertex_shader.cpp" />
13 <ClCompile Include="video_core.cpp" /> 18 <ClCompile Include="video_core.cpp" />
14 </ItemGroup> 19 </ItemGroup>
15 <ItemGroup> 20 <ItemGroup>
16 <ClInclude Include="renderer_opengl\renderer_opengl.h"> 21 <ClInclude Include="renderer_opengl\renderer_opengl.h">
17 <Filter>renderer_opengl</Filter> 22 <Filter>renderer_opengl</Filter>
18 </ClInclude> 23 </ClInclude>
24 <ClInclude Include="clipper.h" />
25 <ClInclude Include="command_processor.h" />
19 <ClInclude Include="gpu_debugger.h" /> 26 <ClInclude Include="gpu_debugger.h" />
27 <ClInclude Include="math.h" />
20 <ClInclude Include="pica.h" /> 28 <ClInclude Include="pica.h" />
29 <ClInclude Include="primitive_assembly.h" />
30 <ClInclude Include="rasterizer.h" />
21 <ClInclude Include="renderer_base.h" /> 31 <ClInclude Include="renderer_base.h" />
22 <ClInclude Include="utils.h" /> 32 <ClInclude Include="utils.h" />
33 <ClInclude Include="vertex_shader.h" />
23 <ClInclude Include="video_core.h" /> 34 <ClInclude Include="video_core.h" />
24 </ItemGroup> 35 </ItemGroup>
25 <ItemGroup> 36 <ItemGroup>