diff options
Diffstat (limited to 'src')
27 files changed, 2695 insertions, 627 deletions
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index 195197ef5..e98560a19 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp | |||
| @@ -78,12 +78,12 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const | |||
| 78 | // index refers to a specific command | 78 | // index refers to a specific command |
| 79 | const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->parent->index].second; | 79 | const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->parent->index].second; |
| 80 | const GraphicsDebugger::PicaCommand& cmd = cmdlist[item->index]; | 80 | const GraphicsDebugger::PicaCommand& cmd = cmdlist[item->index]; |
| 81 | const Pica::CommandHeader& header = cmd.GetHeader(); | 81 | const Pica::CommandProcessor::CommandHeader& header = cmd.GetHeader(); |
| 82 | 82 | ||
| 83 | if (role == Qt::DisplayRole) { | 83 | if (role == Qt::DisplayRole) { |
| 84 | QString content; | 84 | QString content; |
| 85 | if (index.column() == 0) { | 85 | if (index.column() == 0) { |
| 86 | content = Pica::command_names[header.cmd_id]; | 86 | content = QString::fromLatin1(Pica::Regs::GetCommandName(header.cmd_id).c_str()); |
| 87 | content.append(" "); | 87 | content.append(" "); |
| 88 | } else if (index.column() == 1) { | 88 | } else if (index.column() == 1) { |
| 89 | for (int j = 0; j < cmd.size(); ++j) | 89 | for (int j = 0; j < cmd.size(); ++j) |
diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj index 1f5c714c3..341d3a813 100644 --- a/src/common/common.vcxproj +++ b/src/common/common.vcxproj | |||
| @@ -182,7 +182,6 @@ | |||
| 182 | <ClInclude Include="mem_arena.h" /> | 182 | <ClInclude Include="mem_arena.h" /> |
| 183 | <ClInclude Include="msg_handler.h" /> | 183 | <ClInclude Include="msg_handler.h" /> |
| 184 | <ClInclude Include="platform.h" /> | 184 | <ClInclude Include="platform.h" /> |
| 185 | <ClInclude Include="register_set.h" /> | ||
| 186 | <ClInclude Include="scm_rev.h" /> | 185 | <ClInclude Include="scm_rev.h" /> |
| 187 | <ClInclude Include="std_condition_variable.h" /> | 186 | <ClInclude Include="std_condition_variable.h" /> |
| 188 | <ClInclude Include="std_mutex.h" /> | 187 | <ClInclude Include="std_mutex.h" /> |
diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters index e8c4ce360..59268ce5a 100644 --- a/src/common/common.vcxproj.filters +++ b/src/common/common.vcxproj.filters | |||
| @@ -29,7 +29,6 @@ | |||
| 29 | <ClInclude Include="memory_util.h" /> | 29 | <ClInclude Include="memory_util.h" /> |
| 30 | <ClInclude Include="msg_handler.h" /> | 30 | <ClInclude Include="msg_handler.h" /> |
| 31 | <ClInclude Include="platform.h" /> | 31 | <ClInclude Include="platform.h" /> |
| 32 | <ClInclude Include="register_set.h" /> | ||
| 33 | <ClInclude Include="std_condition_variable.h" /> | 32 | <ClInclude Include="std_condition_variable.h" /> |
| 34 | <ClInclude Include="std_mutex.h" /> | 33 | <ClInclude Include="std_mutex.h" /> |
| 35 | <ClInclude Include="std_thread.h" /> | 34 | <ClInclude Include="std_thread.h" /> |
diff --git a/src/common/register_set.h b/src/common/register_set.h deleted file mode 100644 index ba19a2614..000000000 --- a/src/common/register_set.h +++ /dev/null | |||
| @@ -1,163 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | // Copyright 2014 Tony Wasserka | ||
| 8 | // All rights reserved. | ||
| 9 | // | ||
| 10 | // Redistribution and use in source and binary forms, with or without | ||
| 11 | // modification, are permitted provided that the following conditions are met: | ||
| 12 | // | ||
| 13 | // * Redistributions of source code must retain the above copyright | ||
| 14 | // notice, this list of conditions and the following disclaimer. | ||
| 15 | // * Redistributions in binary form must reproduce the above copyright | ||
| 16 | // notice, this list of conditions and the following disclaimer in the | ||
| 17 | // documentation and/or other materials provided with the distribution. | ||
| 18 | // * Neither the name of the owner nor the names of its contributors may | ||
| 19 | // be used to endorse or promote products derived from this software | ||
| 20 | // without specific prior written permission. | ||
| 21 | // | ||
| 22 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 23 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 24 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 25 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 26 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 27 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 28 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 29 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 30 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 31 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 32 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 33 | |||
| 34 | /* | ||
| 35 | * Standardized way to define a group of registers and corresponding data structures. To define | ||
| 36 | * a new register set, first define a struct containing an enumeration called "Id" containing | ||
| 37 | * all register IDs and a template struct called "Struct". Specialize the Struct struct for any | ||
| 38 | * register ID which needs to be accessed in a specialized way. You can then declare the object | ||
| 39 | * containing all register values using the RegisterSet<BaseType, DefiningStruct> type, where | ||
| 40 | * BaseType is the underlying type of each register (e.g. u32). | ||
| 41 | * Of course, you'll usually want to implement the Struct template such that they are of the same | ||
| 42 | * size as BaseType. However, it's also possible to make it larger, e.g. when you want to describe | ||
| 43 | * multiple registers with the same structure. | ||
| 44 | * | ||
| 45 | * Example: | ||
| 46 | * | ||
| 47 | * struct Regs { | ||
| 48 | * enum Id : u32 { | ||
| 49 | * Value1 = 0, | ||
| 50 | * Value2 = 1, | ||
| 51 | * Value3 = 2, | ||
| 52 | * NumIds = 3 | ||
| 53 | * }; | ||
| 54 | * | ||
| 55 | * // declare register definition structures | ||
| 56 | * template<Id id> | ||
| 57 | * struct Struct; | ||
| 58 | * }; | ||
| 59 | * | ||
| 60 | * // Define register set object | ||
| 61 | * RegisterSet<u32, Regs> registers; | ||
| 62 | * | ||
| 63 | * // define register definition structures | ||
| 64 | * template<> | ||
| 65 | * struct Regs::Struct<Regs::Value1> { | ||
| 66 | * union { | ||
| 67 | * BitField<0, 4, u32> some_field; | ||
| 68 | * BitField<4, 3, u32> some_other_field; | ||
| 69 | * }; | ||
| 70 | * }; | ||
| 71 | * | ||
| 72 | * Usage in external code (within SomeNamespace scope): | ||
| 73 | * | ||
| 74 | * For a register which maps to a single index: | ||
| 75 | * registers.Get<Regs::Value1>().some_field = some_value; | ||
| 76 | * | ||
| 77 | * For a register which maps to different indices, e.g. a group of similar registers | ||
| 78 | * registers.Get<Regs::Value1>(index).some_field = some_value; | ||
| 79 | * | ||
| 80 | * | ||
| 81 | * @tparam BaseType Base type used for storing individual registers, e.g. u32 | ||
| 82 | * @tparam RegDefinition Class defining an enumeration called "Id" and a template<Id id> struct, as described above. | ||
| 83 | * @note RegDefinition::Id needs to have an enum value called NumIds defining the number of registers to be allocated. | ||
| 84 | */ | ||
| 85 | template<typename BaseType, typename RegDefinition> | ||
| 86 | struct RegisterSet { | ||
| 87 | // Register IDs | ||
| 88 | using Id = typename RegDefinition::Id; | ||
| 89 | |||
| 90 | // type used for *this | ||
| 91 | using ThisType = RegisterSet<BaseType, RegDefinition>; | ||
| 92 | |||
| 93 | // Register definition structs, defined in RegDefinition | ||
| 94 | template<Id id> | ||
| 95 | using Struct = typename RegDefinition::template Struct<id>; | ||
| 96 | |||
| 97 | |||
| 98 | /* | ||
| 99 | * Lookup register with the given id and return it as the corresponding structure type. | ||
| 100 | * @note This just forwards the arguments to Get(Id). | ||
| 101 | */ | ||
| 102 | template<Id id> | ||
| 103 | const Struct<id>& Get() const { | ||
| 104 | return Get<id>(id); | ||
| 105 | } | ||
| 106 | |||
| 107 | /* | ||
| 108 | * Lookup register with the given id and return it as the corresponding structure type. | ||
| 109 | * @note This just forwards the arguments to Get(Id). | ||
| 110 | */ | ||
| 111 | template<Id id> | ||
| 112 | Struct<id>& Get() { | ||
| 113 | return Get<id>(id); | ||
| 114 | } | ||
| 115 | |||
| 116 | /* | ||
| 117 | * Lookup register with the given index and return it as the corresponding structure type. | ||
| 118 | * @todo Is this portable with regards to structures larger than BaseType? | ||
| 119 | * @note if index==id, you don't need to specify the function parameter. | ||
| 120 | */ | ||
| 121 | template<Id id> | ||
| 122 | const Struct<id>& Get(const Id& index) const { | ||
| 123 | const int idx = static_cast<size_t>(index); | ||
| 124 | return *reinterpret_cast<const Struct<id>*>(&raw[idx]); | ||
| 125 | } | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Lookup register with the given index and return it as the corresponding structure type. | ||
| 129 | * @note This just forwards the arguments to the const version of Get(Id). | ||
| 130 | * @note if index==id, you don't need to specify the function parameter. | ||
| 131 | */ | ||
| 132 | template<Id id> | ||
| 133 | Struct<id>& Get(const Id& index) { | ||
| 134 | return const_cast<Struct<id>&>(GetThis().Get<id>(index)); | ||
| 135 | } | ||
| 136 | |||
| 137 | /* | ||
| 138 | * Plain array access. | ||
| 139 | * @note If you want to have this casted to a register definition struct, use Get() instead. | ||
| 140 | */ | ||
| 141 | const BaseType& operator[] (const Id& id) const { | ||
| 142 | return raw[static_cast<size_t>(id)]; | ||
| 143 | } | ||
| 144 | |||
| 145 | /* | ||
| 146 | * Plain array access. | ||
| 147 | * @note If you want to have this casted to a register definition struct, use Get() instead. | ||
| 148 | * @note This operator just forwards its argument to the const version. | ||
| 149 | */ | ||
| 150 | BaseType& operator[] (const Id& id) { | ||
| 151 | return const_cast<BaseType&>(GetThis()[id]); | ||
| 152 | } | ||
| 153 | |||
| 154 | private: | ||
| 155 | /* | ||
| 156 | * Returns a const reference to "this". | ||
| 157 | */ | ||
| 158 | const ThisType& GetThis() const { | ||
| 159 | return static_cast<const ThisType&>(*this); | ||
| 160 | } | ||
| 161 | |||
| 162 | BaseType raw[Id::NumIds]; | ||
| 163 | }; | ||
diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index e241b31c8..635f50a53 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp | |||
| @@ -32,7 +32,7 @@ static inline u8* GetCommandBuffer(u32 thread_id) { | |||
| 32 | if (0 == g_shared_memory) | 32 | if (0 == g_shared_memory) |
| 33 | return nullptr; | 33 | return nullptr; |
| 34 | 34 | ||
| 35 | return Kernel::GetSharedMemoryPointer(g_shared_memory, | 35 | return Kernel::GetSharedMemoryPointer(g_shared_memory, |
| 36 | 0x800 + (thread_id * sizeof(CommandBuffer))); | 36 | 0x800 + (thread_id * sizeof(CommandBuffer))); |
| 37 | } | 37 | } |
| 38 | 38 | ||
| @@ -173,11 +173,11 @@ void ExecuteCommand(const Command& command) { | |||
| 173 | case CommandId::SET_COMMAND_LIST_LAST: | 173 | case CommandId::SET_COMMAND_LIST_LAST: |
| 174 | { | 174 | { |
| 175 | auto& params = command.set_command_list_last; | 175 | auto& params = command.set_command_list_last; |
| 176 | WriteGPURegister(GPU::Regs::CommandProcessor + 2, params.address >> 3); | 176 | WriteGPURegister(GPU_REG_INDEX(command_processor_config.address), Memory::VirtualToPhysicalAddress(params.address) >> 3); |
| 177 | WriteGPURegister(GPU::Regs::CommandProcessor, params.size >> 3); | 177 | WriteGPURegister(GPU_REG_INDEX(command_processor_config.size), params.size >> 3); |
| 178 | 178 | ||
| 179 | // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though | 179 | // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though |
| 180 | WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1); | 180 | WriteGPURegister(GPU_REG_INDEX(command_processor_config.trigger), 1); |
| 181 | 181 | ||
| 182 | // TODO: Move this to GPU | 182 | // TODO: Move this to GPU |
| 183 | // TODO: Not sure what units the size is measured in | 183 | // TODO: Not sure what units the size is measured in |
| @@ -193,20 +193,28 @@ void ExecuteCommand(const Command& command) { | |||
| 193 | case CommandId::SET_MEMORY_FILL: | 193 | case CommandId::SET_MEMORY_FILL: |
| 194 | { | 194 | { |
| 195 | auto& params = command.memory_fill; | 195 | auto& params = command.memory_fill; |
| 196 | WriteGPURegister(GPU::Regs::MemoryFill, params.start1 >> 3); | 196 | WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_start), Memory::VirtualToPhysicalAddress(params.start1) >> 3); |
| 197 | WriteGPURegister(GPU::Regs::MemoryFill + 1, params.end1 >> 3); | 197 | WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_end), Memory::VirtualToPhysicalAddress(params.end1) >> 3); |
| 198 | WriteGPURegister(GPU::Regs::MemoryFill + 2, params.end1 - params.start1); | 198 | WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].size), params.end1 - params.start1); |
| 199 | WriteGPURegister(GPU::Regs::MemoryFill + 3, params.value1); | 199 | WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].value), params.value1); |
| 200 | 200 | ||
| 201 | WriteGPURegister(GPU::Regs::MemoryFill + 4, params.start2 >> 3); | 201 | WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_start), Memory::VirtualToPhysicalAddress(params.start2) >> 3); |
| 202 | WriteGPURegister(GPU::Regs::MemoryFill + 5, params.end2 >> 3); | 202 | WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), Memory::VirtualToPhysicalAddress(params.end2) >> 3); |
| 203 | WriteGPURegister(GPU::Regs::MemoryFill + 6, params.end2 - params.start2); | 203 | WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].size), params.end2 - params.start2); |
| 204 | WriteGPURegister(GPU::Regs::MemoryFill + 7, params.value2); | 204 | WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].value), params.value2); |
| 205 | break; | 205 | break; |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | // TODO: Check if texture copies are implemented correctly.. | ||
| 209 | case CommandId::SET_DISPLAY_TRANSFER: | 208 | case CommandId::SET_DISPLAY_TRANSFER: |
| 209 | { | ||
| 210 | auto& params = command.image_copy; | ||
| 211 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | ||
| 212 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | ||
| 213 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size); | ||
| 214 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size); | ||
| 215 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags); | ||
| 216 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1); | ||
| 217 | |||
| 210 | // TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to | 218 | // TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to |
| 211 | // work well enough for running demos. Need to figure out how these all work and trigger | 219 | // work well enough for running demos. Need to figure out how these all work and trigger |
| 212 | // them correctly. | 220 | // them correctly. |
| @@ -216,19 +224,20 @@ void ExecuteCommand(const Command& command) { | |||
| 216 | SignalInterrupt(InterruptId::P3D); | 224 | SignalInterrupt(InterruptId::P3D); |
| 217 | SignalInterrupt(InterruptId::DMA); | 225 | SignalInterrupt(InterruptId::DMA); |
| 218 | break; | 226 | break; |
| 227 | } | ||
| 219 | 228 | ||
| 229 | // TODO: Check if texture copies are implemented correctly.. | ||
| 220 | case CommandId::SET_TEXTURE_COPY: | 230 | case CommandId::SET_TEXTURE_COPY: |
| 221 | { | 231 | { |
| 222 | auto& params = command.image_copy; | 232 | auto& params = command.image_copy; |
| 223 | WriteGPURegister(GPU::Regs::DisplayTransfer, params.in_buffer_address >> 3); | 233 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); |
| 224 | WriteGPURegister(GPU::Regs::DisplayTransfer + 1, params.out_buffer_address >> 3); | 234 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); |
| 225 | WriteGPURegister(GPU::Regs::DisplayTransfer + 3, params.in_buffer_size); | 235 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size); |
| 226 | WriteGPURegister(GPU::Regs::DisplayTransfer + 2, params.out_buffer_size); | 236 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size); |
| 227 | WriteGPURegister(GPU::Regs::DisplayTransfer + 4, params.flags); | 237 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags); |
| 228 | 238 | ||
| 229 | // TODO: Should this only be ORed with 1 for texture copies? | 239 | // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1? |
| 230 | // trigger transfer | 240 | WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1); |
| 231 | WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1); | ||
| 232 | break; | 241 | break; |
| 233 | } | 242 | } |
| 234 | 243 | ||
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index d94c2329b..87cf93bac 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp | |||
| @@ -14,106 +14,29 @@ | |||
| 14 | 14 | ||
| 15 | #include "core/hw/gpu.h" | 15 | #include "core/hw/gpu.h" |
| 16 | 16 | ||
| 17 | #include "video_core/command_processor.h" | ||
| 17 | #include "video_core/video_core.h" | 18 | #include "video_core/video_core.h" |
| 18 | 19 | ||
| 19 | 20 | ||
| 20 | namespace GPU { | 21 | namespace GPU { |
| 21 | 22 | ||
| 22 | RegisterSet<u32, Regs> g_regs; | 23 | Regs g_regs; |
| 23 | 24 | ||
| 24 | u32 g_cur_line = 0; ///< Current vertical screen line | 25 | u32 g_cur_line = 0; ///< Current vertical screen line |
| 25 | u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line | 26 | u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line |
| 26 | 27 | ||
| 27 | /** | ||
| 28 | * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM | ||
| 29 | * @param | ||
| 30 | */ | ||
| 31 | void SetFramebufferLocation(const FramebufferLocation mode) { | ||
| 32 | switch (mode) { | ||
| 33 | case FRAMEBUFFER_LOCATION_FCRAM: | ||
| 34 | { | ||
| 35 | auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); | ||
| 36 | auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>(); | ||
| 37 | |||
| 38 | framebuffer_top.address_left1 = PADDR_TOP_LEFT_FRAME1; | ||
| 39 | framebuffer_top.address_left2 = PADDR_TOP_LEFT_FRAME2; | ||
| 40 | framebuffer_top.address_right1 = PADDR_TOP_RIGHT_FRAME1; | ||
| 41 | framebuffer_top.address_right2 = PADDR_TOP_RIGHT_FRAME2; | ||
| 42 | framebuffer_sub.address_left1 = PADDR_SUB_FRAME1; | ||
| 43 | //framebuffer_sub.address_left2 = unknown; | ||
| 44 | framebuffer_sub.address_right1 = PADDR_SUB_FRAME2; | ||
| 45 | //framebuffer_sub.address_right2 = unknown; | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | |||
| 49 | case FRAMEBUFFER_LOCATION_VRAM: | ||
| 50 | { | ||
| 51 | auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); | ||
| 52 | auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>(); | ||
| 53 | |||
| 54 | framebuffer_top.address_left1 = PADDR_VRAM_TOP_LEFT_FRAME1; | ||
| 55 | framebuffer_top.address_left2 = PADDR_VRAM_TOP_LEFT_FRAME2; | ||
| 56 | framebuffer_top.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1; | ||
| 57 | framebuffer_top.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2; | ||
| 58 | framebuffer_sub.address_left1 = PADDR_VRAM_SUB_FRAME1; | ||
| 59 | //framebuffer_sub.address_left2 = unknown; | ||
| 60 | framebuffer_sub.address_right1 = PADDR_VRAM_SUB_FRAME2; | ||
| 61 | //framebuffer_sub.address_right2 = unknown; | ||
| 62 | break; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | } | ||
| 66 | |||
| 67 | /** | ||
| 68 | * Gets the location of the framebuffers | ||
| 69 | * @return Location of framebuffers as FramebufferLocation enum | ||
| 70 | */ | ||
| 71 | FramebufferLocation GetFramebufferLocation(u32 address) { | ||
| 72 | if ((address & ~Memory::VRAM_MASK) == Memory::VRAM_PADDR) { | ||
| 73 | return FRAMEBUFFER_LOCATION_VRAM; | ||
| 74 | } else if ((address & ~Memory::FCRAM_MASK) == Memory::FCRAM_PADDR) { | ||
| 75 | return FRAMEBUFFER_LOCATION_FCRAM; | ||
| 76 | } else { | ||
| 77 | ERROR_LOG(GPU, "unknown framebuffer location!"); | ||
| 78 | } | ||
| 79 | return FRAMEBUFFER_LOCATION_UNKNOWN; | ||
| 80 | } | ||
| 81 | |||
| 82 | u32 GetFramebufferAddr(const u32 address) { | ||
| 83 | switch (GetFramebufferLocation(address)) { | ||
| 84 | case FRAMEBUFFER_LOCATION_FCRAM: | ||
| 85 | return Memory::VirtualAddressFromPhysical_FCRAM(address); | ||
| 86 | case FRAMEBUFFER_LOCATION_VRAM: | ||
| 87 | return Memory::VirtualAddressFromPhysical_VRAM(address); | ||
| 88 | default: | ||
| 89 | ERROR_LOG(GPU, "unknown framebuffer location"); | ||
| 90 | } | ||
| 91 | return 0; | ||
| 92 | } | ||
| 93 | |||
| 94 | /** | ||
| 95 | * Gets a read-only pointer to a framebuffer in memory | ||
| 96 | * @param address Physical address of framebuffer | ||
| 97 | * @return Returns const pointer to raw framebuffer | ||
| 98 | */ | ||
| 99 | const u8* GetFramebufferPointer(const u32 address) { | ||
| 100 | u32 addr = GetFramebufferAddr(address); | ||
| 101 | return (addr != 0) ? Memory::GetPointer(addr) : nullptr; | ||
| 102 | } | ||
| 103 | |||
| 104 | template <typename T> | 28 | template <typename T> |
| 105 | inline void Read(T &var, const u32 raw_addr) { | 29 | inline void Read(T &var, const u32 raw_addr) { |
| 106 | u32 addr = raw_addr - 0x1EF00000; | 30 | u32 addr = raw_addr - 0x1EF00000; |
| 107 | int index = addr / 4; | 31 | int index = addr / 4; |
| 108 | 32 | ||
| 109 | // Reads other than u32 are untested, so I'd rather have them abort than silently fail | 33 | // Reads other than u32 are untested, so I'd rather have them abort than silently fail |
| 110 | if (index >= Regs::NumIds || !std::is_same<T,u32>::value) | 34 | if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) { |
| 111 | { | ||
| 112 | ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr); | 35 | ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr); |
| 113 | return; | 36 | return; |
| 114 | } | 37 | } |
| 115 | 38 | ||
| 116 | var = g_regs[static_cast<Regs::Id>(addr / 4)]; | 39 | var = g_regs[addr / 4]; |
| 117 | } | 40 | } |
| 118 | 41 | ||
| 119 | template <typename T> | 42 | template <typename T> |
| @@ -122,28 +45,28 @@ inline void Write(u32 addr, const T data) { | |||
| 122 | int index = addr / 4; | 45 | int index = addr / 4; |
| 123 | 46 | ||
| 124 | // Writes other than u32 are untested, so I'd rather have them abort than silently fail | 47 | // Writes other than u32 are untested, so I'd rather have them abort than silently fail |
| 125 | if (index >= Regs::NumIds || !std::is_same<T,u32>::value) | 48 | if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) { |
| 126 | { | ||
| 127 | ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr); | 49 | ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr); |
| 128 | return; | 50 | return; |
| 129 | } | 51 | } |
| 130 | 52 | ||
| 131 | g_regs[static_cast<Regs::Id>(index)] = data; | 53 | g_regs[index] = data; |
| 132 | 54 | ||
| 133 | switch (static_cast<Regs::Id>(index)) { | 55 | switch (index) { |
| 134 | 56 | ||
| 135 | // Memory fills are triggered once the fill value is written. | 57 | // Memory fills are triggered once the fill value is written. |
| 136 | // NOTE: This is not verified. | 58 | // NOTE: This is not verified. |
| 137 | case Regs::MemoryFill + 3: | 59 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].value, 0x00004 + 0x3): |
| 138 | case Regs::MemoryFill + 7: | 60 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].value, 0x00008 + 0x3): |
| 139 | { | 61 | { |
| 140 | const auto& config = g_regs.Get<Regs::MemoryFill>(static_cast<Regs::Id>(index - 3)); | 62 | const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].value)); |
| 63 | const auto& config = g_regs.memory_fill_config[is_second_filler]; | ||
| 141 | 64 | ||
| 142 | // TODO: Not sure if this check should be done at GSP level instead | 65 | // TODO: Not sure if this check should be done at GSP level instead |
| 143 | if (config.address_start) { | 66 | if (config.address_start) { |
| 144 | // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all | 67 | // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all |
| 145 | u32* start = (u32*)Memory::GetPointer(config.GetStartAddress()); | 68 | u32* start = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress())); |
| 146 | u32* end = (u32*)Memory::GetPointer(config.GetEndAddress()); | 69 | u32* end = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress())); |
| 147 | for (u32* ptr = start; ptr < end; ++ptr) | 70 | for (u32* ptr = start; ptr < end; ++ptr) |
| 148 | *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation | 71 | *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation |
| 149 | 72 | ||
| @@ -152,12 +75,12 @@ inline void Write(u32 addr, const T data) { | |||
| 152 | break; | 75 | break; |
| 153 | } | 76 | } |
| 154 | 77 | ||
| 155 | case Regs::DisplayTransfer + 6: | 78 | case GPU_REG_INDEX(display_transfer_config.trigger): |
| 156 | { | 79 | { |
| 157 | const auto& config = g_regs.Get<Regs::DisplayTransfer>(); | 80 | const auto& config = g_regs.display_transfer_config; |
| 158 | if (config.trigger & 1) { | 81 | if (config.trigger & 1) { |
| 159 | u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress()); | 82 | u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress())); |
| 160 | u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress()); | 83 | u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress())); |
| 161 | 84 | ||
| 162 | for (int y = 0; y < config.output_height; ++y) { | 85 | for (int y = 0; y < config.output_height; ++y) { |
| 163 | // TODO: Why does the register seem to hold twice the framebuffer width? | 86 | // TODO: Why does the register seem to hold twice the framebuffer width? |
| @@ -221,14 +144,15 @@ inline void Write(u32 addr, const T data) { | |||
| 221 | break; | 144 | break; |
| 222 | } | 145 | } |
| 223 | 146 | ||
| 224 | case Regs::CommandProcessor + 4: | 147 | // Seems like writing to this register triggers processing |
| 148 | case GPU_REG_INDEX(command_processor_config.trigger): | ||
| 225 | { | 149 | { |
| 226 | const auto& config = g_regs.Get<Regs::CommandProcessor>(); | 150 | const auto& config = g_regs.command_processor_config; |
| 227 | if (config.trigger & 1) | 151 | if (config.trigger & 1) |
| 228 | { | 152 | { |
| 229 | // u32* buffer = (u32*)Memory::GetPointer(config.address << 3); | 153 | u32* buffer = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalAddress())); |
| 230 | ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.address << 3); | 154 | u32 size = config.size << 3; |
| 231 | // TODO: Process command list! | 155 | Pica::CommandProcessor::ProcessCommandList(buffer, size); |
| 232 | } | 156 | } |
| 233 | break; | 157 | break; |
| 234 | } | 158 | } |
| @@ -252,7 +176,7 @@ template void Write<u8>(u32 addr, const u8 data); | |||
| 252 | 176 | ||
| 253 | /// Update hardware | 177 | /// Update hardware |
| 254 | void Update() { | 178 | void Update() { |
| 255 | auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); | 179 | auto& framebuffer_top = g_regs.framebuffer_config[0]; |
| 256 | u64 current_ticks = Core::g_app_core->GetTicks(); | 180 | u64 current_ticks = Core::g_app_core->GetTicks(); |
| 257 | 181 | ||
| 258 | // Synchronize line... | 182 | // Synchronize line... |
| @@ -277,11 +201,22 @@ void Init() { | |||
| 277 | g_cur_line = 0; | 201 | g_cur_line = 0; |
| 278 | g_last_line_ticks = Core::g_app_core->GetTicks(); | 202 | g_last_line_ticks = Core::g_app_core->GetTicks(); |
| 279 | 203 | ||
| 280 | // SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); | 204 | auto& framebuffer_top = g_regs.framebuffer_config[0]; |
| 281 | SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM); | 205 | auto& framebuffer_sub = g_regs.framebuffer_config[1]; |
| 206 | |||
| 207 | // Setup default framebuffer addresses (located in VRAM) | ||
| 208 | // .. or at least these are the ones used by system applets. | ||
| 209 | // There's probably a smarter way to come up with addresses | ||
| 210 | // like this which does not require hardcoding. | ||
| 211 | framebuffer_top.address_left1 = 0x181E6000; | ||
| 212 | framebuffer_top.address_left2 = 0x1822C800; | ||
| 213 | framebuffer_top.address_right1 = 0x18273000; | ||
| 214 | framebuffer_top.address_right2 = 0x182B9800; | ||
| 215 | framebuffer_sub.address_left1 = 0x1848F000; | ||
| 216 | //framebuffer_sub.address_left2 = unknown; | ||
| 217 | framebuffer_sub.address_right1 = 0x184C7800; | ||
| 218 | //framebuffer_sub.address_right2 = unknown; | ||
| 282 | 219 | ||
| 283 | auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); | ||
| 284 | auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>(); | ||
| 285 | // TODO: Width should be 240 instead? | 220 | // TODO: Width should be 240 instead? |
| 286 | framebuffer_top.width = 480; | 221 | framebuffer_top.width = 480; |
| 287 | framebuffer_top.height = 400; | 222 | framebuffer_top.height = 400; |
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 42f18a0e7..d20311a00 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h | |||
| @@ -4,32 +4,57 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | ||
| 8 | |||
| 7 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 8 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 9 | #include "common/register_set.h" | ||
| 10 | 11 | ||
| 11 | namespace GPU { | 12 | namespace GPU { |
| 12 | 13 | ||
| 13 | static const u32 kFrameCycles = 268123480 / 60; ///< 268MHz / 60 frames per second | 14 | static const u32 kFrameCycles = 268123480 / 60; ///< 268MHz / 60 frames per second |
| 14 | static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of instructions/frame | 15 | static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of instructions/frame |
| 15 | 16 | ||
| 17 | // Returns index corresponding to the Regs member labeled by field_name | ||
| 18 | // TODO: Due to Visual Studio bug 209229, offsetof does not return constant expressions | ||
| 19 | // when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])). | ||
| 20 | // For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members | ||
| 21 | // Hopefully, this will be fixed sometime in the future. | ||
| 22 | // For lack of better alternatives, we currently hardcode the offsets when constant | ||
| 23 | // expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts | ||
| 24 | // will then make sure the offsets indeed match the automatically calculated ones). | ||
| 25 | #define GPU_REG_INDEX(field_name) (offsetof(GPU::Regs, field_name) / sizeof(u32)) | ||
| 26 | #if defined(_MSC_VER) | ||
| 27 | #define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index) | ||
| 28 | #else | ||
| 29 | // NOTE: Yeah, hacking in a static_assert here just to work around the lacking MSVC compiler | ||
| 30 | // really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX | ||
| 31 | // and then performs a (no-op) cast to size_t iff the second argument matches the expected | ||
| 32 | // field offset. Otherwise, the compiler will fail to compile this code. | ||
| 33 | #define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ | ||
| 34 | ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name)) | ||
| 35 | #endif | ||
| 36 | |||
| 16 | // MMIO region 0x1EFxxxxx | 37 | // MMIO region 0x1EFxxxxx |
| 17 | struct Regs { | 38 | struct Regs { |
| 18 | enum Id : u32 { | ||
| 19 | MemoryFill = 0x00004, // + 5,6,7; second block at 8-11 | ||
| 20 | |||
| 21 | FramebufferTop = 0x00117, // + 11a,11b,11c,11d(?),11e...126 | ||
| 22 | FramebufferBottom = 0x00157, // + 15a,15b,15c,15d(?),15e...166 | ||
| 23 | 39 | ||
| 24 | DisplayTransfer = 0x00300, // + 301,302,303,304,305,306 | 40 | // helper macro to properly align structure members. |
| 25 | 41 | // Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121", | |
| 26 | CommandProcessor = 0x00638, // + 63a,63c | 42 | // depending on the current source line to make sure variable names are unique. |
| 27 | 43 | #define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y | |
| 28 | NumIds = 0x01000 | 44 | #define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y) |
| 29 | }; | 45 | #define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)]; |
| 30 | 46 | ||
| 31 | template<Id id> | 47 | // helper macro to make sure the defined structures are of the expected size. |
| 32 | struct Struct; | 48 | #if defined(_MSC_VER) |
| 49 | // TODO: MSVC does not support using sizeof() on non-static data members even though this | ||
| 50 | // is technically allowed since C++11. This macro should be enabled once MSVC adds | ||
| 51 | // support for that. | ||
| 52 | #define ASSERT_MEMBER_SIZE(name, size_in_bytes) | ||
| 53 | #else | ||
| 54 | #define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ | ||
| 55 | static_assert(sizeof(name) == size_in_bytes, \ | ||
| 56 | "Structure size and register block length don't match"); | ||
| 57 | #endif | ||
| 33 | 58 | ||
| 34 | enum class FramebufferFormat : u32 { | 59 | enum class FramebufferFormat : u32 { |
| 35 | RGBA8 = 0, | 60 | RGBA8 = 0, |
| @@ -38,201 +63,191 @@ struct Regs { | |||
| 38 | RGB5A1 = 3, | 63 | RGB5A1 = 3, |
| 39 | RGBA4 = 4, | 64 | RGBA4 = 4, |
| 40 | }; | 65 | }; |
| 41 | }; | ||
| 42 | 66 | ||
| 43 | template<> | 67 | INSERT_PADDING_WORDS(0x4); |
| 44 | struct Regs::Struct<Regs::MemoryFill> { | ||
| 45 | u32 address_start; | ||
| 46 | u32 address_end; // ? | ||
| 47 | u32 size; | ||
| 48 | u32 value; // ? | ||
| 49 | 68 | ||
| 50 | inline u32 GetStartAddress() const { | 69 | struct { |
| 51 | return address_start * 8; | 70 | u32 address_start; |
| 52 | } | 71 | u32 address_end; // ? |
| 72 | u32 size; | ||
| 73 | u32 value; // ? | ||
| 53 | 74 | ||
| 54 | inline u32 GetEndAddress() const { | 75 | inline u32 GetStartAddress() const { |
| 55 | return address_end * 8; | 76 | return DecodeAddressRegister(address_start); |
| 56 | } | 77 | } |
| 57 | }; | ||
| 58 | static_assert(sizeof(Regs::Struct<Regs::MemoryFill>) == 0x10, "Structure size and register block length don't match"); | ||
| 59 | 78 | ||
| 60 | template<> | 79 | inline u32 GetEndAddress() const { |
| 61 | struct Regs::Struct<Regs::FramebufferTop> { | 80 | return DecodeAddressRegister(address_end); |
| 62 | using Format = Regs::FramebufferFormat; | 81 | } |
| 82 | } memory_fill_config[2]; | ||
| 83 | ASSERT_MEMBER_SIZE(memory_fill_config[0], 0x10); | ||
| 63 | 84 | ||
| 64 | union { | 85 | INSERT_PADDING_WORDS(0x10b); |
| 65 | u32 size; | ||
| 66 | 86 | ||
| 67 | BitField< 0, 16, u32> width; | 87 | struct { |
| 68 | BitField<16, 16, u32> height; | 88 | using Format = Regs::FramebufferFormat; |
| 69 | }; | ||
| 70 | 89 | ||
| 71 | u32 pad0[2]; | 90 | union { |
| 91 | u32 size; | ||
| 72 | 92 | ||
| 73 | u32 address_left1; | 93 | BitField< 0, 16, u32> width; |
| 74 | u32 address_left2; | 94 | BitField<16, 16, u32> height; |
| 95 | }; | ||
| 75 | 96 | ||
| 76 | union { | 97 | INSERT_PADDING_WORDS(0x2); |
| 77 | u32 format; | ||
| 78 | 98 | ||
| 79 | BitField< 0, 3, Format> color_format; | 99 | u32 address_left1; |
| 80 | }; | 100 | u32 address_left2; |
| 81 | 101 | ||
| 82 | u32 pad1; | 102 | union { |
| 103 | u32 format; | ||
| 83 | 104 | ||
| 84 | union { | 105 | BitField< 0, 3, Format> color_format; |
| 85 | u32 active_fb; | 106 | }; |
| 86 | 107 | ||
| 87 | // 0: Use parameters ending with "1" | 108 | INSERT_PADDING_WORDS(0x1); |
| 88 | // 1: Use parameters ending with "2" | ||
| 89 | BitField<0, 1, u32> second_fb_active; | ||
| 90 | }; | ||
| 91 | 109 | ||
| 92 | u32 pad2[5]; | 110 | union { |
| 111 | u32 active_fb; | ||
| 93 | 112 | ||
| 94 | // Distance between two pixel rows, in bytes | 113 | // 0: Use parameters ending with "1" |
| 95 | u32 stride; | 114 | // 1: Use parameters ending with "2" |
| 115 | BitField<0, 1, u32> second_fb_active; | ||
| 116 | }; | ||
| 96 | 117 | ||
| 97 | u32 address_right1; | 118 | INSERT_PADDING_WORDS(0x5); |
| 98 | u32 address_right2; | ||
| 99 | }; | ||
| 100 | 119 | ||
| 101 | template<> | 120 | // Distance between two pixel rows, in bytes |
| 102 | struct Regs::Struct<Regs::FramebufferBottom> : public Regs::Struct<Regs::FramebufferTop> { | 121 | u32 stride; |
| 103 | }; | ||
| 104 | static_assert(sizeof(Regs::Struct<Regs::FramebufferTop>) == 0x40, "Structure size and register block length don't match"); | ||
| 105 | 122 | ||
| 106 | template<> | 123 | u32 address_right1; |
| 107 | struct Regs::Struct<Regs::DisplayTransfer> { | 124 | u32 address_right2; |
| 108 | using Format = Regs::FramebufferFormat; | ||
| 109 | 125 | ||
| 110 | u32 input_address; | 126 | INSERT_PADDING_WORDS(0x30); |
| 111 | u32 output_address; | 127 | } framebuffer_config[2]; |
| 128 | ASSERT_MEMBER_SIZE(framebuffer_config[0], 0x100); | ||
| 112 | 129 | ||
| 113 | inline u32 GetPhysicalInputAddress() const { | 130 | INSERT_PADDING_WORDS(0x169); |
| 114 | return input_address * 8; | ||
| 115 | } | ||
| 116 | 131 | ||
| 117 | inline u32 GetPhysicalOutputAddress() const { | 132 | struct { |
| 118 | return output_address * 8; | 133 | using Format = Regs::FramebufferFormat; |
| 119 | } | ||
| 120 | 134 | ||
| 121 | union { | 135 | u32 input_address; |
| 122 | u32 output_size; | 136 | u32 output_address; |
| 123 | 137 | ||
| 124 | BitField< 0, 16, u32> output_width; | 138 | inline u32 GetPhysicalInputAddress() const { |
| 125 | BitField<16, 16, u32> output_height; | 139 | return DecodeAddressRegister(input_address); |
| 126 | }; | 140 | } |
| 127 | 141 | ||
| 128 | union { | 142 | inline u32 GetPhysicalOutputAddress() const { |
| 129 | u32 input_size; | 143 | return DecodeAddressRegister(output_address); |
| 144 | } | ||
| 130 | 145 | ||
| 131 | BitField< 0, 16, u32> input_width; | 146 | union { |
| 132 | BitField<16, 16, u32> input_height; | 147 | u32 output_size; |
| 133 | }; | ||
| 134 | 148 | ||
| 135 | union { | 149 | BitField< 0, 16, u32> output_width; |
| 136 | u32 flags; | 150 | BitField<16, 16, u32> output_height; |
| 151 | }; | ||
| 137 | 152 | ||
| 138 | BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true | 153 | union { |
| 139 | BitField< 8, 3, Format> input_format; | 154 | u32 input_size; |
| 140 | BitField<12, 3, Format> output_format; | ||
| 141 | BitField<16, 1, u32> output_tiled; // stores output in a tiled format | ||
| 142 | }; | ||
| 143 | 155 | ||
| 144 | u32 unknown; | 156 | BitField< 0, 16, u32> input_width; |
| 157 | BitField<16, 16, u32> input_height; | ||
| 158 | }; | ||
| 145 | 159 | ||
| 146 | // it seems that writing to this field triggers the display transfer | 160 | union { |
| 147 | u32 trigger; | 161 | u32 flags; |
| 148 | }; | ||
| 149 | static_assert(sizeof(Regs::Struct<Regs::DisplayTransfer>) == 0x1C, "Structure size and register block length don't match"); | ||
| 150 | 162 | ||
| 151 | template<> | 163 | BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true |
| 152 | struct Regs::Struct<Regs::CommandProcessor> { | 164 | BitField< 8, 3, Format> input_format; |
| 153 | // command list size | 165 | BitField<12, 3, Format> output_format; |
| 154 | u32 size; | 166 | BitField<16, 1, u32> output_tiled; // stores output in a tiled format |
| 167 | }; | ||
| 155 | 168 | ||
| 156 | u32 pad0; | 169 | INSERT_PADDING_WORDS(0x1); |
| 157 | 170 | ||
| 158 | // command list address | 171 | // it seems that writing to this field triggers the display transfer |
| 159 | u32 address; | 172 | u32 trigger; |
| 173 | } display_transfer_config; | ||
| 174 | ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); | ||
| 160 | 175 | ||
| 161 | u32 pad1; | 176 | INSERT_PADDING_WORDS(0x331); |
| 162 | 177 | ||
| 163 | // it seems that writing to this field triggers command list processing | 178 | struct { |
| 164 | u32 trigger; | 179 | // command list size |
| 165 | }; | 180 | u32 size; |
| 166 | static_assert(sizeof(Regs::Struct<Regs::CommandProcessor>) == 0x14, "Structure size and register block length don't match"); | ||
| 167 | |||
| 168 | |||
| 169 | extern RegisterSet<u32, Regs> g_regs; | ||
| 170 | |||
| 171 | enum { | ||
| 172 | TOP_ASPECT_X = 0x5, | ||
| 173 | TOP_ASPECT_Y = 0x3, | ||
| 174 | |||
| 175 | TOP_HEIGHT = 240, | ||
| 176 | TOP_WIDTH = 400, | ||
| 177 | BOTTOM_WIDTH = 320, | ||
| 178 | |||
| 179 | // Physical addresses in FCRAM (chosen arbitrarily) | ||
| 180 | PADDR_TOP_LEFT_FRAME1 = 0x201D4C00, | ||
| 181 | PADDR_TOP_LEFT_FRAME2 = 0x202D4C00, | ||
| 182 | PADDR_TOP_RIGHT_FRAME1 = 0x203D4C00, | ||
| 183 | PADDR_TOP_RIGHT_FRAME2 = 0x204D4C00, | ||
| 184 | PADDR_SUB_FRAME1 = 0x205D4C00, | ||
| 185 | PADDR_SUB_FRAME2 = 0x206D4C00, | ||
| 186 | // Physical addresses in FCRAM used by ARM9 applications | ||
| 187 | /* PADDR_TOP_LEFT_FRAME1 = 0x20184E60, | ||
| 188 | PADDR_TOP_LEFT_FRAME2 = 0x201CB370, | ||
| 189 | PADDR_TOP_RIGHT_FRAME1 = 0x20282160, | ||
| 190 | PADDR_TOP_RIGHT_FRAME2 = 0x202C8670, | ||
| 191 | PADDR_SUB_FRAME1 = 0x202118E0, | ||
| 192 | PADDR_SUB_FRAME2 = 0x20249CF0,*/ | ||
| 193 | |||
| 194 | // Physical addresses in VRAM | ||
| 195 | // TODO: These should just be deduced from the ones above | ||
| 196 | PADDR_VRAM_TOP_LEFT_FRAME1 = 0x181D4C00, | ||
| 197 | PADDR_VRAM_TOP_LEFT_FRAME2 = 0x182D4C00, | ||
| 198 | PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x183D4C00, | ||
| 199 | PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x184D4C00, | ||
| 200 | PADDR_VRAM_SUB_FRAME1 = 0x185D4C00, | ||
| 201 | PADDR_VRAM_SUB_FRAME2 = 0x186D4C00, | ||
| 202 | // Physical addresses in VRAM used by ARM9 applications | ||
| 203 | /* PADDR_VRAM_TOP_LEFT_FRAME2 = 0x181CB370, | ||
| 204 | PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x18282160, | ||
| 205 | PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x182C8670, | ||
| 206 | PADDR_VRAM_SUB_FRAME1 = 0x182118E0, | ||
| 207 | PADDR_VRAM_SUB_FRAME2 = 0x18249CF0,*/ | ||
| 208 | }; | ||
| 209 | 181 | ||
| 210 | /// Framebuffer location | 182 | INSERT_PADDING_WORDS(0x1); |
| 211 | enum FramebufferLocation { | 183 | |
| 212 | FRAMEBUFFER_LOCATION_UNKNOWN, ///< Framebuffer location is unknown | 184 | // command list address |
| 213 | FRAMEBUFFER_LOCATION_FCRAM, ///< Framebuffer is in the GSP heap | 185 | u32 address; |
| 214 | FRAMEBUFFER_LOCATION_VRAM, ///< Framebuffer is in VRAM | 186 | |
| 215 | }; | 187 | INSERT_PADDING_WORDS(0x1); |
| 188 | |||
| 189 | // it seems that writing to this field triggers command list processing | ||
| 190 | u32 trigger; | ||
| 191 | |||
| 192 | inline u32 GetPhysicalAddress() const { | ||
| 193 | return DecodeAddressRegister(address); | ||
| 194 | } | ||
| 195 | } command_processor_config; | ||
| 196 | ASSERT_MEMBER_SIZE(command_processor_config, 0x14); | ||
| 216 | 197 | ||
| 217 | /** | 198 | INSERT_PADDING_WORDS(0x9c3); |
| 218 | * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM | 199 | |
| 219 | * @param | 200 | #undef INSERT_PADDING_WORDS_HELPER1 |
| 220 | */ | 201 | #undef INSERT_PADDING_WORDS_HELPER2 |
| 221 | void SetFramebufferLocation(const FramebufferLocation mode); | 202 | #undef INSERT_PADDING_WORDS |
| 222 | 203 | ||
| 223 | /** | 204 | static inline int NumIds() { |
| 224 | * Gets a read-only pointer to a framebuffer in memory | 205 | return sizeof(Regs) / sizeof(u32); |
| 225 | * @param address Physical address of framebuffer | 206 | } |
| 226 | * @return Returns const pointer to raw framebuffer | 207 | |
| 227 | */ | 208 | u32& operator [] (int index) const { |
| 228 | const u8* GetFramebufferPointer(const u32 address); | 209 | u32* content = (u32*)this; |
| 229 | 210 | return content[index]; | |
| 230 | u32 GetFramebufferAddr(const u32 address); | 211 | } |
| 231 | 212 | ||
| 232 | /** | 213 | u32& operator [] (int index) { |
| 233 | * Gets the location of the framebuffers | 214 | u32* content = (u32*)this; |
| 234 | */ | 215 | return content[index]; |
| 235 | FramebufferLocation GetFramebufferLocation(u32 address); | 216 | } |
| 217 | |||
| 218 | private: | ||
| 219 | /* | ||
| 220 | * Most physical addresses which GPU registers refer to are 8-byte aligned. | ||
| 221 | * This function should be used to get the address from a raw register value. | ||
| 222 | */ | ||
| 223 | static inline u32 DecodeAddressRegister(u32 register_value) { | ||
| 224 | return register_value * 8; | ||
| 225 | } | ||
| 226 | }; | ||
| 227 | static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); | ||
| 228 | |||
| 229 | // TODO: MSVC does not support using offsetof() on non-static data members even though this | ||
| 230 | // is technically allowed since C++11. This macro should be enabled once MSVC adds | ||
| 231 | // support for that. | ||
| 232 | #ifndef _MSC_VER | ||
| 233 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 234 | static_assert(offsetof(Regs, field_name) == position * 4, \ | ||
| 235 | "Field "#field_name" has invalid position") | ||
| 236 | |||
| 237 | ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); | ||
| 238 | ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); | ||
| 239 | ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); | ||
| 240 | ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); | ||
| 241 | ASSERT_REG_POSITION(display_transfer_config, 0x00300); | ||
| 242 | ASSERT_REG_POSITION(command_processor_config, 0x00638); | ||
| 243 | |||
| 244 | #undef ASSERT_REG_POSITION | ||
| 245 | #endif // !defined(_MSC_VER) | ||
| 246 | |||
| 247 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. | ||
| 248 | static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set"); | ||
| 249 | |||
| 250 | extern Regs g_regs; | ||
| 236 | 251 | ||
| 237 | template <typename T> | 252 | template <typename T> |
| 238 | void Read(T &var, const u32 addr); | 253 | void Read(T &var, const u32 addr); |
diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp index c45746be9..14fc01471 100644 --- a/src/core/mem_map.cpp +++ b/src/core/mem_map.cpp | |||
| @@ -72,14 +72,14 @@ void Init() { | |||
| 72 | 72 | ||
| 73 | g_base = MemoryMap_Setup(g_views, kNumMemViews, flags, &g_arena); | 73 | g_base = MemoryMap_Setup(g_views, kNumMemViews, flags, &g_arena); |
| 74 | 74 | ||
| 75 | NOTICE_LOG(MEMMAP, "initialized OK, RAM at %p (mirror at 0 @ %p)", g_heap, | 75 | NOTICE_LOG(MEMMAP, "initialized OK, RAM at %p (mirror at 0 @ %p)", g_heap, |
| 76 | g_physical_fcram); | 76 | g_physical_fcram); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void Shutdown() { | 79 | void Shutdown() { |
| 80 | u32 flags = 0; | 80 | u32 flags = 0; |
| 81 | MemoryMap_Shutdown(g_views, kNumMemViews, flags, &g_arena); | 81 | MemoryMap_Shutdown(g_views, kNumMemViews, flags, &g_arena); |
| 82 | 82 | ||
| 83 | g_arena.ReleaseSpace(); | 83 | g_arena.ReleaseSpace(); |
| 84 | g_base = NULL; | 84 | g_base = NULL; |
| 85 | 85 | ||
diff --git a/src/core/mem_map.h b/src/core/mem_map.h index 12941f558..3c7810573 100644 --- a/src/core/mem_map.h +++ b/src/core/mem_map.h | |||
| @@ -14,7 +14,6 @@ namespace Memory { | |||
| 14 | enum { | 14 | enum { |
| 15 | BOOTROM_SIZE = 0x00010000, ///< Bootrom (super secret code/data @ 0x8000) size | 15 | BOOTROM_SIZE = 0x00010000, ///< Bootrom (super secret code/data @ 0x8000) size |
| 16 | MPCORE_PRIV_SIZE = 0x00002000, ///< MPCore private memory region size | 16 | MPCORE_PRIV_SIZE = 0x00002000, ///< MPCore private memory region size |
| 17 | VRAM_SIZE = 0x00600000, ///< VRAM size | ||
| 18 | DSP_SIZE = 0x00080000, ///< DSP memory size | 17 | DSP_SIZE = 0x00080000, ///< DSP memory size |
| 19 | AXI_WRAM_SIZE = 0x00080000, ///< AXI WRAM size | 18 | AXI_WRAM_SIZE = 0x00080000, ///< AXI WRAM size |
| 20 | 19 | ||
| @@ -23,8 +22,6 @@ enum { | |||
| 23 | FCRAM_PADDR_END = (FCRAM_PADDR + FCRAM_SIZE), ///< FCRAM end of physical space | 22 | FCRAM_PADDR_END = (FCRAM_PADDR + FCRAM_SIZE), ///< FCRAM end of physical space |
| 24 | FCRAM_VADDR = 0x08000000, ///< FCRAM virtual address | 23 | FCRAM_VADDR = 0x08000000, ///< FCRAM virtual address |
| 25 | FCRAM_VADDR_END = (FCRAM_VADDR + FCRAM_SIZE), ///< FCRAM end of virtual space | 24 | FCRAM_VADDR_END = (FCRAM_VADDR + FCRAM_SIZE), ///< FCRAM end of virtual space |
| 26 | FCRAM_VADDR_FW0B = 0xF0000000, ///< FCRAM adress for firmare FW0B | ||
| 27 | FCRAM_VADDR_FW0B_END = (FCRAM_VADDR_FW0B + FCRAM_SIZE), ///< FCRAM adress end for FW0B | ||
| 28 | FCRAM_MASK = (FCRAM_SIZE - 1), ///< FCRAM mask | 25 | FCRAM_MASK = (FCRAM_SIZE - 1), ///< FCRAM mask |
| 29 | 26 | ||
| 30 | SHARED_MEMORY_SIZE = 0x04000000, ///< Shared memory size | 27 | SHARED_MEMORY_SIZE = 0x04000000, ///< Shared memory size |
| @@ -73,6 +70,7 @@ enum { | |||
| 73 | HARDWARE_IO_PADDR_END = (HARDWARE_IO_PADDR + HARDWARE_IO_SIZE), | 70 | HARDWARE_IO_PADDR_END = (HARDWARE_IO_PADDR + HARDWARE_IO_SIZE), |
| 74 | HARDWARE_IO_VADDR_END = (HARDWARE_IO_VADDR + HARDWARE_IO_SIZE), | 71 | HARDWARE_IO_VADDR_END = (HARDWARE_IO_VADDR + HARDWARE_IO_SIZE), |
| 75 | 72 | ||
| 73 | VRAM_SIZE = 0x00600000, | ||
| 76 | VRAM_PADDR = 0x18000000, | 74 | VRAM_PADDR = 0x18000000, |
| 77 | VRAM_VADDR = 0x1F000000, | 75 | VRAM_VADDR = 0x1F000000, |
| 78 | VRAM_PADDR_END = (VRAM_PADDR + VRAM_SIZE), | 76 | VRAM_PADDR_END = (VRAM_PADDR + VRAM_SIZE), |
| @@ -112,7 +110,7 @@ struct MemoryBlock { | |||
| 112 | 110 | ||
| 113 | // In 64-bit, this might point to "high memory" (above the 32-bit limit), | 111 | // In 64-bit, this might point to "high memory" (above the 32-bit limit), |
| 114 | // so be sure to load it into a 64-bit register. | 112 | // so be sure to load it into a 64-bit register. |
| 115 | extern u8 *g_base; | 113 | extern u8 *g_base; |
| 116 | 114 | ||
| 117 | // These are guaranteed to point to "low memory" addresses (sub-32-bit). | 115 | // These are guaranteed to point to "low memory" addresses (sub-32-bit). |
| 118 | // 64-bit: Pointers to low-mem (sub-0x10000000) mirror | 116 | // 64-bit: Pointers to low-mem (sub-0x10000000) mirror |
| @@ -147,7 +145,7 @@ void Write32(const u32 addr, const u32 data); | |||
| 147 | 145 | ||
| 148 | void WriteBlock(const u32 addr, const u8* data, const int size); | 146 | void WriteBlock(const u32 addr, const u8* data, const int size); |
| 149 | 147 | ||
| 150 | u8* GetPointer(const u32 Address); | 148 | u8* GetPointer(const u32 virtual_address); |
| 151 | 149 | ||
| 152 | /** | 150 | /** |
| 153 | * Maps a block of memory on the heap | 151 | * Maps a block of memory on the heap |
| @@ -169,16 +167,10 @@ inline const char* GetCharPointer(const u32 address) { | |||
| 169 | return (const char *)GetPointer(address); | 167 | return (const char *)GetPointer(address); |
| 170 | } | 168 | } |
| 171 | 169 | ||
| 172 | inline const u32 VirtualAddressFromPhysical_FCRAM(const u32 address) { | 170 | /// Converts a physical address to virtual address |
| 173 | return ((address & FCRAM_MASK) | FCRAM_VADDR); | 171 | u32 PhysicalToVirtualAddress(const u32 addr); |
| 174 | } | ||
| 175 | |||
| 176 | inline const u32 VirtualAddressFromPhysical_IO(const u32 address) { | ||
| 177 | return (address + 0x0EB00000); | ||
| 178 | } | ||
| 179 | 172 | ||
| 180 | inline const u32 VirtualAddressFromPhysical_VRAM(const u32 address) { | 173 | /// Converts a virtual address to physical address |
| 181 | return (address + 0x07000000); | 174 | u32 VirtualToPhysicalAddress(const u32 addr); |
| 182 | } | ||
| 183 | 175 | ||
| 184 | } // namespace | 176 | } // namespace |
diff --git a/src/core/mem_map_funcs.cpp b/src/core/mem_map_funcs.cpp index 305be8468..5772cca52 100644 --- a/src/core/mem_map_funcs.cpp +++ b/src/core/mem_map_funcs.cpp | |||
| @@ -17,37 +17,44 @@ std::map<u32, MemoryBlock> g_heap_map; | |||
| 17 | std::map<u32, MemoryBlock> g_heap_gsp_map; | 17 | std::map<u32, MemoryBlock> g_heap_gsp_map; |
| 18 | std::map<u32, MemoryBlock> g_shared_map; | 18 | std::map<u32, MemoryBlock> g_shared_map; |
| 19 | 19 | ||
| 20 | /// Convert a physical address (or firmware-specific virtual address) to primary virtual address | 20 | /// Convert a physical address to virtual address |
| 21 | u32 _VirtualAddress(const u32 addr) { | 21 | u32 PhysicalToVirtualAddress(const u32 addr) { |
| 22 | // Our memory interface read/write functions assume virtual addresses. Put any physical address | 22 | // Our memory interface read/write functions assume virtual addresses. Put any physical address |
| 23 | // to virtual address translations here. This is obviously quite hacky... But we're not doing | 23 | // to virtual address translations here. This is quite hacky, but necessary until we implement |
| 24 | // any MMU emulation yet or anything | 24 | // proper MMU emulation. |
| 25 | if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) { | 25 | // TODO: Screw it, I'll let bunnei figure out how to do this properly. |
| 26 | return VirtualAddressFromPhysical_FCRAM(addr); | 26 | if ((addr >= VRAM_PADDR) && (addr < VRAM_PADDR_END)) { |
| 27 | 27 | return addr - VRAM_PADDR + VRAM_VADDR; | |
| 28 | // Virtual address mapping FW0B | 28 | }else if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) { |
| 29 | } else if ((addr >= FCRAM_VADDR_FW0B) && (addr < FCRAM_VADDR_FW0B_END)) { | 29 | return addr - FCRAM_PADDR + FCRAM_VADDR; |
| 30 | return VirtualAddressFromPhysical_FCRAM(addr); | 30 | } |
| 31 | 31 | ||
| 32 | // Hardware IO | 32 | ERROR_LOG(MEMMAP, "Unknown physical address @ 0x%08x", addr); |
| 33 | // TODO(bunnei): FixMe | 33 | return addr; |
| 34 | // This isn't going to work... The physical address of HARDWARE_IO conflicts with the virtual | 34 | } |
| 35 | // address of shared memory. | ||
| 36 | //} else if ((addr >= HARDWARE_IO_PADDR) && (addr < HARDWARE_IO_PADDR_END)) { | ||
| 37 | // return (addr + 0x0EB00000); | ||
| 38 | 35 | ||
| 36 | /// Convert a virtual address to physical address | ||
| 37 | u32 VirtualToPhysicalAddress(const u32 addr) { | ||
| 38 | // Our memory interface read/write functions assume virtual addresses. Put any virtual address | ||
| 39 | // to physical address translations here. This is quite hacky, but necessary until we implement | ||
| 40 | // proper MMU emulation. | ||
| 41 | // TODO: Screw it, I'll let bunnei figure out how to do this properly. | ||
| 42 | if ((addr >= VRAM_VADDR) && (addr < VRAM_VADDR_END)) { | ||
| 43 | return addr - 0x07000000; | ||
| 44 | } else if ((addr >= FCRAM_VADDR) && (addr < FCRAM_VADDR_END)) { | ||
| 45 | return addr - FCRAM_VADDR + FCRAM_PADDR; | ||
| 39 | } | 46 | } |
| 47 | |||
| 48 | ERROR_LOG(MEMMAP, "Unknown virtual address @ 0x%08x", addr); | ||
| 40 | return addr; | 49 | return addr; |
| 41 | } | 50 | } |
| 42 | 51 | ||
| 43 | template <typename T> | 52 | template <typename T> |
| 44 | inline void Read(T &var, const u32 addr) { | 53 | inline void Read(T &var, const u32 vaddr) { |
| 45 | // TODO: Figure out the fastest order of tests for both read and write (they are probably different). | 54 | // TODO: Figure out the fastest order of tests for both read and write (they are probably different). |
| 46 | // TODO: Make sure this represents the mirrors in a correct way. | 55 | // TODO: Make sure this represents the mirrors in a correct way. |
| 47 | // Could just do a base-relative read, too.... TODO | 56 | // Could just do a base-relative read, too.... TODO |
| 48 | 57 | ||
| 49 | const u32 vaddr = _VirtualAddress(addr); | ||
| 50 | |||
| 51 | // Kernel memory command buffer | 58 | // Kernel memory command buffer |
| 52 | if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { | 59 | if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { |
| 53 | var = *((const T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK]); | 60 | var = *((const T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK]); |
| @@ -91,9 +98,8 @@ inline void Read(T &var, const u32 addr) { | |||
| 91 | } | 98 | } |
| 92 | 99 | ||
| 93 | template <typename T> | 100 | template <typename T> |
| 94 | inline void Write(u32 addr, const T data) { | 101 | inline void Write(u32 vaddr, const T data) { |
| 95 | u32 vaddr = _VirtualAddress(addr); | 102 | |
| 96 | |||
| 97 | // Kernel memory command buffer | 103 | // Kernel memory command buffer |
| 98 | if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { | 104 | if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { |
| 99 | *(T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK] = data; | 105 | *(T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK] = data; |
| @@ -133,16 +139,14 @@ inline void Write(u32 addr, const T data) { | |||
| 133 | // _assert_msg_(MEMMAP, false, "umimplemented write to Configuration Memory"); | 139 | // _assert_msg_(MEMMAP, false, "umimplemented write to Configuration Memory"); |
| 134 | //} else if ((vaddr & 0xFFFFF000) == 0x1FF81000) { | 140 | //} else if ((vaddr & 0xFFFFF000) == 0x1FF81000) { |
| 135 | // _assert_msg_(MEMMAP, false, "umimplemented write to shared page"); | 141 | // _assert_msg_(MEMMAP, false, "umimplemented write to shared page"); |
| 136 | 142 | ||
| 137 | // Error out... | 143 | // Error out... |
| 138 | } else { | 144 | } else { |
| 139 | ERROR_LOG(MEMMAP, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, vaddr); | 145 | ERROR_LOG(MEMMAP, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, vaddr); |
| 140 | } | 146 | } |
| 141 | } | 147 | } |
| 142 | 148 | ||
| 143 | u8 *GetPointer(const u32 addr) { | 149 | u8 *GetPointer(const u32 vaddr) { |
| 144 | const u32 vaddr = _VirtualAddress(addr); | ||
| 145 | |||
| 146 | // Kernel memory command buffer | 150 | // Kernel memory command buffer |
| 147 | if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { | 151 | if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { |
| 148 | return g_kernel_mem + (vaddr & KERNEL_MEMORY_MASK); | 152 | return g_kernel_mem + (vaddr & KERNEL_MEMORY_MASK); |
| @@ -185,12 +189,12 @@ u8 *GetPointer(const u32 addr) { | |||
| 185 | */ | 189 | */ |
| 186 | u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) { | 190 | u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) { |
| 187 | MemoryBlock block; | 191 | MemoryBlock block; |
| 188 | 192 | ||
| 189 | block.base_address = HEAP_VADDR; | 193 | block.base_address = HEAP_VADDR; |
| 190 | block.size = size; | 194 | block.size = size; |
| 191 | block.operation = operation; | 195 | block.operation = operation; |
| 192 | block.permissions = permissions; | 196 | block.permissions = permissions; |
| 193 | 197 | ||
| 194 | if (g_heap_map.size() > 0) { | 198 | if (g_heap_map.size() > 0) { |
| 195 | const MemoryBlock last_block = g_heap_map.rbegin()->second; | 199 | const MemoryBlock last_block = g_heap_map.rbegin()->second; |
| 196 | block.address = last_block.address + last_block.size; | 200 | block.address = last_block.address + last_block.size; |
| @@ -208,12 +212,12 @@ u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) { | |||
| 208 | */ | 212 | */ |
| 209 | u32 MapBlock_HeapGSP(u32 size, u32 operation, u32 permissions) { | 213 | u32 MapBlock_HeapGSP(u32 size, u32 operation, u32 permissions) { |
| 210 | MemoryBlock block; | 214 | MemoryBlock block; |
| 211 | 215 | ||
| 212 | block.base_address = HEAP_GSP_VADDR; | 216 | block.base_address = HEAP_GSP_VADDR; |
| 213 | block.size = size; | 217 | block.size = size; |
| 214 | block.operation = operation; | 218 | block.operation = operation; |
| 215 | block.permissions = permissions; | 219 | block.permissions = permissions; |
| 216 | 220 | ||
| 217 | if (g_heap_gsp_map.size() > 0) { | 221 | if (g_heap_gsp_map.size() > 0) { |
| 218 | const MemoryBlock last_block = g_heap_gsp_map.rbegin()->second; | 222 | const MemoryBlock last_block = g_heap_gsp_map.rbegin()->second; |
| 219 | block.address = last_block.address + last_block.size; | 223 | block.address = last_block.address + last_block.size; |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e43e6e1bb..8e7b93acb 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -1,10 +1,22 @@ | |||
| 1 | set(SRCS video_core.cpp | 1 | set(SRCS clipper.cpp |
| 2 | command_processor.cpp | ||
| 3 | primitive_assembly.cpp | ||
| 4 | rasterizer.cpp | ||
| 2 | utils.cpp | 5 | utils.cpp |
| 6 | vertex_shader.cpp | ||
| 7 | video_core.cpp | ||
| 3 | renderer_opengl/renderer_opengl.cpp) | 8 | renderer_opengl/renderer_opengl.cpp) |
| 4 | 9 | ||
| 5 | set(HEADERS video_core.h | 10 | set(HEADERS clipper.h |
| 11 | command_processor.h | ||
| 12 | math.h | ||
| 13 | primitive_assembly.h | ||
| 14 | rasterizer.h | ||
| 6 | utils.h | 15 | utils.h |
| 16 | video_core.h | ||
| 7 | renderer_base.h | 17 | renderer_base.h |
| 18 | vertex_shader.h | ||
| 19 | video_core.h | ||
| 8 | renderer_opengl/renderer_opengl.h) | 20 | renderer_opengl/renderer_opengl.h) |
| 9 | 21 | ||
| 10 | add_library(video_core STATIC ${SRCS} ${HEADERS}) | 22 | add_library(video_core STATIC ${SRCS} ${HEADERS}) |
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp new file mode 100644 index 000000000..b7180328c --- /dev/null +++ b/src/video_core/clipper.cpp | |||
| @@ -0,0 +1,179 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <vector> | ||
| 6 | |||
| 7 | #include "clipper.h" | ||
| 8 | #include "pica.h" | ||
| 9 | #include "rasterizer.h" | ||
| 10 | #include "vertex_shader.h" | ||
| 11 | |||
| 12 | namespace Pica { | ||
| 13 | |||
| 14 | namespace Clipper { | ||
| 15 | |||
| 16 | struct ClippingEdge { | ||
| 17 | public: | ||
| 18 | enum Type { | ||
| 19 | POS_X = 0, | ||
| 20 | NEG_X = 1, | ||
| 21 | POS_Y = 2, | ||
| 22 | NEG_Y = 3, | ||
| 23 | POS_Z = 4, | ||
| 24 | NEG_Z = 5, | ||
| 25 | }; | ||
| 26 | |||
| 27 | ClippingEdge(Type type, float24 position) : type(type), pos(position) {} | ||
| 28 | |||
| 29 | bool IsInside(const OutputVertex& vertex) const { | ||
| 30 | switch (type) { | ||
| 31 | case POS_X: return vertex.pos.x <= pos * vertex.pos.w; | ||
| 32 | case NEG_X: return vertex.pos.x >= pos * vertex.pos.w; | ||
| 33 | case POS_Y: return vertex.pos.y <= pos * vertex.pos.w; | ||
| 34 | case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w; | ||
| 35 | |||
| 36 | // TODO: Check z compares ... should be 0..1 instead? | ||
| 37 | case POS_Z: return vertex.pos.z <= pos * vertex.pos.w; | ||
| 38 | |||
| 39 | default: | ||
| 40 | case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w; | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | bool IsOutSide(const OutputVertex& vertex) const { | ||
| 45 | return !IsInside(vertex); | ||
| 46 | } | ||
| 47 | |||
| 48 | OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { | ||
| 49 | auto dotpr = [this](const OutputVertex& vtx) { | ||
| 50 | switch (type) { | ||
| 51 | case POS_X: return vtx.pos.x - vtx.pos.w; | ||
| 52 | case NEG_X: return -vtx.pos.x - vtx.pos.w; | ||
| 53 | case POS_Y: return vtx.pos.y - vtx.pos.w; | ||
| 54 | case NEG_Y: return -vtx.pos.y - vtx.pos.w; | ||
| 55 | |||
| 56 | // TODO: Verify z clipping | ||
| 57 | case POS_Z: return vtx.pos.z - vtx.pos.w; | ||
| 58 | |||
| 59 | default: | ||
| 60 | case NEG_Z: return -vtx.pos.w; | ||
| 61 | } | ||
| 62 | }; | ||
| 63 | |||
| 64 | float24 dp = dotpr(v0); | ||
| 65 | float24 dp_prev = dotpr(v1); | ||
| 66 | float24 factor = dp_prev / (dp_prev - dp); | ||
| 67 | |||
| 68 | return OutputVertex::Lerp(factor, v0, v1); | ||
| 69 | } | ||
| 70 | |||
| 71 | private: | ||
| 72 | Type type; | ||
| 73 | float24 pos; | ||
| 74 | }; | ||
| 75 | |||
| 76 | static void InitScreenCoordinates(OutputVertex& vtx) | ||
| 77 | { | ||
| 78 | struct { | ||
| 79 | float24 halfsize_x; | ||
| 80 | float24 offset_x; | ||
| 81 | float24 halfsize_y; | ||
| 82 | float24 offset_y; | ||
| 83 | float24 zscale; | ||
| 84 | float24 offset_z; | ||
| 85 | } viewport; | ||
| 86 | |||
| 87 | viewport.halfsize_x = float24::FromRawFloat24(registers.viewport_size_x); | ||
| 88 | viewport.halfsize_y = float24::FromRawFloat24(registers.viewport_size_y); | ||
| 89 | viewport.offset_x = float24::FromFloat32(registers.viewport_corner.x); | ||
| 90 | viewport.offset_y = float24::FromFloat32(registers.viewport_corner.y); | ||
| 91 | viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range); | ||
| 92 | viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane); | ||
| 93 | |||
| 94 | // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not | ||
| 95 | vtx.screenpos[0] = (vtx.pos.x / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_x / float24::FromFloat32(2.0) + viewport.offset_x; | ||
| 96 | vtx.screenpos[1] = (vtx.pos.y / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; | ||
| 97 | vtx.screenpos[2] = viewport.offset_z - vtx.pos.z / vtx.pos.w * viewport.zscale; | ||
| 98 | } | ||
| 99 | |||
| 100 | void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | ||
| 101 | |||
| 102 | // TODO (neobrain): | ||
| 103 | // The list of output vertices has some fixed maximum size, | ||
| 104 | // however I haven't taken the time to figure out what it is exactly. | ||
| 105 | // For now, we hence just assume a maximal size of 1000 vertices. | ||
| 106 | const size_t max_vertices = 1000; | ||
| 107 | std::vector<OutputVertex> buffer_vertices; | ||
| 108 | std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 }; | ||
| 109 | |||
| 110 | // Make sure to reserve space for all vertices. | ||
| 111 | // Without this, buffer reallocation would invalidate references. | ||
| 112 | buffer_vertices.reserve(max_vertices); | ||
| 113 | |||
| 114 | // Simple implementation of the Sutherland-Hodgman clipping algorithm. | ||
| 115 | // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) | ||
| 116 | for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)), | ||
| 117 | ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)), | ||
| 118 | ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)), | ||
| 119 | ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)), | ||
| 120 | ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)), | ||
| 121 | ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) { | ||
| 122 | |||
| 123 | const std::vector<OutputVertex*> input_list = output_list; | ||
| 124 | output_list.clear(); | ||
| 125 | |||
| 126 | const OutputVertex* reference_vertex = input_list.back(); | ||
| 127 | |||
| 128 | for (const auto& vertex : input_list) { | ||
| 129 | // NOTE: This algorithm changes vertex order in some cases! | ||
| 130 | if (edge.IsInside(*vertex)) { | ||
| 131 | if (edge.IsOutSide(*reference_vertex)) { | ||
| 132 | buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); | ||
| 133 | output_list.push_back(&(buffer_vertices.back())); | ||
| 134 | } | ||
| 135 | |||
| 136 | output_list.push_back(vertex); | ||
| 137 | } else if (edge.IsInside(*reference_vertex)) { | ||
| 138 | buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); | ||
| 139 | output_list.push_back(&(buffer_vertices.back())); | ||
| 140 | } | ||
| 141 | |||
| 142 | reference_vertex = vertex; | ||
| 143 | } | ||
| 144 | |||
| 145 | // Need to have at least a full triangle to continue... | ||
| 146 | if (output_list.size() < 3) | ||
| 147 | return; | ||
| 148 | } | ||
| 149 | |||
| 150 | InitScreenCoordinates(*(output_list[0])); | ||
| 151 | InitScreenCoordinates(*(output_list[1])); | ||
| 152 | |||
| 153 | for (int i = 0; i < output_list.size() - 2; i ++) { | ||
| 154 | OutputVertex& vtx0 = *(output_list[0]); | ||
| 155 | OutputVertex& vtx1 = *(output_list[i+1]); | ||
| 156 | OutputVertex& vtx2 = *(output_list[i+2]); | ||
| 157 | |||
| 158 | InitScreenCoordinates(vtx2); | ||
| 159 | |||
| 160 | DEBUG_LOG(GPU, | ||
| 161 | "Triangle %d/%d (%d buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), " | ||
| 162 | "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " | ||
| 163 | "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", | ||
| 164 | i,output_list.size(), buffer_vertices.size(), | ||
| 165 | vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(),output_list.size(), | ||
| 166 | vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), | ||
| 167 | vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), | ||
| 168 | vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(), | ||
| 169 | vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(), | ||
| 170 | vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32()); | ||
| 171 | |||
| 172 | Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2); | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | |||
| 177 | } // namespace | ||
| 178 | |||
| 179 | } // namespace | ||
diff --git a/src/video_core/clipper.h b/src/video_core/clipper.h new file mode 100644 index 000000000..14d31ca1e --- /dev/null +++ b/src/video_core/clipper.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
namespace Pica {

// Forward declaration so that this header does not need to include vertex_shader.h
namespace VertexShader {
    struct OutputVertex;
}

namespace Clipper {

using VertexShader::OutputVertex;

// Clips the triangle given by the three vertices and passes the result on to
// the rasterizer. The vertices are taken by non-const reference because their
// screen position fields are written during processing.
void ProcessTriangle(OutputVertex& v0, OutputVertex& v1, OutputVertex& v2);

} // namespace

} // namespace
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp new file mode 100644 index 000000000..020a4da3f --- /dev/null +++ b/src/video_core/command_processor.cpp | |||
| @@ -0,0 +1,238 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "command_processor.h" | ||
| 6 | #include "math.h" | ||
| 7 | #include "pica.h" | ||
| 8 | #include "primitive_assembly.h" | ||
| 9 | #include "vertex_shader.h" | ||
| 10 | |||
| 11 | |||
| 12 | namespace Pica { | ||
| 13 | |||
| 14 | Regs registers; | ||
| 15 | |||
| 16 | namespace CommandProcessor { | ||
| 17 | |||
// Number of words currently buffered in uniform_write_buffer;
// reset to zero whenever a complete uniform vector has been written.
static int float_regs_counter = 0;

// Staging buffer for the raw words of the uniform vector that is being written
static u32 uniform_write_buffer[4];

// Used for VSLoadProgramData and VSLoadSwizzleData
// (word offsets, incremented after each submitted word and reset by the begin_load registers)
static u32 vs_binary_write_offset = 0;
static u32 vs_swizzle_write_offset = 0;
| 25 | |||
| 26 | static inline void WritePicaReg(u32 id, u32 value) { | ||
| 27 | u32 old_value = registers[id]; | ||
| 28 | registers[id] = value; | ||
| 29 | |||
| 30 | switch(id) { | ||
| 31 | // It seems like these trigger vertex rendering | ||
| 32 | case PICA_REG_INDEX(trigger_draw): | ||
| 33 | case PICA_REG_INDEX(trigger_draw_indexed): | ||
| 34 | { | ||
| 35 | const auto& attribute_config = registers.vertex_attributes; | ||
| 36 | const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); | ||
| 37 | |||
| 38 | // Information about internal vertex attributes | ||
| 39 | const u8* vertex_attribute_sources[16]; | ||
| 40 | u32 vertex_attribute_strides[16]; | ||
| 41 | u32 vertex_attribute_formats[16]; | ||
| 42 | u32 vertex_attribute_elements[16]; | ||
| 43 | u32 vertex_attribute_element_size[16]; | ||
| 44 | |||
| 45 | // Setup attribute data from loaders | ||
| 46 | for (int loader = 0; loader < 12; ++loader) { | ||
| 47 | const auto& loader_config = attribute_config.attribute_loaders[loader]; | ||
| 48 | |||
| 49 | const u8* load_address = base_address + loader_config.data_offset; | ||
| 50 | |||
| 51 | // TODO: What happens if a loader overwrites a previous one's data? | ||
| 52 | for (int component = 0; component < loader_config.component_count; ++component) { | ||
| 53 | u32 attribute_index = loader_config.GetComponent(component); | ||
| 54 | vertex_attribute_sources[attribute_index] = load_address; | ||
| 55 | vertex_attribute_strides[attribute_index] = loader_config.byte_count; | ||
| 56 | vertex_attribute_formats[attribute_index] = (u32)attribute_config.GetFormat(attribute_index); | ||
| 57 | vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | ||
| 58 | vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); | ||
| 59 | load_address += attribute_config.GetStride(attribute_index); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | // Load vertices | ||
| 64 | bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); | ||
| 65 | |||
| 66 | const auto& index_info = registers.index_array; | ||
| 67 | const u8* index_address_8 = (u8*)base_address + index_info.offset; | ||
| 68 | const u16* index_address_16 = (u16*)index_address_8; | ||
| 69 | bool index_u16 = (bool)index_info.format; | ||
| 70 | |||
| 71 | for (int index = 0; index < registers.num_vertices; ++index) | ||
| 72 | { | ||
| 73 | int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; | ||
| 74 | |||
| 75 | if (is_indexed) { | ||
| 76 | // TODO: Implement some sort of vertex cache! | ||
| 77 | } | ||
| 78 | |||
| 79 | // Initialize data for the current vertex | ||
| 80 | VertexShader::InputVertex input; | ||
| 81 | |||
| 82 | for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { | ||
| 83 | for (int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||
| 84 | const u8* srcdata = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; | ||
| 85 | const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata : | ||
| 86 | (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata : | ||
| 87 | (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata : | ||
| 88 | *(float*)srcdata; | ||
| 89 | input.attr[i][comp] = float24::FromFloat32(srcval); | ||
| 90 | DEBUG_LOG(GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", | ||
| 91 | comp, i, vertex, index, | ||
| 92 | attribute_config.GetBaseAddress(), | ||
| 93 | vertex_attribute_sources[i] - base_address, | ||
| 94 | srcdata - vertex_attribute_sources[i], | ||
| 95 | input.attr[i][comp].ToFloat32()); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); | ||
| 99 | |||
| 100 | if (is_indexed) { | ||
| 101 | // TODO: Add processed vertex to vertex cache! | ||
| 102 | } | ||
| 103 | |||
| 104 | PrimitiveAssembly::SubmitVertex(output); | ||
| 105 | } | ||
| 106 | break; | ||
| 107 | } | ||
| 108 | |||
| 109 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): | ||
| 110 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): | ||
| 111 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): | ||
| 112 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4): | ||
| 113 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5): | ||
| 114 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6): | ||
| 115 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): | ||
| 116 | case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): | ||
| 117 | { | ||
| 118 | auto& uniform_setup = registers.vs_uniform_setup; | ||
| 119 | |||
| 120 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||
| 121 | // it directly write the values? | ||
| 122 | uniform_write_buffer[float_regs_counter++] = value; | ||
| 123 | |||
| 124 | // Uniforms are written in a packed format such that 4 float24 values are encoded in | ||
| 125 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 126 | // written. | ||
| 127 | if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || | ||
| 128 | (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { | ||
| 129 | float_regs_counter = 0; | ||
| 130 | |||
| 131 | auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index); | ||
| 132 | |||
| 133 | if (uniform_setup.index > 95) { | ||
| 134 | ERROR_LOG(GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); | ||
| 135 | break; | ||
| 136 | } | ||
| 137 | |||
| 138 | // NOTE: The destination component order indeed is "backwards" | ||
| 139 | if (uniform_setup.IsFloat32()) { | ||
| 140 | for (auto i : {0,1,2,3}) | ||
| 141 | uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); | ||
| 142 | } else { | ||
| 143 | // TODO: Untested | ||
| 144 | uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8); | ||
| 145 | uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); | ||
| 146 | uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); | ||
| 147 | uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF); | ||
| 148 | } | ||
| 149 | |||
| 150 | DEBUG_LOG(GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, | ||
| 151 | uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), | ||
| 152 | uniform.w.ToFloat32()); | ||
| 153 | |||
| 154 | // TODO: Verify that this actually modifies the register! | ||
| 155 | uniform_setup.index = uniform_setup.index + 1; | ||
| 156 | } | ||
| 157 | break; | ||
| 158 | } | ||
| 159 | |||
| 160 | // Seems to be used to reset the write pointer for VSLoadProgramData | ||
| 161 | case PICA_REG_INDEX(vs_program.begin_load): | ||
| 162 | vs_binary_write_offset = 0; | ||
| 163 | break; | ||
| 164 | |||
| 165 | // Load shader program code | ||
| 166 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): | ||
| 167 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): | ||
| 168 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce): | ||
| 169 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf): | ||
| 170 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0): | ||
| 171 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1): | ||
| 172 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): | ||
| 173 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): | ||
| 174 | { | ||
| 175 | VertexShader::SubmitShaderMemoryChange(vs_binary_write_offset, value); | ||
| 176 | vs_binary_write_offset++; | ||
| 177 | break; | ||
| 178 | } | ||
| 179 | |||
| 180 | // Seems to be used to reset the write pointer for VSLoadSwizzleData | ||
| 181 | case PICA_REG_INDEX(vs_swizzle_patterns.begin_load): | ||
| 182 | vs_swizzle_write_offset = 0; | ||
| 183 | break; | ||
| 184 | |||
| 185 | // Load swizzle pattern data | ||
| 186 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): | ||
| 187 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): | ||
| 188 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8): | ||
| 189 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9): | ||
| 190 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da): | ||
| 191 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db): | ||
| 192 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): | ||
| 193 | case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): | ||
| 194 | { | ||
| 195 | VertexShader::SubmitSwizzleDataChange(vs_swizzle_write_offset, value); | ||
| 196 | vs_swizzle_write_offset++; | ||
| 197 | break; | ||
| 198 | } | ||
| 199 | |||
| 200 | default: | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) { | ||
| 206 | const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]); | ||
| 207 | |||
| 208 | u32* read_pointer = (u32*)first_command_word; | ||
| 209 | |||
| 210 | // TODO: Take parameter mask into consideration! | ||
| 211 | |||
| 212 | WritePicaReg(header.cmd_id, *read_pointer); | ||
| 213 | read_pointer += 2; | ||
| 214 | |||
| 215 | for (int i = 1; i < 1+header.extra_data_length; ++i) { | ||
| 216 | u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); | ||
| 217 | WritePicaReg(cmd, *read_pointer); | ||
| 218 | ++read_pointer; | ||
| 219 | } | ||
| 220 | |||
| 221 | // align read pointer to 8 bytes | ||
| 222 | if ((first_command_word - read_pointer) % 2) | ||
| 223 | ++read_pointer; | ||
| 224 | |||
| 225 | return read_pointer - first_command_word; | ||
| 226 | } | ||
| 227 | |||
| 228 | void ProcessCommandList(const u32* list, u32 size) { | ||
| 229 | u32* read_pointer = (u32*)list; | ||
| 230 | |||
| 231 | while (read_pointer < list + size) { | ||
| 232 | read_pointer += ExecuteCommandBlock(read_pointer); | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | } // namespace | ||
| 237 | |||
| 238 | } // namespace | ||
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h new file mode 100644 index 000000000..6b6241a25 --- /dev/null +++ b/src/video_core/command_processor.h | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | #include "pica.h" | ||
| 11 | |||
| 12 | namespace Pica { | ||
| 13 | |||
| 14 | namespace CommandProcessor { | ||
| 15 | |||
// Header word of a command block (the second 32 bit word of each block).
union CommandHeader {
    // Raw value of the header word
    u32 hex;

    // Index of the register which the (first) parameter is written to
    BitField< 0, 16, u32> cmd_id;
    // Not evaluated by the command processor yet
    BitField<16, 4, u32> parameter_mask;
    // Number of parameter words following the header
    BitField<20, 11, u32> extra_data_length;
    // If set, extra parameters are written to consecutive registers starting at cmd_id + 1;
    // otherwise all of them are written to cmd_id
    BitField<31, 1, u32> group_commands;
};
// The header is reinterpreted directly from command list memory, so its layout must be exact
static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout");
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");

// Executes all command blocks in the given list; `size` is given in 32 bit words.
void ProcessCommandList(const u32* list, u32 size);
| 28 | |||
| 29 | } // namespace | ||
| 30 | |||
| 31 | } // namespace | ||
diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h index 5d85f90b9..2ba873457 100644 --- a/src/video_core/gpu_debugger.h +++ b/src/video_core/gpu_debugger.h | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | #include "common/log.h" | 11 | #include "common/log.h" |
| 12 | 12 | ||
| 13 | #include "core/hle/service/gsp.h" | 13 | #include "core/hle/service/gsp.h" |
| 14 | |||
| 15 | #include "command_processor.h" | ||
| 14 | #include "pica.h" | 16 | #include "pica.h" |
| 15 | 17 | ||
| 16 | class GraphicsDebugger | 18 | class GraphicsDebugger |
| @@ -20,10 +22,10 @@ public: | |||
| 20 | // A vector of commands represented by their raw byte sequence | 22 | // A vector of commands represented by their raw byte sequence |
| 21 | struct PicaCommand : public std::vector<u32> | 23 | struct PicaCommand : public std::vector<u32> |
| 22 | { | 24 | { |
| 23 | const Pica::CommandHeader& GetHeader() const | 25 | const Pica::CommandProcessor::CommandHeader& GetHeader() const |
| 24 | { | 26 | { |
| 25 | const u32& val = at(1); | 27 | const u32& val = at(1); |
| 26 | return *(Pica::CommandHeader*)&val; | 28 | return *(Pica::CommandProcessor::CommandHeader*)&val; |
| 27 | } | 29 | } |
| 28 | }; | 30 | }; |
| 29 | 31 | ||
| @@ -99,7 +101,7 @@ public: | |||
| 99 | PicaCommandList cmdlist; | 101 | PicaCommandList cmdlist; |
| 100 | for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) | 102 | for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) |
| 101 | { | 103 | { |
| 102 | const Pica::CommandHeader header = static_cast<Pica::CommandHeader>(parse_pointer[1]); | 104 | const Pica::CommandProcessor::CommandHeader& header = *(Pica::CommandProcessor::CommandHeader*)(&parse_pointer[1]); |
| 103 | 105 | ||
| 104 | cmdlist.push_back(PicaCommand()); | 106 | cmdlist.push_back(PicaCommand()); |
| 105 | auto& cmd = cmdlist.back(); | 107 | auto& cmd = cmdlist.back(); |
diff --git a/src/video_core/math.h b/src/video_core/math.h new file mode 100644 index 000000000..7030f2cfb --- /dev/null +++ b/src/video_core/math.h | |||
| @@ -0,0 +1,578 @@ | |||
| 1 | // Licensed under GPLv2 | ||
| 2 | // Refer to the license.txt file included. | ||
| 3 | |||
| 4 | |||
| 5 | // Copyright 2014 Tony Wasserka | ||
| 6 | // All rights reserved. | ||
| 7 | // | ||
| 8 | // Redistribution and use in source and binary forms, with or without | ||
| 9 | // modification, are permitted provided that the following conditions are met: | ||
| 10 | // | ||
| 11 | // * Redistributions of source code must retain the above copyright | ||
| 12 | // notice, this list of conditions and the following disclaimer. | ||
| 13 | // * Redistributions in binary form must reproduce the above copyright | ||
| 14 | // notice, this list of conditions and the following disclaimer in the | ||
| 15 | // documentation and/or other materials provided with the distribution. | ||
| 16 | // * Neither the name of the owner nor the names of its contributors may | ||
| 17 | // be used to endorse or promote products derived from this software | ||
| 18 | // without specific prior written permission. | ||
| 19 | // | ||
| 20 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 21 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 22 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 23 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 24 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 25 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 26 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 27 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 28 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 29 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 30 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 31 | |||
| 32 | #pragma once | ||
| 33 | |||
| 34 | #include <cmath> | ||
| 35 | |||
| 36 | namespace Math { | ||
| 37 | |||
template<typename T> class Vec2;
template<typename T> class Vec3;
template<typename T> class Vec4;


// Two-component vector with component-wise arithmetic.
// The components are stored contiguously as x followed by y, which is what
// AsArray() and operator[] rely on.
template<typename T>
class Vec2 {
public:
    struct {
        T x,y;
    };

    // Pointer to the first component; index 0 is x, index 1 is y
    T* AsArray() { return &x; }
    const T* AsArray() const { return &x; }

    Vec2() = default;
    Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
    Vec2(const T& _x, const T& _y) : x(_x), y(_y) {}

    // Converts each component to T2 using a C-style cast
    template<typename T2>
    Vec2<T2> Cast() const {
        return Vec2<T2>((T2)x, (T2)y);
    }

    // Creates a vector with both components set to f
    static Vec2 AssignToAll(const T& f)
    {
        return Vec2<T>(f, f);
    }

    // Copies the components to the given array
    void Write(T a[2])
    {
        a[0] = x; a[1] = y;
    }

    Vec2 operator +(const Vec2& other) const
    {
        return Vec2(x+other.x, y+other.y);
    }
    void operator += (const Vec2 &other)
    {
        x+=other.x; y+=other.y;
    }
    Vec2 operator -(const Vec2& other) const
    {
        return Vec2(x-other.x, y-other.y);
    }
    void operator -= (const Vec2& other)
    {
        x-=other.x; y-=other.y;
    }
    Vec2 operator -() const
    {
        return Vec2(-x,-y);
    }
    // Component-wise product (not a dot product)
    Vec2 operator * (const Vec2& other) const
    {
        return Vec2(x*other.x, y*other.y);
    }
    // Scaling by a scalar of arbitrary type
    template<typename V>
    Vec2 operator * (const V& f) const
    {
        return Vec2(x*f,y*f);
    }
    template<typename V>
    void operator *= (const V& f)
    {
        x*=f; y*=f;
    }
    template<typename V>
    Vec2 operator / (const V& f) const
    {
        return Vec2(x/f,y/f);
    }
    template<typename V>
    void operator /= (const V& f)
    {
        *this = *this / f;
    }

    // Squared length
    T Length2() const
    {
        return x*x + y*y;
    }

    // Only implemented for T=float
    float Length() const;
    void SetLength(const float l);
    Vec2 WithLength(const float l) const;
    float Distance2To(Vec2 &other);
    Vec2 Normalized() const;
    float Normalize(); // returns the previous length, which is often useful

    T& operator [] (int i) //allow vector[1] = 3   (vector.y=3)
    {
        return *((&x) + i);
    }
    T operator [] (const int i) const
    {
        return *((&x) + i);
    }

    void SetZero()
    {
        x=0; y=0;
    }

    // Common aliases: UV (texel coordinates), ST (texture coordinates)
    T& u() { return x; }
    T& v() { return y; }
    T& s() { return x; }
    T& t() { return y; }

    const T& u() const { return x; }
    const T& v() const { return y; }
    const T& s() const { return x; }
    const T& t() const { return y; }

    // swizzlers - create a subvector of specific components
    Vec2 yx() const { return Vec2(y, x); }
    Vec2 vu() const { return Vec2(y, x); }
    Vec2 ts() const { return Vec2(y, x); }

    // Inserters to add new elements to effectively create larger vectors containing this Vec2.
    // They do not modify this vector and hence are usable on const objects as well.
    Vec3<T> InsertBeforeX(const T& value) const {
        return Vec3<T>(value, x, y);
    }
    Vec3<T> InsertBeforeY(const T& value) const {
        return Vec3<T>(x, value, y);
    }
    Vec3<T> Append(const T& value) const {
        return Vec3<T>(x, y, value);
    }
};
| 170 | |||
| 171 | template<typename T, typename V> | ||
| 172 | Vec2<T> operator * (const V& f, const Vec2<T>& vec) | ||
| 173 | { | ||
| 174 | return Vec2<T>(f*vec.x,f*vec.y); | ||
| 175 | } | ||
| 176 | |||
| 177 | typedef Vec2<float> Vec2f; | ||
| 178 | |||
| 179 | template<typename T> | ||
| 180 | class Vec3 | ||
| 181 | { | ||
| 182 | public: | ||
| 183 | struct | ||
| 184 | { | ||
| 185 | T x,y,z; | ||
| 186 | }; | ||
| 187 | |||
| 188 | T* AsArray() { return &x; } | ||
| 189 | |||
| 190 | Vec3() = default; | ||
| 191 | Vec3(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {} | ||
| 192 | Vec3(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {} | ||
| 193 | |||
| 194 | template<typename T2> | ||
| 195 | Vec3<T2> Cast() const { | ||
| 196 | return Vec3<T2>((T2)x, (T2)y, (T2)z); | ||
| 197 | } | ||
| 198 | |||
| 199 | // Only implemented for T=int and T=float | ||
| 200 | static Vec3 FromRGB(unsigned int rgb); | ||
| 201 | unsigned int ToRGB() const; // alpha bits set to zero | ||
| 202 | |||
| 203 | static Vec3 AssignToAll(const T& f) | ||
| 204 | { | ||
| 205 | return Vec3<T>(f, f, f); | ||
| 206 | } | ||
| 207 | |||
| 208 | void Write(T a[3]) | ||
| 209 | { | ||
| 210 | a[0] = x; a[1] = y; a[2] = z; | ||
| 211 | } | ||
| 212 | |||
| 213 | Vec3 operator +(const Vec3 &other) const | ||
| 214 | { | ||
| 215 | return Vec3(x+other.x, y+other.y, z+other.z); | ||
| 216 | } | ||
| 217 | void operator += (const Vec3 &other) | ||
| 218 | { | ||
| 219 | x+=other.x; y+=other.y; z+=other.z; | ||
| 220 | } | ||
| 221 | Vec3 operator -(const Vec3 &other) const | ||
| 222 | { | ||
| 223 | return Vec3(x-other.x, y-other.y, z-other.z); | ||
| 224 | } | ||
| 225 | void operator -= (const Vec3 &other) | ||
| 226 | { | ||
| 227 | x-=other.x; y-=other.y; z-=other.z; | ||
| 228 | } | ||
| 229 | Vec3 operator -() const | ||
| 230 | { | ||
| 231 | return Vec3(-x,-y,-z); | ||
| 232 | } | ||
| 233 | Vec3 operator * (const Vec3 &other) const | ||
| 234 | { | ||
| 235 | return Vec3(x*other.x, y*other.y, z*other.z); | ||
| 236 | } | ||
| 237 | template<typename V> | ||
| 238 | Vec3 operator * (const V& f) const | ||
| 239 | { | ||
| 240 | return Vec3(x*f,y*f,z*f); | ||
| 241 | } | ||
| 242 | template<typename V> | ||
| 243 | void operator *= (const V& f) | ||
| 244 | { | ||
| 245 | x*=f; y*=f; z*=f; | ||
| 246 | } | ||
| 247 | template<typename V> | ||
| 248 | Vec3 operator / (const V& f) const | ||
| 249 | { | ||
| 250 | return Vec3(x/f,y/f,z/f); | ||
| 251 | } | ||
| 252 | template<typename V> | ||
| 253 | void operator /= (const V& f) | ||
| 254 | { | ||
| 255 | *this = *this / f; | ||
| 256 | } | ||
| 257 | |||
| 258 | T Length2() const | ||
| 259 | { | ||
| 260 | return x*x + y*y + z*z; | ||
| 261 | } | ||
| 262 | |||
| 263 | // Only implemented for T=float | ||
| 264 | float Length() const; | ||
| 265 | void SetLength(const float l); | ||
| 266 | Vec3 WithLength(const float l) const; | ||
| 267 | float Distance2To(Vec3 &other); | ||
| 268 | Vec3 Normalized() const; | ||
| 269 | float Normalize(); // returns the previous length, which is often useful | ||
| 270 | |||
| 271 | T& operator [] (int i) //allow vector[2] = 3 (vector.z=3) | ||
| 272 | { | ||
| 273 | return *((&x) + i); | ||
| 274 | } | ||
| 275 | T operator [] (const int i) const | ||
| 276 | { | ||
| 277 | return *((&x) + i); | ||
| 278 | } | ||
| 279 | |||
| 280 | void SetZero() | ||
| 281 | { | ||
| 282 | x=0; y=0; z=0; | ||
| 283 | } | ||
| 284 | |||
| 285 | // Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates) | ||
| 286 | T& u() { return x; } | ||
| 287 | T& v() { return y; } | ||
| 288 | T& w() { return z; } | ||
| 289 | |||
| 290 | T& r() { return x; } | ||
| 291 | T& g() { return y; } | ||
| 292 | T& b() { return z; } | ||
| 293 | |||
| 294 | T& s() { return x; } | ||
| 295 | T& t() { return y; } | ||
| 296 | T& q() { return z; } | ||
| 297 | |||
| 298 | const T& u() const { return x; } | ||
| 299 | const T& v() const { return y; } | ||
| 300 | const T& w() const { return z; } | ||
| 301 | |||
| 302 | const T& r() const { return x; } | ||
| 303 | const T& g() const { return y; } | ||
| 304 | const T& b() const { return z; } | ||
| 305 | |||
| 306 | const T& s() const { return x; } | ||
| 307 | const T& t() const { return y; } | ||
| 308 | const T& q() const { return z; } | ||
| 309 | |||
| 310 | // swizzlers - create a subvector of specific components | ||
| 311 | // e.g. Vec2 uv() { return Vec2(x,y); } | ||
| 312 | // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) | ||
| 313 | #define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } | ||
| 314 | #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \ | ||
| 315 | _DEFINE_SWIZZLER2(a, b, a##b); \ | ||
| 316 | _DEFINE_SWIZZLER2(a, b, a2##b2); \ | ||
| 317 | _DEFINE_SWIZZLER2(a, b, a3##b3); \ | ||
| 318 | _DEFINE_SWIZZLER2(a, b, a4##b4); \ | ||
| 319 | _DEFINE_SWIZZLER2(b, a, b##a); \ | ||
| 320 | _DEFINE_SWIZZLER2(b, a, b2##a2); \ | ||
| 321 | _DEFINE_SWIZZLER2(b, a, b3##a3); \ | ||
| 322 | _DEFINE_SWIZZLER2(b, a, b4##a4); | ||
| 323 | |||
| 324 | DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t); | ||
| 325 | DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q); | ||
| 326 | DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q); | ||
| 327 | #undef DEFINE_SWIZZLER2 | ||
| 328 | #undef _DEFINE_SWIZZLER2 | ||
| 329 | |||
| 330 | // Inserters to add new elements to effectively create larger vectors containing this Vec2 | ||
| 331 | Vec4<T> InsertBeforeX(const T& value) { | ||
| 332 | return Vec4<T>(value, x, y, z); | ||
| 333 | } | ||
| 334 | Vec4<T> InsertBeforeY(const T& value) { | ||
| 335 | return Vec4<T>(x, value, y, z); | ||
| 336 | } | ||
| 337 | Vec4<T> InsertBeforeZ(const T& value) { | ||
| 338 | return Vec4<T>(x, y, value, z); | ||
| 339 | } | ||
| 340 | Vec4<T> Append(const T& value) { | ||
| 341 | return Vec4<T>(x, y, z, value); | ||
| 342 | } | ||
| 343 | }; | ||
| 344 | |||
| 345 | template<typename T, typename V> | ||
| 346 | Vec3<T> operator * (const V& f, const Vec3<T>& vec) | ||
| 347 | { | ||
| 348 | return Vec3<T>(f*vec.x,f*vec.y,f*vec.z); | ||
| 349 | } | ||
| 350 | |||
| 351 | typedef Vec3<float> Vec3f; | ||
| 352 | |||
| 353 | template<typename T> | ||
| 354 | class Vec4 | ||
| 355 | { | ||
| 356 | public: | ||
| 357 | struct | ||
| 358 | { | ||
| 359 | T x,y,z,w; | ||
| 360 | }; | ||
| 361 | |||
| 362 | T* AsArray() { return &x; } | ||
| 363 | |||
| 364 | Vec4() = default; | ||
| 365 | Vec4(const T a[4]) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {} | ||
| 366 | Vec4(const T& _x, const T& _y, const T& _z, const T& _w) : x(_x), y(_y), z(_z), w(_w) {} | ||
| 367 | |||
| 368 | template<typename T2> | ||
| 369 | Vec4<T2> Cast() const { | ||
| 370 | return Vec4<T2>((T2)x, (T2)y, (T2)z, (T2)w); | ||
| 371 | } | ||
| 372 | |||
| 373 | // Only implemented for T=int and T=float | ||
| 374 | static Vec4 FromRGBA(unsigned int rgba); | ||
| 375 | unsigned int ToRGBA() const; | ||
| 376 | |||
| 377 | static Vec4 AssignToAll(const T& f) { | ||
| 378 | return Vec4<T>(f, f, f, f); | ||
| 379 | } | ||
| 380 | |||
| 381 | void Write(T a[4]) | ||
| 382 | { | ||
| 383 | a[0] = x; a[1] = y; a[2] = z; a[3] = w; | ||
| 384 | } | ||
| 385 | |||
| 386 | Vec4 operator +(const Vec4& other) const | ||
| 387 | { | ||
| 388 | return Vec4(x+other.x, y+other.y, z+other.z, w+other.w); | ||
| 389 | } | ||
| 390 | void operator += (const Vec4& other) | ||
| 391 | { | ||
| 392 | x+=other.x; y+=other.y; z+=other.z; w+=other.w; | ||
| 393 | } | ||
| 394 | Vec4 operator -(const Vec4 &other) const | ||
| 395 | { | ||
| 396 | return Vec4(x-other.x, y-other.y, z-other.z, w-other.w); | ||
| 397 | } | ||
| 398 | void operator -= (const Vec4 &other) | ||
| 399 | { | ||
| 400 | x-=other.x; y-=other.y; z-=other.z; w-=other.w; | ||
| 401 | } | ||
| 402 | Vec4 operator -() const | ||
| 403 | { | ||
| 404 | return Vec4(-x,-y,-z,-w); | ||
| 405 | } | ||
| 406 | Vec4 operator * (const Vec4 &other) const | ||
| 407 | { | ||
| 408 | return Vec4(x*other.x, y*other.y, z*other.z, w*other.w); | ||
| 409 | } | ||
| 410 | template<typename V> | ||
| 411 | Vec4 operator * (const V& f) const | ||
| 412 | { | ||
| 413 | return Vec4(x*f,y*f,z*f,w*f); | ||
| 414 | } | ||
| 415 | template<typename V> | ||
| 416 | void operator *= (const V& f) | ||
| 417 | { | ||
| 418 | x*=f; y*=f; z*=f; w*=f; | ||
| 419 | } | ||
| 420 | template<typename V> | ||
| 421 | Vec4 operator / (const V& f) const | ||
| 422 | { | ||
| 423 | return Vec4(x/f,y/f,z/f,w/f); | ||
| 424 | } | ||
| 425 | template<typename V> | ||
| 426 | void operator /= (const V& f) | ||
| 427 | { | ||
| 428 | *this = *this / f; | ||
| 429 | } | ||
| 430 | |||
| 431 | T Length2() const | ||
| 432 | { | ||
| 433 | return x*x + y*y + z*z + w*w; | ||
| 434 | } | ||
| 435 | |||
| 436 | // Only implemented for T=float | ||
| 437 | float Length() const; | ||
| 438 | void SetLength(const float l); | ||
| 439 | Vec4 WithLength(const float l) const; | ||
| 440 | float Distance2To(Vec4 &other); | ||
| 441 | Vec4 Normalized() const; | ||
| 442 | float Normalize(); // returns the previous length, which is often useful | ||
| 443 | |||
| 444 | T& operator [] (int i) //allow vector[2] = 3 (vector.z=3) | ||
| 445 | { | ||
| 446 | return *((&x) + i); | ||
| 447 | } | ||
| 448 | T operator [] (const int i) const | ||
| 449 | { | ||
| 450 | return *((&x) + i); | ||
| 451 | } | ||
| 452 | |||
| 453 | void SetZero() | ||
| 454 | { | ||
| 455 | x=0; y=0; z=0; | ||
| 456 | } | ||
| 457 | |||
| 458 | // Common alias: RGBA (colors) | ||
| 459 | T& r() { return x; } | ||
| 460 | T& g() { return y; } | ||
| 461 | T& b() { return z; } | ||
| 462 | T& a() { return w; } | ||
| 463 | |||
| 464 | const T& r() const { return x; } | ||
| 465 | const T& g() const { return y; } | ||
| 466 | const T& b() const { return z; } | ||
| 467 | const T& a() const { return w; } | ||
| 468 | |||
| 469 | // swizzlers - create a subvector of specific components | ||
| 470 | // e.g. Vec2 uv() { return Vec2(x,y); } | ||
| 471 | // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) | ||
| 472 | #define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } | ||
| 473 | #define DEFINE_SWIZZLER2(a, b, a2, b2) \ | ||
| 474 | _DEFINE_SWIZZLER2(a, b, a##b); \ | ||
| 475 | _DEFINE_SWIZZLER2(a, b, a2##b2); \ | ||
| 476 | _DEFINE_SWIZZLER2(b, a, b##a); \ | ||
| 477 | _DEFINE_SWIZZLER2(b, a, b2##a2); | ||
| 478 | |||
| 479 | DEFINE_SWIZZLER2(x, y, r, g); | ||
| 480 | DEFINE_SWIZZLER2(x, z, r, b); | ||
| 481 | DEFINE_SWIZZLER2(x, w, r, a); | ||
| 482 | DEFINE_SWIZZLER2(y, z, g, b); | ||
| 483 | DEFINE_SWIZZLER2(y, w, g, a); | ||
| 484 | DEFINE_SWIZZLER2(z, w, b, a); | ||
| 485 | #undef DEFINE_SWIZZLER2 | ||
| 486 | #undef _DEFINE_SWIZZLER2 | ||
| 487 | |||
| 488 | #define _DEFINE_SWIZZLER3(a, b, c, name) Vec3<T> name() const { return Vec3<T>(a, b, c); } | ||
| 489 | #define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \ | ||
| 490 | _DEFINE_SWIZZLER3(a, b, c, a##b##c); \ | ||
| 491 | _DEFINE_SWIZZLER3(a, c, b, a##c##b); \ | ||
| 492 | _DEFINE_SWIZZLER3(b, a, c, b##a##c); \ | ||
| 493 | _DEFINE_SWIZZLER3(b, c, a, b##c##a); \ | ||
| 494 | _DEFINE_SWIZZLER3(c, a, b, c##a##b); \ | ||
| 495 | _DEFINE_SWIZZLER3(c, b, a, c##b##a); \ | ||
| 496 | _DEFINE_SWIZZLER3(a, b, c, a2##b2##c2); \ | ||
| 497 | _DEFINE_SWIZZLER3(a, c, b, a2##c2##b2); \ | ||
| 498 | _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \ | ||
| 499 | _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \ | ||
| 500 | _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \ | ||
| 501 | _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2); | ||
| 502 | |||
| 503 | DEFINE_SWIZZLER3(x, y, z, r, g, b); | ||
| 504 | DEFINE_SWIZZLER3(x, y, w, r, g, a); | ||
| 505 | DEFINE_SWIZZLER3(x, z, w, r, b, a); | ||
| 506 | DEFINE_SWIZZLER3(y, z, w, g, b, a); | ||
| 507 | #undef DEFINE_SWIZZLER3 | ||
| 508 | #undef _DEFINE_SWIZZLER3 | ||
| 509 | }; | ||
| 510 | |||
| 511 | |||
| 512 | template<typename T, typename V> | ||
| 513 | Vec4<T> operator * (const V& f, const Vec4<T>& vec) | ||
| 514 | { | ||
| 515 | return Vec4<T>(f*vec.x,f*vec.y,f*vec.z,f*vec.w); | ||
| 516 | } | ||
| 517 | |||
| 518 | typedef Vec4<float> Vec4f; | ||
| 519 | |||
| 520 | |||
| 521 | template<typename T> | ||
| 522 | static inline T Dot(const Vec2<T>& a, const Vec2<T>& b) | ||
| 523 | { | ||
| 524 | return a.x*b.x + a.y*b.y; | ||
| 525 | } | ||
| 526 | |||
| 527 | template<typename T> | ||
| 528 | static inline T Dot(const Vec3<T>& a, const Vec3<T>& b) | ||
| 529 | { | ||
| 530 | return a.x*b.x + a.y*b.y + a.z*b.z; | ||
| 531 | } | ||
| 532 | |||
| 533 | template<typename T> | ||
| 534 | static inline T Dot(const Vec4<T>& a, const Vec4<T>& b) | ||
| 535 | { | ||
| 536 | return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; | ||
| 537 | } | ||
| 538 | |||
| 539 | template<typename T> | ||
| 540 | static inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b) | ||
| 541 | { | ||
| 542 | return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); | ||
| 543 | } | ||
| 544 | |||
| 545 | // linear interpolation via float: 0.0=begin, 1.0=end | ||
| 546 | template<typename X> | ||
| 547 | static inline X Lerp(const X& begin, const X& end, const float t) | ||
| 548 | { | ||
| 549 | return begin*(1.f-t) + end*t; | ||
| 550 | } | ||
| 551 | |||
| 552 | // linear interpolation via int: 0=begin, base=end | ||
| 553 | template<typename X, int base> | ||
| 554 | static inline X LerpInt(const X& begin, const X& end, const int t) | ||
| 555 | { | ||
| 556 | return (begin*(base-t) + end*t) / base; | ||
| 557 | } | ||
| 558 | |||
| 559 | // Utility vector factories | ||
| 560 | template<typename T> | ||
| 561 | static inline Vec2<T> MakeVec2(const T& x, const T& y) | ||
| 562 | { | ||
| 563 | return Vec2<T>{x, y}; | ||
| 564 | } | ||
| 565 | |||
| 566 | template<typename T> | ||
| 567 | static inline Vec3<T> MakeVec3(const T& x, const T& y, const T& z) | ||
| 568 | { | ||
| 569 | return Vec3<T>{x, y, z}; | ||
| 570 | } | ||
| 571 | |||
| 572 | template<typename T> | ||
| 573 | static inline Vec4<T> MakeVec4(const T& x, const T& y, const T& z, const T& w) | ||
| 574 | { | ||
| 575 | return Vec4<T>{x, y, z, w}; | ||
| 576 | } | ||
| 577 | |||
| 578 | } // namespace | ||
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index f0fa3aba9..81af57336 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -4,126 +4,567 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | ||
| 7 | #include <initializer_list> | 8 | #include <initializer_list> |
| 8 | #include <map> | 9 | #include <map> |
| 9 | 10 | ||
| 10 | #include "common/bit_field.h" | 11 | #include "common/bit_field.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 12 | #include "common/register_set.h" | 13 | |
| 14 | #include "core/mem_map.h" | ||
| 13 | 15 | ||
| 14 | namespace Pica { | 16 | namespace Pica { |
| 15 | 17 | ||
| 18 | // Returns index corresponding to the Regs member labeled by field_name | ||
| 19 | // TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions | ||
| 20 | // when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])). | ||
| 21 | // For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members | ||
| 22 | // Hopefully, this will be fixed sometime in the future. | ||
| 23 | // For lack of better alternatives, we currently hardcode the offsets when constant | ||
| 24 | // expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts | ||
| 25 | // will then make sure the offsets indeed match the automatically calculated ones). | ||
| 26 | #define PICA_REG_INDEX(field_name) (offsetof(Pica::Regs, field_name) / sizeof(u32)) | ||
| 27 | #if defined(_MSC_VER) | ||
| 28 | #define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index) | ||
| 29 | #else | ||
| 30 | // NOTE: Yeah, hacking in a static_assert here just to workaround the lacking MSVC compiler | ||
| 31 | // really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX | ||
| 32 | // and then performs a (no-op) cast to size_t iff the second argument matches the expected | ||
| 33 | // field offset. Otherwise, the compiler will fail to compile this code. | ||
| 34 | #define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ | ||
| 35 | ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), size_t>::type)PICA_REG_INDEX(field_name)) | ||
| 36 | #endif // _MSC_VER | ||
| 37 | |||
| 16 | struct Regs { | 38 | struct Regs { |
| 17 | enum Id : u32 { | 39 | |
| 18 | ViewportSizeX = 0x41, | 40 | // helper macro to properly align structure members. |
| 19 | ViewportInvSizeX = 0x42, | 41 | // Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121", |
| 20 | ViewportSizeY = 0x43, | 42 | // depending on the current source line to make sure variable names are unique. |
| 21 | ViewportInvSizeY = 0x44, | 43 | #define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y |
| 22 | ViewportCorner = 0x68, | 44 | #define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y) |
| 23 | DepthBufferFormat = 0x116, | 45 | #define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)]; |
| 24 | ColorBufferFormat = 0x117, | 46 | |
| 25 | DepthBufferAddress = 0x11C, | 47 | INSERT_PADDING_WORDS(0x41); |
| 26 | ColorBufferAddress = 0x11D, | 48 | |
| 27 | ColorBufferSize = 0x11E, | 49 | BitField<0, 24, u32> viewport_size_x; |
| 28 | 50 | INSERT_PADDING_WORDS(0x1); | |
| 29 | VertexArrayBaseAddr = 0x200, | 51 | BitField<0, 24, u32> viewport_size_y; |
| 30 | VertexDescriptor = 0x201, // 0x202 | 52 | |
| 31 | VertexAttributeOffset = 0x203, // 0x206,0x209,0x20C,0x20F,0x212,0x215,0x218,0x21B,0x21E,0x221,0x224 | 53 | INSERT_PADDING_WORDS(0x9); |
| 32 | VertexAttributeInfo0 = 0x204, // 0x207,0x20A,0x20D,0x210,0x213,0x216,0x219,0x21C,0x21F,0x222,0x225 | 54 | |
| 33 | VertexAttributeInfo1 = 0x205, // 0x208,0x20B,0x20E,0x211,0x214,0x217,0x21A,0x21D,0x220,0x223,0x226 | 55 | BitField<0, 24, u32> viewport_depth_range; // float24 |
| 34 | 56 | BitField<0, 24, u32> viewport_depth_far_plane; // float24 | |
| 35 | NumIds = 0x300, | 57 | |
| 58 | INSERT_PADDING_WORDS(0x1); | ||
| 59 | |||
| 60 | union { | ||
| 61 | // Maps components of output vertex attributes to semantics | ||
| 62 | enum Semantic : u32 | ||
| 63 | { | ||
| 64 | POSITION_X = 0, | ||
| 65 | POSITION_Y = 1, | ||
| 66 | POSITION_Z = 2, | ||
| 67 | POSITION_W = 3, | ||
| 68 | |||
| 69 | COLOR_R = 8, | ||
| 70 | COLOR_G = 9, | ||
| 71 | COLOR_B = 10, | ||
| 72 | COLOR_A = 11, | ||
| 73 | |||
| 74 | TEXCOORD0_U = 12, | ||
| 75 | TEXCOORD0_V = 13, | ||
| 76 | TEXCOORD1_U = 14, | ||
| 77 | TEXCOORD1_V = 15, | ||
| 78 | TEXCOORD2_U = 22, | ||
| 79 | TEXCOORD2_V = 23, | ||
| 80 | |||
| 81 | INVALID = 31, | ||
| 82 | }; | ||
| 83 | |||
| 84 | BitField< 0, 5, Semantic> map_x; | ||
| 85 | BitField< 8, 5, Semantic> map_y; | ||
| 86 | BitField<16, 5, Semantic> map_z; | ||
| 87 | BitField<24, 5, Semantic> map_w; | ||
| 88 | } vs_output_attributes[7]; | ||
| 89 | |||
| 90 | INSERT_PADDING_WORDS(0x11); | ||
| 91 | |||
| 92 | union { | ||
| 93 | BitField< 0, 16, u32> x; | ||
| 94 | BitField<16, 16, u32> y; | ||
| 95 | } viewport_corner; | ||
| 96 | |||
| 97 | INSERT_PADDING_WORDS(0xa7); | ||
| 98 | |||
| 99 | struct { | ||
| 100 | enum ColorFormat : u32 { | ||
| 101 | RGBA8 = 0, | ||
| 102 | RGB8 = 1, | ||
| 103 | RGBA5551 = 2, | ||
| 104 | RGB565 = 3, | ||
| 105 | RGBA4 = 4, | ||
| 106 | }; | ||
| 107 | |||
| 108 | INSERT_PADDING_WORDS(0x6); | ||
| 109 | |||
| 110 | u32 depth_format; | ||
| 111 | u32 color_format; | ||
| 112 | |||
| 113 | INSERT_PADDING_WORDS(0x4); | ||
| 114 | |||
| 115 | u32 depth_buffer_address; | ||
| 116 | u32 color_buffer_address; | ||
| 117 | |||
| 118 | union { | ||
| 119 | // Apparently, the framebuffer width is stored as expected, | ||
| 120 | // while the height is stored as the actual height minus one. | ||
| 121 | // Hence, don't access these fields directly but use the accessors | ||
| 122 | // GetWidth() and GetHeight() instead. | ||
| 123 | BitField< 0, 11, u32> width; | ||
| 124 | BitField<12, 10, u32> height; | ||
| 125 | }; | ||
| 126 | |||
| 127 | INSERT_PADDING_WORDS(0x1); | ||
| 128 | |||
| 129 | inline u32 GetColorBufferAddress() const { | ||
| 130 | return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(color_buffer_address)); | ||
| 131 | } | ||
| 132 | inline u32 GetDepthBufferAddress() const { | ||
| 133 | return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(depth_buffer_address)); | ||
| 134 | } | ||
| 135 | |||
| 136 | inline u32 GetWidth() const { | ||
| 137 | return width; | ||
| 138 | } | ||
| 139 | |||
| 140 | inline u32 GetHeight() const { | ||
| 141 | return height + 1; | ||
| 142 | } | ||
| 143 | } framebuffer; | ||
| 144 | |||
| 145 | INSERT_PADDING_WORDS(0xe0); | ||
| 146 | |||
| 147 | struct { | ||
| 148 | enum class Format : u64 { | ||
| 149 | BYTE = 0, | ||
| 150 | UBYTE = 1, | ||
| 151 | SHORT = 2, | ||
| 152 | FLOAT = 3, | ||
| 153 | }; | ||
| 154 | |||
| 155 | BitField<0, 29, u32> base_address; | ||
| 156 | |||
| 157 | inline u32 GetBaseAddress() const { | ||
| 158 | // TODO: Ugly, should fix PhysicalToVirtualAddress instead | ||
| 159 | return DecodeAddressRegister(base_address) - Memory::FCRAM_PADDR + Memory::HEAP_GSP_VADDR; | ||
| 160 | } | ||
| 161 | |||
| 162 | // Descriptor for internal vertex attributes | ||
| 163 | union { | ||
| 164 | BitField< 0, 2, Format> format0; // size of one element | ||
| 165 | BitField< 2, 2, u64> size0; // number of elements minus 1 | ||
| 166 | BitField< 4, 2, Format> format1; | ||
| 167 | BitField< 6, 2, u64> size1; | ||
| 168 | BitField< 8, 2, Format> format2; | ||
| 169 | BitField<10, 2, u64> size2; | ||
| 170 | BitField<12, 2, Format> format3; | ||
| 171 | BitField<14, 2, u64> size3; | ||
| 172 | BitField<16, 2, Format> format4; | ||
| 173 | BitField<18, 2, u64> size4; | ||
| 174 | BitField<20, 2, Format> format5; | ||
| 175 | BitField<22, 2, u64> size5; | ||
| 176 | BitField<24, 2, Format> format6; | ||
| 177 | BitField<26, 2, u64> size6; | ||
| 178 | BitField<28, 2, Format> format7; | ||
| 179 | BitField<30, 2, u64> size7; | ||
| 180 | BitField<32, 2, Format> format8; | ||
| 181 | BitField<34, 2, u64> size8; | ||
| 182 | BitField<36, 2, Format> format9; | ||
| 183 | BitField<38, 2, u64> size9; | ||
| 184 | BitField<40, 2, Format> format10; | ||
| 185 | BitField<42, 2, u64> size10; | ||
| 186 | BitField<44, 2, Format> format11; | ||
| 187 | BitField<46, 2, u64> size11; | ||
| 188 | |||
| 189 | BitField<48, 12, u64> attribute_mask; | ||
| 190 | |||
| 191 | // number of total attributes minus 1 | ||
| 192 | BitField<60, 4, u64> num_extra_attributes; | ||
| 193 | }; | ||
| 194 | |||
| 195 | inline Format GetFormat(int n) const { | ||
| 196 | Format formats[] = { | ||
| 197 | format0, format1, format2, format3, | ||
| 198 | format4, format5, format6, format7, | ||
| 199 | format8, format9, format10, format11 | ||
| 200 | }; | ||
| 201 | return formats[n]; | ||
| 202 | } | ||
| 203 | |||
| 204 | inline int GetNumElements(int n) const { | ||
| 205 | u64 sizes[] = { | ||
| 206 | size0, size1, size2, size3, | ||
| 207 | size4, size5, size6, size7, | ||
| 208 | size8, size9, size10, size11 | ||
| 209 | }; | ||
| 210 | return (int)sizes[n]+1; | ||
| 211 | } | ||
| 212 | |||
| 213 | inline int GetElementSizeInBytes(int n) const { | ||
| 214 | return (GetFormat(n) == Format::FLOAT) ? 4 : | ||
| 215 | (GetFormat(n) == Format::SHORT) ? 2 : 1; | ||
| 216 | } | ||
| 217 | |||
| 218 | inline int GetStride(int n) const { | ||
| 219 | return GetNumElements(n) * GetElementSizeInBytes(n); | ||
| 220 | } | ||
| 221 | |||
| 222 | inline int GetNumTotalAttributes() const { | ||
| 223 | return (int)num_extra_attributes+1; | ||
| 224 | } | ||
| 225 | |||
| 226 | // Attribute loaders map the source vertex data to input attributes | ||
| 227 | // This e.g. allows to load different attributes from different memory locations | ||
| 228 | struct { | ||
| 229 | // Source attribute data offset from the base address | ||
| 230 | u32 data_offset; | ||
| 231 | |||
| 232 | union { | ||
| 233 | BitField< 0, 4, u64> comp0; | ||
| 234 | BitField< 4, 4, u64> comp1; | ||
| 235 | BitField< 8, 4, u64> comp2; | ||
| 236 | BitField<12, 4, u64> comp3; | ||
| 237 | BitField<16, 4, u64> comp4; | ||
| 238 | BitField<20, 4, u64> comp5; | ||
| 239 | BitField<24, 4, u64> comp6; | ||
| 240 | BitField<28, 4, u64> comp7; | ||
| 241 | BitField<32, 4, u64> comp8; | ||
| 242 | BitField<36, 4, u64> comp9; | ||
| 243 | BitField<40, 4, u64> comp10; | ||
| 244 | BitField<44, 4, u64> comp11; | ||
| 245 | |||
| 246 | // bytes for a single vertex in this loader | ||
| 247 | BitField<48, 8, u64> byte_count; | ||
| 248 | |||
| 249 | BitField<60, 4, u64> component_count; | ||
| 250 | }; | ||
| 251 | |||
| 252 | inline int GetComponent(int n) const { | ||
| 253 | u64 components[] = { | ||
| 254 | comp0, comp1, comp2, comp3, | ||
| 255 | comp4, comp5, comp6, comp7, | ||
| 256 | comp8, comp9, comp10, comp11 | ||
| 257 | }; | ||
| 258 | return (int)components[n]; | ||
| 259 | } | ||
| 260 | } attribute_loaders[12]; | ||
| 261 | } vertex_attributes; | ||
| 262 | |||
| 263 | struct { | ||
| 264 | enum IndexFormat : u32 { | ||
| 265 | BYTE = 0, | ||
| 266 | SHORT = 1, | ||
| 267 | }; | ||
| 268 | |||
| 269 | union { | ||
| 270 | BitField<0, 31, u32> offset; // relative to base attribute address | ||
| 271 | BitField<31, 1, IndexFormat> format; | ||
| 272 | }; | ||
| 273 | } index_array; | ||
| 274 | |||
| 275 | // Number of vertices to render | ||
| 276 | u32 num_vertices; | ||
| 277 | |||
| 278 | INSERT_PADDING_WORDS(0x5); | ||
| 279 | |||
| 280 | // These two trigger rendering of triangles | ||
| 281 | u32 trigger_draw; | ||
| 282 | u32 trigger_draw_indexed; | ||
| 283 | |||
| 284 | INSERT_PADDING_WORDS(0x2e); | ||
| 285 | |||
| 286 | enum class TriangleTopology : u32 { | ||
| 287 | List = 0, | ||
| 288 | Strip = 1, | ||
| 289 | Fan = 2, | ||
| 290 | ListIndexed = 3, // TODO: No idea if this is correct | ||
| 36 | }; | 291 | }; |
| 37 | 292 | ||
| 38 | template<Id id> | 293 | BitField<8, 2, TriangleTopology> triangle_topology; |
| 39 | union Struct; | ||
| 40 | }; | ||
| 41 | 294 | ||
| 42 | static inline Regs::Id VertexAttributeOffset(int n) | 295 | INSERT_PADDING_WORDS(0x5b); |
| 43 | { | ||
| 44 | return static_cast<Regs::Id>(0x203 + 3*n); | ||
| 45 | } | ||
| 46 | 296 | ||
| 47 | static inline Regs::Id VertexAttributeInfo0(int n) | 297 | // Offset to shader program entry point (in words) |
| 48 | { | 298 | BitField<0, 16, u32> vs_main_offset; |
| 49 | return static_cast<Regs::Id>(0x204 + 3*n); | ||
| 50 | } | ||
| 51 | 299 | ||
| 52 | static inline Regs::Id VertexAttributeInfo1(int n) | 300 | union { |
| 53 | { | 301 | BitField< 0, 4, u64> attribute0_register; |
| 54 | return static_cast<Regs::Id>(0x205 + 3*n); | 302 | BitField< 4, 4, u64> attribute1_register; |
| 55 | } | 303 | BitField< 8, 4, u64> attribute2_register; |
| 304 | BitField<12, 4, u64> attribute3_register; | ||
| 305 | BitField<16, 4, u64> attribute4_register; | ||
| 306 | BitField<20, 4, u64> attribute5_register; | ||
| 307 | BitField<24, 4, u64> attribute6_register; | ||
| 308 | BitField<28, 4, u64> attribute7_register; | ||
| 309 | BitField<32, 4, u64> attribute8_register; | ||
| 310 | BitField<36, 4, u64> attribute9_register; | ||
| 311 | BitField<40, 4, u64> attribute10_register; | ||
| 312 | BitField<44, 4, u64> attribute11_register; | ||
| 313 | BitField<48, 4, u64> attribute12_register; | ||
| 314 | BitField<52, 4, u64> attribute13_register; | ||
| 315 | BitField<56, 4, u64> attribute14_register; | ||
| 316 | BitField<60, 4, u64> attribute15_register; | ||
| 56 | 317 | ||
| 57 | union CommandHeader { | 318 | int GetRegisterForAttribute(int attribute_index) { |
| 58 | CommandHeader(u32 h) : hex(h) {} | 319 | u64 fields[] = { |
| 320 | attribute0_register, attribute1_register, attribute2_register, attribute3_register, | ||
| 321 | attribute4_register, attribute5_register, attribute6_register, attribute7_register, | ||
| 322 | attribute8_register, attribute9_register, attribute10_register, attribute11_register, | ||
| 323 | attribute12_register, attribute13_register, attribute14_register, attribute15_register, | ||
| 324 | }; | ||
| 325 | return (int)fields[attribute_index]; | ||
| 326 | } | ||
| 327 | } vs_input_register_map; | ||
| 59 | 328 | ||
| 60 | u32 hex; | 329 | INSERT_PADDING_WORDS(0x3); |
| 61 | 330 | ||
| 62 | BitField< 0, 16, Regs::Id> cmd_id; | 331 | struct { |
| 63 | BitField<16, 4, u32> parameter_mask; | 332 | enum Format : u32 |
| 64 | BitField<20, 11, u32> extra_data_length; | 333 | { |
| 65 | BitField<31, 1, u32> group_commands; | 334 | FLOAT24 = 0, |
| 66 | }; | 335 | FLOAT32 = 1 |
| 336 | }; | ||
| 67 | 337 | ||
| 68 | static std::map<Regs::Id, const char*> command_names = { | 338 | bool IsFloat32() const { |
| 69 | {Regs::ViewportSizeX, "ViewportSizeX" }, | 339 | return format == FLOAT32; |
| 70 | {Regs::ViewportInvSizeX, "ViewportInvSizeX" }, | 340 | } |
| 71 | {Regs::ViewportSizeY, "ViewportSizeY" }, | 341 | |
| 72 | {Regs::ViewportInvSizeY, "ViewportInvSizeY" }, | 342 | union { |
| 73 | {Regs::ViewportCorner, "ViewportCorner" }, | 343 | // Index of the next uniform to write to |
| 74 | {Regs::DepthBufferFormat, "DepthBufferFormat" }, | 344 | // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices |
| 75 | {Regs::ColorBufferFormat, "ColorBufferFormat" }, | 345 | BitField<0, 7, u32> index; |
| 76 | {Regs::DepthBufferAddress, "DepthBufferAddress" }, | 346 | |
| 77 | {Regs::ColorBufferAddress, "ColorBufferAddress" }, | 347 | BitField<31, 1, Format> format; |
| 78 | {Regs::ColorBufferSize, "ColorBufferSize" }, | 348 | }; |
| 79 | }; | 349 | |
| 350 | // Writing to these registers sets the "current" uniform. | ||
| 351 | // TODO: It's not clear how the hardware stores what the "current" uniform is. | ||
| 352 | u32 set_value[8]; | ||
| 353 | |||
| 354 | } vs_uniform_setup; | ||
| 355 | |||
| 356 | INSERT_PADDING_WORDS(0x2); | ||
| 357 | |||
| 358 | struct { | ||
| 359 | u32 begin_load; | ||
| 360 | |||
| 361 | // Writing to these registers sets the "current" word in the shader program. | ||
| 362 | // TODO: It's not clear how the hardware stores what the "current" word is. | ||
| 363 | u32 set_word[8]; | ||
| 364 | } vs_program; | ||
| 365 | |||
| 366 | INSERT_PADDING_WORDS(0x1); | ||
| 80 | 367 | ||
| 81 | template<> | 368 | // This register group is used to load an internal table of swizzling patterns, |
| 82 | union Regs::Struct<Regs::ViewportSizeX> { | 369 | // which are indexed by each shader instruction to specify vector component swizzling. |
| 83 | BitField<0, 24, u32> value; | 370 | struct { |
| 371 | u32 begin_load; | ||
| 372 | |||
| 373 | // Writing to these registers sets the "current" swizzle pattern in the table. | ||
| 374 | // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is. | ||
| 375 | u32 set_word[8]; | ||
| 376 | } vs_swizzle_patterns; | ||
| 377 | |||
| 378 | INSERT_PADDING_WORDS(0x22); | ||
| 379 | |||
| 380 | #undef INSERT_PADDING_WORDS_HELPER1 | ||
| 381 | #undef INSERT_PADDING_WORDS_HELPER2 | ||
| 382 | #undef INSERT_PADDING_WORDS | ||
| 383 | |||
| 384 | // Map register indices to names readable by humans | ||
| 385 | // Used for debugging purposes, so performance is not an issue here | ||
| 386 | static std::string GetCommandName(int index) { | ||
| 387 | std::map<u32, std::string> map; | ||
| 388 | Regs regs; | ||
| 389 | |||
| 390 | // TODO: MSVC does not support using offsetof() on non-static data members even though this | ||
| 391 | // is technically allowed since C++11. Hence, this functionality is disabled until | ||
| 392 | // MSVC properly supports it. | ||
| 393 | #ifndef _MSC_VER | ||
| 394 | #define ADD_FIELD(name) \ | ||
| 395 | do { \ | ||
| 396 | map.insert({PICA_REG_INDEX(name), #name}); \ | ||
| 397 | for (u32 i = PICA_REG_INDEX(name) + 1; i < PICA_REG_INDEX(name) + sizeof(regs.name) / 4; ++i) \ | ||
| 398 | map.insert({i, #name + std::string("+") + std::to_string(i-PICA_REG_INDEX(name))}); \ | ||
| 399 | } while(false) | ||
| 400 | |||
| 401 | ADD_FIELD(viewport_size_x); | ||
| 402 | ADD_FIELD(viewport_size_y); | ||
| 403 | ADD_FIELD(viewport_depth_range); | ||
| 404 | ADD_FIELD(viewport_depth_far_plane); | ||
| 405 | ADD_FIELD(viewport_corner); | ||
| 406 | ADD_FIELD(framebuffer); | ||
| 407 | ADD_FIELD(vertex_attributes); | ||
| 408 | ADD_FIELD(index_array); | ||
| 409 | ADD_FIELD(num_vertices); | ||
| 410 | ADD_FIELD(trigger_draw); | ||
| 411 | ADD_FIELD(trigger_draw_indexed); | ||
| 412 | ADD_FIELD(triangle_topology); | ||
| 413 | ADD_FIELD(vs_main_offset); | ||
| 414 | ADD_FIELD(vs_input_register_map); | ||
| 415 | ADD_FIELD(vs_uniform_setup); | ||
| 416 | ADD_FIELD(vs_program); | ||
| 417 | ADD_FIELD(vs_swizzle_patterns); | ||
| 418 | |||
| 419 | #undef ADD_FIELD | ||
| 420 | #endif // _MSC_VER | ||
| 421 | |||
| 422 | // Return empty string if no match is found | ||
| 423 | return map[index]; | ||
| 424 | } | ||
| 425 | |||
| 426 | static inline int NumIds() { | ||
| 427 | return sizeof(Regs) / sizeof(u32); | ||
| 428 | } | ||
| 429 | |||
| 430 | u32& operator [] (int index) const { | ||
| 431 | u32* content = (u32*)this; | ||
| 432 | return content[index]; | ||
| 433 | } | ||
| 434 | |||
| 435 | u32& operator [] (int index) { | ||
| 436 | u32* content = (u32*)this; | ||
| 437 | return content[index]; | ||
| 438 | } | ||
| 439 | |||
| 440 | private: | ||
| 441 | /* | ||
| 442 | * Most physical addresses which Pica registers refer to are 8-byte aligned. | ||
| 443 | * This function should be used to get the address from a raw register value. | ||
| 444 | */ | ||
| 445 | static inline u32 DecodeAddressRegister(u32 register_value) { | ||
| 446 | return register_value * 8; | ||
| 447 | } | ||
| 84 | }; | 448 | }; |
| 85 | 449 | ||
| 86 | template<> | 450 | // TODO: MSVC does not support using offsetof() on non-static data members even though this |
| 87 | union Regs::Struct<Regs::ViewportSizeY> { | 451 | // is technically allowed since C++11. This macro should be enabled once MSVC adds |
| 88 | BitField<0, 24, u32> value; | 452 | // support for that. |
| 453 | #ifndef _MSC_VER | ||
| 454 | #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") | ||
| 455 | |||
| 456 | ASSERT_REG_POSITION(viewport_size_x, 0x41); | ||
| 457 | ASSERT_REG_POSITION(viewport_size_y, 0x43); | ||
| 458 | ASSERT_REG_POSITION(viewport_depth_range, 0x4d); | ||
| 459 | ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); | ||
| 460 | ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); | ||
| 461 | ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); | ||
| 462 | ASSERT_REG_POSITION(viewport_corner, 0x68); | ||
| 463 | ASSERT_REG_POSITION(framebuffer, 0x110); | ||
| 464 | ASSERT_REG_POSITION(vertex_attributes, 0x200); | ||
| 465 | ASSERT_REG_POSITION(index_array, 0x227); | ||
| 466 | ASSERT_REG_POSITION(num_vertices, 0x228); | ||
| 467 | ASSERT_REG_POSITION(trigger_draw, 0x22e); | ||
| 468 | ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); | ||
| 469 | ASSERT_REG_POSITION(triangle_topology, 0x25e); | ||
| 470 | ASSERT_REG_POSITION(vs_main_offset, 0x2ba); | ||
| 471 | ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); | ||
| 472 | ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); | ||
| 473 | ASSERT_REG_POSITION(vs_program, 0x2cb); | ||
| 474 | ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5); | ||
| 475 | |||
| 476 | #undef ASSERT_REG_POSITION | ||
| 477 | #endif // !defined(_MSC_VER) | ||
| 478 | |||
| 479 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. | ||
| 480 | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); | ||
| 481 | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); | ||
| 482 | |||
| 483 | extern Regs registers; // TODO: Not sure if we want to have one global instance for this | ||
| 484 | |||
| 485 | |||
| 486 | struct float24 { | ||
| 487 | static float24 FromFloat32(float val) { | ||
| 488 | float24 ret; | ||
| 489 | ret.value = val; | ||
| 490 | return ret; | ||
| 491 | } | ||
| 492 | |||
| 493 | // 16 bit mantissa, 7 bit exponent, 1 bit sign | ||
| 494 | // TODO: No idea if this works as intended | ||
| 495 | static float24 FromRawFloat24(u32 hex) { | ||
| 496 | float24 ret; | ||
| 497 | if ((hex & 0xFFFFFF) == 0) { | ||
| 498 | ret.value = 0; | ||
| 499 | } else { | ||
| 500 | u32 mantissa = hex & 0xFFFF; | ||
| 501 | u32 exponent = (hex >> 16) & 0x7F; | ||
| 502 | u32 sign = hex >> 23; | ||
| 503 | ret.value = powf(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * powf(2.0f, -16.f)); | ||
| 504 | if (sign) | ||
| 505 | ret.value = -ret.value; | ||
| 506 | } | ||
| 507 | return ret; | ||
| 508 | } | ||
| 509 | |||
| 510 | // Not recommended for anything but logging | ||
| 511 | float ToFloat32() const { | ||
| 512 | return value; | ||
| 513 | } | ||
| 514 | |||
| 515 | float24 operator * (const float24& flt) const { | ||
| 516 | return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); | ||
| 517 | } | ||
| 518 | |||
| 519 | float24 operator / (const float24& flt) const { | ||
| 520 | return float24::FromFloat32(ToFloat32() / flt.ToFloat32()); | ||
| 521 | } | ||
| 522 | |||
| 523 | float24 operator + (const float24& flt) const { | ||
| 524 | return float24::FromFloat32(ToFloat32() + flt.ToFloat32()); | ||
| 525 | } | ||
| 526 | |||
| 527 | float24 operator - (const float24& flt) const { | ||
| 528 | return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); | ||
| 529 | } | ||
| 530 | |||
| 531 | float24 operator - () const { | ||
| 532 | return float24::FromFloat32(-ToFloat32()); | ||
| 533 | } | ||
| 534 | |||
| 535 | bool operator < (const float24& flt) const { | ||
| 536 | return ToFloat32() < flt.ToFloat32(); | ||
| 537 | } | ||
| 538 | |||
| 539 | bool operator > (const float24& flt) const { | ||
| 540 | return ToFloat32() > flt.ToFloat32(); | ||
| 541 | } | ||
| 542 | |||
| 543 | bool operator >= (const float24& flt) const { | ||
| 544 | return ToFloat32() >= flt.ToFloat32(); | ||
| 545 | } | ||
| 546 | |||
| 547 | bool operator <= (const float24& flt) const { | ||
| 548 | return ToFloat32() <= flt.ToFloat32(); | ||
| 549 | } | ||
| 550 | |||
| 551 | private: | ||
| 552 | float24() = default; | ||
| 553 | |||
| 554 | // Stored as a regular float, merely for convenience | ||
| 555 | // TODO: Perform proper arithmetic on this! | ||
| 556 | float value; | ||
| 89 | }; | 557 | }; |
| 90 | 558 | ||
| 91 | template<> | 559 | union CommandHeader { |
| 92 | union Regs::Struct<Regs::VertexDescriptor> { | 560 | CommandHeader(u32 h) : hex(h) {} |
| 93 | enum class Format : u64 { | 561 | |
| 94 | BYTE = 0, | 562 | u32 hex; |
| 95 | UBYTE = 1, | ||
| 96 | SHORT = 2, | ||
| 97 | FLOAT = 3, | ||
| 98 | }; | ||
| 99 | 563 | ||
| 100 | BitField< 0, 2, Format> format0; | 564 | BitField< 0, 16, u32> cmd_id; |
| 101 | BitField< 2, 2, u64> size0; // number of elements minus 1 | 565 | BitField<16, 4, u32> parameter_mask; |
| 102 | BitField< 4, 2, Format> format1; | 566 | BitField<20, 11, u32> extra_data_length; |
| 103 | BitField< 6, 2, u64> size1; | 567 | BitField<31, 1, u32> group_commands; |
| 104 | BitField< 8, 2, Format> format2; | ||
| 105 | BitField<10, 2, u64> size2; | ||
| 106 | BitField<12, 2, Format> format3; | ||
| 107 | BitField<14, 2, u64> size3; | ||
| 108 | BitField<16, 2, Format> format4; | ||
| 109 | BitField<18, 2, u64> size4; | ||
| 110 | BitField<20, 2, Format> format5; | ||
| 111 | BitField<22, 2, u64> size5; | ||
| 112 | BitField<24, 2, Format> format6; | ||
| 113 | BitField<26, 2, u64> size6; | ||
| 114 | BitField<28, 2, Format> format7; | ||
| 115 | BitField<30, 2, u64> size7; | ||
| 116 | BitField<32, 2, Format> format8; | ||
| 117 | BitField<34, 2, u64> size8; | ||
| 118 | BitField<36, 2, Format> format9; | ||
| 119 | BitField<38, 2, u64> size9; | ||
| 120 | BitField<40, 2, Format> format10; | ||
| 121 | BitField<42, 2, u64> size10; | ||
| 122 | BitField<44, 2, Format> format11; | ||
| 123 | BitField<46, 2, u64> size11; | ||
| 124 | |||
| 125 | BitField<48, 12, u64> attribute_mask; | ||
| 126 | BitField<60, 4, u64> num_attributes; // number of total attributes minus 1 | ||
| 127 | }; | 568 | }; |
| 128 | 569 | ||
| 129 | 570 | ||
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp new file mode 100644 index 000000000..2354ffb99 --- /dev/null +++ b/src/video_core/primitive_assembly.cpp | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "clipper.h" | ||
| 6 | #include "pica.h" | ||
| 7 | #include "primitive_assembly.h" | ||
| 8 | #include "vertex_shader.h" | ||
| 9 | |||
| 10 | namespace Pica { | ||
| 11 | |||
| 12 | namespace PrimitiveAssembly { | ||
| 13 | |||
| 14 | static OutputVertex buffer[2]; | ||
| 15 | static int buffer_index = 0; // TODO: reset this on emulation restart | ||
| 16 | |||
| 17 | void SubmitVertex(OutputVertex& vtx) | ||
| 18 | { | ||
| 19 | switch (registers.triangle_topology) { | ||
| 20 | case Regs::TriangleTopology::List: | ||
| 21 | case Regs::TriangleTopology::ListIndexed: | ||
| 22 | if (buffer_index < 2) { | ||
| 23 | buffer[buffer_index++] = vtx; | ||
| 24 | } else { | ||
| 25 | buffer_index = 0; | ||
| 26 | |||
| 27 | Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); | ||
| 28 | } | ||
| 29 | break; | ||
| 30 | |||
| 31 | case Regs::TriangleTopology::Fan: | ||
| 32 | if (buffer_index == 2) { | ||
| 33 | buffer_index = 0; | ||
| 34 | |||
| 35 | Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); | ||
| 36 | |||
| 37 | buffer[1] = vtx; | ||
| 38 | } else { | ||
| 39 | buffer[buffer_index++] = vtx; | ||
| 40 | } | ||
| 41 | break; | ||
| 42 | |||
| 43 | default: | ||
| 44 | ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value()); | ||
| 45 | break; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | } // namespace | ||
| 50 | |||
| 51 | } // namespace | ||
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h new file mode 100644 index 000000000..2a2b0c170 --- /dev/null +++ b/src/video_core/primitive_assembly.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace Pica { | ||
| 8 | |||
| 9 | namespace VertexShader { | ||
| 10 | struct OutputVertex; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace PrimitiveAssembly { | ||
| 14 | |||
| 15 | using VertexShader::OutputVertex; | ||
| 16 | |||
| 17 | void SubmitVertex(OutputVertex& vtx); | ||
| 18 | |||
| 19 | } // namespace | ||
| 20 | |||
| 21 | } // namespace | ||
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp new file mode 100644 index 000000000..a7c1bab3e --- /dev/null +++ b/src/video_core/rasterizer.cpp | |||
| @@ -0,0 +1,180 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | #include "math.h" | ||
| 10 | #include "pica.h" | ||
| 11 | #include "rasterizer.h" | ||
| 12 | #include "vertex_shader.h" | ||
| 13 | |||
| 14 | namespace Pica { | ||
| 15 | |||
| 16 | namespace Rasterizer { | ||
| 17 | |||
| 18 | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | ||
| 19 | u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress()); | ||
| 20 | u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); | ||
| 21 | |||
| 22 | // Assuming RGBA8 format until actual framebuffer format handling is implemented | ||
| 23 | *(color_buffer + x + y * registers.framebuffer.GetWidth() / 2) = value; | ||
| 24 | } | ||
| 25 | |||
| 26 | static u32 GetDepth(int x, int y) { | ||
| 27 | u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); | ||
| 28 | |||
| 29 | // Assuming 16-bit depth buffer format until actual format handling is implemented | ||
| 30 | return *(depth_buffer + x + y * registers.framebuffer.GetWidth() / 2); | ||
| 31 | } | ||
| 32 | |||
| 33 | static void SetDepth(int x, int y, u16 value) { | ||
| 34 | u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); | ||
| 35 | |||
| 36 | // Assuming 16-bit depth buffer format until actual format handling is implemented | ||
| 37 | *(depth_buffer + x + y * registers.framebuffer.GetWidth() / 2) = value; | ||
| 38 | } | ||
| 39 | |||
| 40 | void ProcessTriangle(const VertexShader::OutputVertex& v0, | ||
| 41 | const VertexShader::OutputVertex& v1, | ||
| 42 | const VertexShader::OutputVertex& v2) | ||
| 43 | { | ||
| 44 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values | ||
| 45 | struct Fix12P4 { | ||
| 46 | Fix12P4() {} | ||
| 47 | Fix12P4(u16 val) : val(val) {} | ||
| 48 | |||
| 49 | static u16 FracMask() { return 0xF; } | ||
| 50 | static u16 IntMask() { return (u16)~0xF; } | ||
| 51 | |||
| 52 | operator u16() const { | ||
| 53 | return val; | ||
| 54 | } | ||
| 55 | |||
| 56 | bool operator < (const Fix12P4& oth) const { | ||
| 57 | return (u16)*this < (u16)oth; | ||
| 58 | } | ||
| 59 | |||
| 60 | private: | ||
| 61 | u16 val; | ||
| 62 | }; | ||
| 63 | |||
| 64 | // vertex positions in rasterizer coordinates | ||
| 65 | auto FloatToFix = [](float24 flt) { | ||
| 66 | return Fix12P4(flt.ToFloat32() * 16.0f); | ||
| 67 | }; | ||
| 68 | auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { | ||
| 69 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | ||
| 70 | }; | ||
| 71 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | ||
| 72 | ScreenToRasterizerCoordinates(v1.screenpos), | ||
| 73 | ScreenToRasterizerCoordinates(v2.screenpos) }; | ||
| 74 | |||
| 75 | // TODO: Proper scissor rect test! | ||
| 76 | u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); | ||
| 77 | u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | ||
| 78 | u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); | ||
| 79 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | ||
| 80 | |||
| 81 | min_x = min_x & Fix12P4::IntMask(); | ||
| 82 | min_y = min_y & Fix12P4::IntMask(); | ||
| 83 | max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); | ||
| 84 | max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); | ||
| 85 | |||
| 86 | // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not | ||
| 87 | // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias | ||
| 88 | // values which are added to the barycentric coordinates w0, w1 and w2, respectively. | ||
| 89 | // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... | ||
| 90 | auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, | ||
| 91 | const Math::Vec2<Fix12P4>& line1, | ||
| 92 | const Math::Vec2<Fix12P4>& line2) | ||
| 93 | { | ||
| 94 | if (line1.y == line2.y) { | ||
| 95 | // just check if vertex is above us => bottom line parallel to x-axis | ||
| 96 | return vtx.y < line1.y; | ||
| 97 | } else { | ||
| 98 | // check if vertex is on our left => right side | ||
| 99 | // TODO: Not sure how likely this is to overflow | ||
| 100 | return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y); | ||
| 101 | } | ||
| 102 | }; | ||
| 103 | int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; | ||
| 104 | int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | ||
| 105 | int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | ||
| 106 | |||
| 107 | // TODO: Not sure if looping through x first might be faster | ||
| 108 | for (u16 y = min_y; y < max_y; y += 0x10) { | ||
| 109 | for (u16 x = min_x; x < max_x; x += 0x10) { | ||
| 110 | |||
| 111 | // Calculate the barycentric coordinates w0, w1 and w2 | ||
| 112 | auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, | ||
| 113 | const Math::Vec2<Fix12P4>& vtx2, | ||
| 114 | const Math::Vec2<Fix12P4>& vtx3) { | ||
| 115 | const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0); | ||
| 116 | const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0); | ||
| 117 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 | ||
| 118 | return Cross(vec1, vec2).z; | ||
| 119 | }; | ||
| 120 | |||
| 121 | int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | ||
| 122 | int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); | ||
| 123 | int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); | ||
| 124 | int wsum = w0 + w1 + w2; | ||
| 125 | |||
| 126 | // If current pixel is not covered by the current primitive | ||
| 127 | if (w0 < 0 || w1 < 0 || w2 < 0) | ||
| 128 | continue; | ||
| 129 | |||
| 130 | // Perspective correct attribute interpolation: | ||
| 131 | // Attribute values cannot be calculated by simple linear interpolation since | ||
| 132 | // they are not linear in screen space. For example, when interpolating a | ||
| 133 | // texture coordinate across two vertices, something simple like | ||
| 134 | // u = (u0*w0 + u1*w1)/(w0+w1) | ||
| 135 | // will not work. However, the attribute value divided by the | ||
| 136 | // clipspace w-coordinate (u/w) and and the inverse w-coordinate (1/w) are linear | ||
| 137 | // in screenspace. Hence, we can linearly interpolate these two independently and | ||
| 138 | // calculate the interpolated attribute by dividing the results. | ||
| 139 | // I.e. | ||
| 140 | // u_over_w = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1) | ||
| 141 | // one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1) | ||
| 142 | // u = u_over_w / one_over_w | ||
| 143 | // | ||
| 144 | // The generalization to three vertices is straightforward in baricentric coordinates. | ||
| 145 | auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { | ||
| 146 | auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, | ||
| 147 | attr1 / v1.pos.w, | ||
| 148 | attr2 / v2.pos.w); | ||
| 149 | auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, | ||
| 150 | float24::FromFloat32(1.f) / v1.pos.w, | ||
| 151 | float24::FromFloat32(1.f) / v2.pos.w); | ||
| 152 | auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), | ||
| 153 | float24::FromFloat32(w1), | ||
| 154 | float24::FromFloat32(w2)); | ||
| 155 | |||
| 156 | float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); | ||
| 157 | float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); | ||
| 158 | return interpolated_attr_over_w / interpolated_w_inverse; | ||
| 159 | }; | ||
| 160 | |||
| 161 | Math::Vec4<u8> primary_color{ | ||
| 162 | (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255), | ||
| 163 | (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255), | ||
| 164 | (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255), | ||
| 165 | (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) | ||
| 166 | }; | ||
| 167 | |||
| 168 | u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + | ||
| 169 | (float)v1.screenpos[2].ToFloat32() * w1 + | ||
| 170 | (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? | ||
| 171 | SetDepth(x >> 4, y >> 4, z); | ||
| 172 | |||
| 173 | DrawPixel(x >> 4, y >> 4, primary_color); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 178 | } // namespace Rasterizer | ||
| 179 | |||
| 180 | } // namespace Pica | ||
diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h new file mode 100644 index 000000000..500be9462 --- /dev/null +++ b/src/video_core/rasterizer.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace Pica { | ||
| 8 | |||
| 9 | namespace VertexShader { | ||
| 10 | struct OutputVertex; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Rasterizer { | ||
| 14 | |||
| 15 | void ProcessTriangle(const VertexShader::OutputVertex& v0, | ||
| 16 | const VertexShader::OutputVertex& v1, | ||
| 17 | const VertexShader::OutputVertex& v2); | ||
| 18 | |||
| 19 | } // namespace Rasterizer | ||
| 20 | |||
| 21 | } // namespace Pica | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d0a8ec1da..f11a64fad 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -78,23 +78,23 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) { | |||
| 78 | */ | 78 | */ |
| 79 | void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { | 79 | void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { |
| 80 | 80 | ||
| 81 | const auto& framebuffer_top = GPU::g_regs.Get<GPU::Regs::FramebufferTop>(); | 81 | const auto& framebuffer_top = GPU::g_regs.framebuffer_config[0]; |
| 82 | const auto& framebuffer_sub = GPU::g_regs.Get<GPU::Regs::FramebufferBottom>(); | 82 | const auto& framebuffer_sub = GPU::g_regs.framebuffer_config[1]; |
| 83 | const u32 active_fb_top = (framebuffer_top.active_fb == 1) | 83 | const u32 active_fb_top = (framebuffer_top.active_fb == 1) |
| 84 | ? framebuffer_top.address_left2 | 84 | ? Memory::PhysicalToVirtualAddress(framebuffer_top.address_left2) |
| 85 | : framebuffer_top.address_left1; | 85 | : Memory::PhysicalToVirtualAddress(framebuffer_top.address_left1); |
| 86 | const u32 active_fb_sub = (framebuffer_sub.active_fb == 1) | 86 | const u32 active_fb_sub = (framebuffer_sub.active_fb == 1) |
| 87 | ? framebuffer_sub.address_left2 | 87 | ? Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left2) |
| 88 | : framebuffer_sub.address_left1; | 88 | : Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left1); |
| 89 | 89 | ||
| 90 | DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x", | 90 | DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x", |
| 91 | framebuffer_top.stride * framebuffer_top.height, | 91 | framebuffer_top.stride * framebuffer_top.height, |
| 92 | GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.width, | 92 | active_fb_top, (int)framebuffer_top.width, |
| 93 | (int)framebuffer_top.height, (int)framebuffer_top.format); | 93 | (int)framebuffer_top.height, (int)framebuffer_top.format); |
| 94 | 94 | ||
| 95 | // TODO: This should consider the GPU registers for framebuffer width, height and stride. | 95 | // TODO: This should consider the GPU registers for framebuffer width, height and stride. |
| 96 | FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped); | 96 | FlipFramebuffer(Memory::GetPointer(active_fb_top), m_xfb_top_flipped); |
| 97 | FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_sub), m_xfb_bottom_flipped); | 97 | FlipFramebuffer(Memory::GetPointer(active_fb_sub), m_xfb_bottom_flipped); |
| 98 | 98 | ||
| 99 | // Blit the top framebuffer | 99 | // Blit the top framebuffer |
| 100 | // ------------------------ | 100 | // ------------------------ |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp new file mode 100644 index 000000000..93830a96a --- /dev/null +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -0,0 +1,270 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "pica.h" | ||
| 6 | #include "vertex_shader.h" | ||
| 7 | #include <core/mem_map.h> | ||
| 8 | #include <common/file_util.h> | ||
| 9 | |||
| 10 | namespace Pica { | ||
| 11 | |||
| 12 | namespace VertexShader { | ||
| 13 | |||
| 14 | static struct { | ||
| 15 | Math::Vec4<float24> f[96]; | ||
| 16 | } shader_uniforms; | ||
| 17 | |||
| 18 | |||
| 19 | // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! | ||
| 20 | // For now, we just keep these local arrays around. | ||
| 21 | static u32 shader_memory[1024]; | ||
| 22 | static u32 swizzle_data[1024]; | ||
| 23 | |||
| 24 | void SubmitShaderMemoryChange(u32 addr, u32 value) | ||
| 25 | { | ||
| 26 | shader_memory[addr] = value; | ||
| 27 | } | ||
| 28 | |||
| 29 | void SubmitSwizzleDataChange(u32 addr, u32 value) | ||
| 30 | { | ||
| 31 | swizzle_data[addr] = value; | ||
| 32 | } | ||
| 33 | |||
| 34 | Math::Vec4<float24>& GetFloatUniform(u32 index) | ||
| 35 | { | ||
| 36 | return shader_uniforms.f[index]; | ||
| 37 | } | ||
| 38 | |||
| 39 | struct VertexShaderState { | ||
| 40 | u32* program_counter; | ||
| 41 | |||
| 42 | const float24* input_register_table[16]; | ||
| 43 | float24* output_register_table[7*4]; | ||
| 44 | |||
| 45 | Math::Vec4<float24> temporary_registers[16]; | ||
| 46 | bool status_registers[2]; | ||
| 47 | |||
| 48 | enum { | ||
| 49 | INVALID_ADDRESS = 0xFFFFFFFF | ||
| 50 | }; | ||
| 51 | u32 call_stack[8]; // TODO: What is the maximal call stack depth? | ||
| 52 | u32* call_stack_pointer; | ||
| 53 | }; | ||
| 54 | |||
| 55 | static void ProcessShaderCode(VertexShaderState& state) { | ||
| 56 | while (true) { | ||
| 57 | bool increment_pc = true; | ||
| 58 | bool exit_loop = false; | ||
| 59 | const Instruction& instr = *(const Instruction*)state.program_counter; | ||
| 60 | |||
| 61 | const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] | ||
| 62 | : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x | ||
| 63 | : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x | ||
| 64 | : nullptr; | ||
| 65 | const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2] | ||
| 66 | : &state.temporary_registers[instr.common.src2-0x10].x; | ||
| 67 | // TODO: Unsure about the limit values | ||
| 68 | float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest] | ||
| 69 | : (instr.common.dest <= 0x3C) ? nullptr | ||
| 70 | : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4] | ||
| 71 | : nullptr; | ||
| 72 | |||
| 73 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | ||
| 74 | |||
| 75 | const float24 src1[4] = { | ||
| 76 | src1_[(int)swizzle.GetSelectorSrc1(0)], | ||
| 77 | src1_[(int)swizzle.GetSelectorSrc1(1)], | ||
| 78 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 79 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 80 | }; | ||
| 81 | const float24 src2[4] = { | ||
| 82 | src2_[(int)swizzle.GetSelectorSrc2(0)], | ||
| 83 | src2_[(int)swizzle.GetSelectorSrc2(1)], | ||
| 84 | src2_[(int)swizzle.GetSelectorSrc2(2)], | ||
| 85 | src2_[(int)swizzle.GetSelectorSrc2(3)], | ||
| 86 | }; | ||
| 87 | |||
| 88 | switch (instr.opcode) { | ||
| 89 | case Instruction::OpCode::ADD: | ||
| 90 | { | ||
| 91 | for (int i = 0; i < 4; ++i) { | ||
| 92 | if (!swizzle.DestComponentEnabled(i)) | ||
| 93 | continue; | ||
| 94 | |||
| 95 | dest[i] = src1[i] + src2[i]; | ||
| 96 | } | ||
| 97 | |||
| 98 | break; | ||
| 99 | } | ||
| 100 | |||
| 101 | case Instruction::OpCode::MUL: | ||
| 102 | { | ||
| 103 | for (int i = 0; i < 4; ++i) { | ||
| 104 | if (!swizzle.DestComponentEnabled(i)) | ||
| 105 | continue; | ||
| 106 | |||
| 107 | dest[i] = src1[i] * src2[i]; | ||
| 108 | } | ||
| 109 | |||
| 110 | break; | ||
| 111 | } | ||
| 112 | |||
| 113 | case Instruction::OpCode::DP3: | ||
| 114 | case Instruction::OpCode::DP4: | ||
| 115 | { | ||
| 116 | float24 dot = float24::FromFloat32(0.f); | ||
| 117 | int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; | ||
| 118 | for (int i = 0; i < num_components; ++i) | ||
| 119 | dot = dot + src1[i] * src2[i]; | ||
| 120 | |||
| 121 | for (int i = 0; i < num_components; ++i) { | ||
| 122 | if (!swizzle.DestComponentEnabled(i)) | ||
| 123 | continue; | ||
| 124 | |||
| 125 | dest[i] = dot; | ||
| 126 | } | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | |||
| 130 | // Reciprocal | ||
| 131 | case Instruction::OpCode::RCP: | ||
| 132 | { | ||
| 133 | for (int i = 0; i < 4; ++i) { | ||
| 134 | if (!swizzle.DestComponentEnabled(i)) | ||
| 135 | continue; | ||
| 136 | |||
| 137 | // TODO: Be stable against division by zero! | ||
| 138 | // TODO: I think this might be wrong... we should only use one component here | ||
| 139 | dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32()); | ||
| 140 | } | ||
| 141 | |||
| 142 | break; | ||
| 143 | } | ||
| 144 | |||
| 145 | // Reciprocal Square Root | ||
| 146 | case Instruction::OpCode::RSQ: | ||
| 147 | { | ||
| 148 | for (int i = 0; i < 4; ++i) { | ||
| 149 | if (!swizzle.DestComponentEnabled(i)) | ||
| 150 | continue; | ||
| 151 | |||
| 152 | // TODO: Be stable against division by zero! | ||
| 153 | // TODO: I think this might be wrong... we should only use one component here | ||
| 154 | dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32())); | ||
| 155 | } | ||
| 156 | |||
| 157 | break; | ||
| 158 | } | ||
| 159 | |||
| 160 | case Instruction::OpCode::MOV: | ||
| 161 | { | ||
| 162 | for (int i = 0; i < 4; ++i) { | ||
| 163 | if (!swizzle.DestComponentEnabled(i)) | ||
| 164 | continue; | ||
| 165 | |||
| 166 | dest[i] = src1[i]; | ||
| 167 | } | ||
| 168 | break; | ||
| 169 | } | ||
| 170 | |||
| 171 | case Instruction::OpCode::RET: | ||
| 172 | if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { | ||
| 173 | exit_loop = true; | ||
| 174 | } else { | ||
| 175 | state.program_counter = &shader_memory[*state.call_stack_pointer--]; | ||
| 176 | *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; | ||
| 177 | } | ||
| 178 | |||
| 179 | break; | ||
| 180 | |||
| 181 | case Instruction::OpCode::CALL: | ||
| 182 | increment_pc = false; | ||
| 183 | |||
| 184 | _dbg_assert_(GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); | ||
| 185 | |||
| 186 | *++state.call_stack_pointer = state.program_counter - shader_memory; | ||
| 187 | // TODO: Does this offset refer to the beginning of shader memory? | ||
| 188 | state.program_counter = &shader_memory[instr.flow_control.offset_words]; | ||
| 189 | break; | ||
| 190 | |||
| 191 | case Instruction::OpCode::FLS: | ||
| 192 | // TODO: Do whatever needs to be done here? | ||
| 193 | break; | ||
| 194 | |||
| 195 | default: | ||
| 196 | ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||
| 197 | (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); | ||
| 198 | break; | ||
| 199 | } | ||
| 200 | |||
| 201 | if (increment_pc) | ||
| 202 | ++state.program_counter; | ||
| 203 | |||
| 204 | if (exit_loop) | ||
| 205 | break; | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | OutputVertex RunShader(const InputVertex& input, int num_attributes) | ||
| 210 | { | ||
| 211 | VertexShaderState state; | ||
| 212 | |||
| 213 | const u32* main = &shader_memory[registers.vs_main_offset]; | ||
| 214 | state.program_counter = (u32*)main; | ||
| 215 | |||
| 216 | // Setup input register table | ||
| 217 | const auto& attribute_register_map = registers.vs_input_register_map; | ||
| 218 | float24 dummy_register; | ||
| 219 | std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register); | ||
| 220 | if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||
| 221 | if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||
| 222 | if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||
| 223 | if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||
| 224 | if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||
| 225 | if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||
| 226 | if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||
| 227 | if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||
| 228 | if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||
| 229 | if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||
| 230 | if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||
| 231 | if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||
| 232 | if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||
| 233 | if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||
| 234 | if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||
| 235 | if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||
| 236 | |||
| 237 | // Setup output register table | ||
| 238 | OutputVertex ret; | ||
| 239 | for (int i = 0; i < 7; ++i) { | ||
| 240 | const auto& output_register_map = registers.vs_output_attributes[i]; | ||
| 241 | |||
| 242 | u32 semantics[4] = { | ||
| 243 | output_register_map.map_x, output_register_map.map_y, | ||
| 244 | output_register_map.map_z, output_register_map.map_w | ||
| 245 | }; | ||
| 246 | |||
| 247 | for (int comp = 0; comp < 4; ++comp) | ||
| 248 | state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; | ||
| 249 | } | ||
| 250 | |||
| 251 | state.status_registers[0] = false; | ||
| 252 | state.status_registers[1] = false; | ||
| 253 | std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]), | ||
| 254 | VertexShaderState::INVALID_ADDRESS); | ||
| 255 | state.call_stack_pointer = &state.call_stack[0]; | ||
| 256 | |||
| 257 | ProcessShaderCode(state); | ||
| 258 | |||
| 259 | DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | ||
| 260 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||
| 261 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||
| 262 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | ||
| 263 | |||
| 264 | return ret; | ||
| 265 | } | ||
| 266 | |||
| 267 | |||
| 268 | } // namespace | ||
| 269 | |||
| 270 | } // namespace | ||
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h new file mode 100644 index 000000000..1b71e367b --- /dev/null +++ b/src/video_core/vertex_shader.h | |||
| @@ -0,0 +1,211 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <initializer_list> | ||
| 8 | |||
| 9 | #include <common/common_types.h> | ||
| 10 | |||
| 11 | #include "math.h" | ||
| 12 | #include "pica.h" | ||
| 13 | |||
| 14 | namespace Pica { | ||
| 15 | |||
| 16 | namespace VertexShader { | ||
| 17 | |||
| 18 | struct InputVertex { | ||
| 19 | Math::Vec4<float24> attr[16]; | ||
| 20 | }; | ||
| 21 | |||
| 22 | struct OutputVertex { | ||
| 23 | OutputVertex() = default; | ||
| 24 | |||
| 25 | // VS output attributes | ||
| 26 | Math::Vec4<float24> pos; | ||
| 27 | Math::Vec4<float24> dummy; // quaternions (not implemented, yet) | ||
| 28 | Math::Vec4<float24> color; | ||
| 29 | Math::Vec2<float24> tc0; | ||
| 30 | float24 tc0_v; | ||
| 31 | |||
| 32 | // Padding for optimal alignment | ||
| 33 | float24 pad[14]; | ||
| 34 | |||
| 35 | // Attributes used to store intermediate results | ||
| 36 | |||
| 37 | // position after perspective divide | ||
| 38 | Math::Vec3<float24> screenpos; | ||
| 39 | |||
| 40 | // Linear interpolation | ||
| 41 | // factor: 0=this, 1=vtx | ||
| 42 | void Lerp(float24 factor, const OutputVertex& vtx) { | ||
| 43 | pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); | ||
| 44 | |||
| 45 | // TODO: Should perform perspective correct interpolation here... | ||
| 46 | tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); | ||
| 47 | |||
| 48 | screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); | ||
| 49 | |||
| 50 | color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); | ||
| 51 | } | ||
| 52 | |||
| 53 | // Linear interpolation | ||
| 54 | // factor: 0=v0, 1=v1 | ||
| 55 | static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { | ||
| 56 | OutputVertex ret = v0; | ||
| 57 | ret.Lerp(factor, v1); | ||
| 58 | return ret; | ||
| 59 | } | ||
| 60 | }; | ||
| 61 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | ||
| 62 | |||
| 63 | union Instruction { | ||
| 64 | enum class OpCode : u32 { | ||
| 65 | ADD = 0x0, | ||
| 66 | DP3 = 0x1, | ||
| 67 | DP4 = 0x2, | ||
| 68 | |||
| 69 | MUL = 0x8, | ||
| 70 | |||
| 71 | MAX = 0xC, | ||
| 72 | MIN = 0xD, | ||
| 73 | RCP = 0xE, | ||
| 74 | RSQ = 0xF, | ||
| 75 | |||
| 76 | MOV = 0x13, | ||
| 77 | |||
| 78 | RET = 0x21, | ||
| 79 | FLS = 0x22, // Flush | ||
| 80 | CALL = 0x24, | ||
| 81 | }; | ||
| 82 | |||
| 83 | std::string GetOpCodeName() const { | ||
| 84 | std::map<OpCode, std::string> map = { | ||
| 85 | { OpCode::ADD, "ADD" }, | ||
| 86 | { OpCode::DP3, "DP3" }, | ||
| 87 | { OpCode::DP4, "DP4" }, | ||
| 88 | { OpCode::MUL, "MUL" }, | ||
| 89 | { OpCode::MAX, "MAX" }, | ||
| 90 | { OpCode::MIN, "MIN" }, | ||
| 91 | { OpCode::RCP, "RCP" }, | ||
| 92 | { OpCode::RSQ, "RSQ" }, | ||
| 93 | { OpCode::MOV, "MOV" }, | ||
| 94 | { OpCode::RET, "RET" }, | ||
| 95 | { OpCode::FLS, "FLS" }, | ||
| 96 | }; | ||
| 97 | auto it = map.find(opcode); | ||
| 98 | if (it == map.end()) | ||
| 99 | return "UNK"; | ||
| 100 | else | ||
| 101 | return it->second; | ||
| 102 | } | ||
| 103 | |||
| 104 | u32 hex; | ||
| 105 | |||
| 106 | BitField<0x1a, 0x6, OpCode> opcode; | ||
| 107 | |||
| 108 | // General notes: | ||
| 109 | // | ||
| 110 | // When two input registers are used, one of them uses a 5-bit index while the other | ||
| 111 | // one uses a 7-bit index. This is because at most one floating point uniform may be used | ||
| 112 | // as an input. | ||
| 113 | |||
| 114 | |||
| 115 | // Format used e.g. by arithmetic instructions and comparisons | ||
| 116 | // "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats), | ||
| 117 | // while "dest" addresses individual floats. | ||
| 118 | union { | ||
| 119 | BitField<0x00, 0x5, u32> operand_desc_id; | ||
| 120 | BitField<0x07, 0x5, u32> src2; | ||
| 121 | BitField<0x0c, 0x7, u32> src1; | ||
| 122 | BitField<0x13, 0x7, u32> dest; | ||
| 123 | } common; | ||
| 124 | |||
| 125 | // Format used for flow control instructions ("if") | ||
| 126 | union { | ||
| 127 | BitField<0x00, 0x8, u32> num_instructions; | ||
| 128 | BitField<0x0a, 0xc, u32> offset_words; | ||
| 129 | } flow_control; | ||
| 130 | }; | ||
| 131 | |||
| 132 | union SwizzlePattern { | ||
| 133 | u32 hex; | ||
| 134 | |||
| 135 | enum class Selector : u32 { | ||
| 136 | x = 0, | ||
| 137 | y = 1, | ||
| 138 | z = 2, | ||
| 139 | w = 3 | ||
| 140 | }; | ||
| 141 | |||
| 142 | Selector GetSelectorSrc1(int comp) const { | ||
| 143 | Selector selectors[] = { | ||
| 144 | src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3 | ||
| 145 | }; | ||
| 146 | return selectors[comp]; | ||
| 147 | } | ||
| 148 | |||
| 149 | Selector GetSelectorSrc2(int comp) const { | ||
| 150 | Selector selectors[] = { | ||
| 151 | src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3 | ||
| 152 | }; | ||
| 153 | return selectors[comp]; | ||
| 154 | } | ||
| 155 | |||
| 156 | bool DestComponentEnabled(int i) const { | ||
| 157 | return (dest_mask & (0x8 >> i)); | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string SelectorToString(bool src2) const { | ||
| 161 | std::map<Selector, std::string> map = { | ||
| 162 | { Selector::x, "x" }, | ||
| 163 | { Selector::y, "y" }, | ||
| 164 | { Selector::z, "z" }, | ||
| 165 | { Selector::w, "w" } | ||
| 166 | }; | ||
| 167 | std::string ret; | ||
| 168 | for (int i = 0; i < 4; ++i) { | ||
| 169 | ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i)); | ||
| 170 | } | ||
| 171 | return ret; | ||
| 172 | } | ||
| 173 | |||
| 174 | std::string DestMaskToString() const { | ||
| 175 | std::string ret; | ||
| 176 | for (int i = 0; i < 4; ++i) { | ||
| 177 | if (!DestComponentEnabled(i)) | ||
| 178 | ret += "_"; | ||
| 179 | else | ||
| 180 | ret += "xyzw"[i]; | ||
| 181 | } | ||
| 182 | return ret; | ||
| 183 | } | ||
| 184 | |||
| 185 | // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x | ||
| 186 | BitField< 0, 4, u32> dest_mask; | ||
| 187 | |||
| 188 | BitField< 5, 2, Selector> src1_selector_3; | ||
| 189 | BitField< 7, 2, Selector> src1_selector_2; | ||
| 190 | BitField< 9, 2, Selector> src1_selector_1; | ||
| 191 | BitField<11, 2, Selector> src1_selector_0; | ||
| 192 | |||
| 193 | BitField<14, 2, Selector> src2_selector_3; | ||
| 194 | BitField<16, 2, Selector> src2_selector_2; | ||
| 195 | BitField<18, 2, Selector> src2_selector_1; | ||
| 196 | BitField<20, 2, Selector> src2_selector_0; | ||
| 197 | |||
| 198 | BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign? | ||
| 199 | }; | ||
| 200 | |||
| 201 | void SubmitShaderMemoryChange(u32 addr, u32 value); | ||
| 202 | void SubmitSwizzleDataChange(u32 addr, u32 value); | ||
| 203 | |||
| 204 | OutputVertex RunShader(const InputVertex& input, int num_attributes); | ||
| 205 | |||
| 206 | Math::Vec4<float24>& GetFloatUniform(u32 index); | ||
| 207 | |||
| 208 | } // namespace | ||
| 209 | |||
| 210 | } // namespace | ||
| 211 | |||
diff --git a/src/video_core/video_core.vcxproj b/src/video_core/video_core.vcxproj index d77be2bef..48d77cdc4 100644 --- a/src/video_core/video_core.vcxproj +++ b/src/video_core/video_core.vcxproj | |||
| @@ -20,14 +20,25 @@ | |||
| 20 | </ItemGroup> | 20 | </ItemGroup> |
| 21 | <ItemGroup> | 21 | <ItemGroup> |
| 22 | <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> | 22 | <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> |
| 23 | <ClCompile Include="clipper.cpp" /> | ||
| 24 | <ClCompile Include="command_processor.cpp" /> | ||
| 25 | <ClCompile Include="primitive_assembly.cpp" /> | ||
| 26 | <ClCompile Include="rasterizer.cpp" /> | ||
| 23 | <ClCompile Include="utils.cpp" /> | 27 | <ClCompile Include="utils.cpp" /> |
| 28 | <ClCompile Include="vertex_shader.cpp" /> | ||
| 24 | <ClCompile Include="video_core.cpp" /> | 29 | <ClCompile Include="video_core.cpp" /> |
| 25 | </ItemGroup> | 30 | </ItemGroup> |
| 26 | <ItemGroup> | 31 | <ItemGroup> |
| 32 | <ClInclude Include="clipper.h" /> | ||
| 33 | <ClInclude Include="command_processor.h" /> | ||
| 27 | <ClInclude Include="gpu_debugger.h" /> | 34 | <ClInclude Include="gpu_debugger.h" /> |
| 35 | <ClInclude Include="math.h" /> | ||
| 28 | <ClInclude Include="pica.h" /> | 36 | <ClInclude Include="pica.h" /> |
| 37 | <ClInclude Include="primitive_assembly.h" /> | ||
| 38 | <ClInclude Include="rasterizer.h" /> | ||
| 29 | <ClInclude Include="renderer_base.h" /> | 39 | <ClInclude Include="renderer_base.h" /> |
| 30 | <ClInclude Include="utils.h" /> | 40 | <ClInclude Include="utils.h" /> |
| 41 | <ClInclude Include="vertex_shader.h" /> | ||
| 31 | <ClInclude Include="video_core.h" /> | 42 | <ClInclude Include="video_core.h" /> |
| 32 | <ClInclude Include="renderer_opengl\renderer_opengl.h" /> | 43 | <ClInclude Include="renderer_opengl\renderer_opengl.h" /> |
| 33 | </ItemGroup> | 44 | </ItemGroup> |
diff --git a/src/video_core/video_core.vcxproj.filters b/src/video_core/video_core.vcxproj.filters index b89ac1ac4..31af4f1df 100644 --- a/src/video_core/video_core.vcxproj.filters +++ b/src/video_core/video_core.vcxproj.filters | |||
| @@ -9,17 +9,28 @@ | |||
| 9 | <ClCompile Include="renderer_opengl\renderer_opengl.cpp"> | 9 | <ClCompile Include="renderer_opengl\renderer_opengl.cpp"> |
| 10 | <Filter>renderer_opengl</Filter> | 10 | <Filter>renderer_opengl</Filter> |
| 11 | </ClCompile> | 11 | </ClCompile> |
| 12 | <ClCompile Include="clipper.cpp" /> | ||
| 13 | <ClCompile Include="command_processor.cpp" /> | ||
| 14 | <ClCompile Include="primitive_assembly.cpp" /> | ||
| 15 | <ClCompile Include="rasterizer.cpp" /> | ||
| 12 | <ClCompile Include="utils.cpp" /> | 16 | <ClCompile Include="utils.cpp" /> |
| 17 | <ClCompile Include="vertex_shader.cpp" /> | ||
| 13 | <ClCompile Include="video_core.cpp" /> | 18 | <ClCompile Include="video_core.cpp" /> |
| 14 | </ItemGroup> | 19 | </ItemGroup> |
| 15 | <ItemGroup> | 20 | <ItemGroup> |
| 16 | <ClInclude Include="renderer_opengl\renderer_opengl.h"> | 21 | <ClInclude Include="renderer_opengl\renderer_opengl.h"> |
| 17 | <Filter>renderer_opengl</Filter> | 22 | <Filter>renderer_opengl</Filter> |
| 18 | </ClInclude> | 23 | </ClInclude> |
| 24 | <ClInclude Include="clipper.h" /> | ||
| 25 | <ClInclude Include="command_processor.h" /> | ||
| 19 | <ClInclude Include="gpu_debugger.h" /> | 26 | <ClInclude Include="gpu_debugger.h" /> |
| 27 | <ClInclude Include="math.h" /> | ||
| 20 | <ClInclude Include="pica.h" /> | 28 | <ClInclude Include="pica.h" /> |
| 29 | <ClInclude Include="primitive_assembly.h" /> | ||
| 30 | <ClInclude Include="rasterizer.h" /> | ||
| 21 | <ClInclude Include="renderer_base.h" /> | 31 | <ClInclude Include="renderer_base.h" /> |
| 22 | <ClInclude Include="utils.h" /> | 32 | <ClInclude Include="utils.h" /> |
| 33 | <ClInclude Include="vertex_shader.h" /> | ||
| 23 | <ClInclude Include="video_core.h" /> | 34 | <ClInclude Include="video_core.h" /> |
| 24 | </ItemGroup> | 35 | </ItemGroup> |
| 25 | <ItemGroup> | 36 | <ItemGroup> |