Diffstat (limited to 'src/video_core')
20 files changed, 740 insertions, 294 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d4cdb4ab2..2eaece298 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -24,41 +24,18 @@ namespace Tegra {
 
 enum class BufferMethods {
     BindObject = 0,
-    SetGraphMacroCode = 0x45,
-    SetGraphMacroCodeArg = 0x46,
-    SetGraphMacroEntry = 0x47,
-    CountBufferMethods = 0x100,
+    CountBufferMethods = 0x40,
 };
 
 void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) {
-    LOG_WARNING(HW_GPU, "Processing method %08X on subchannel %u value %08X remaining params %u",
-                method, subchannel, value, remaining_params);
-
-    if (method == static_cast<u32>(BufferMethods::SetGraphMacroEntry)) {
-        // Prepare to upload a new macro, reset the upload counter.
-        LOG_DEBUG(HW_GPU, "Uploading GPU macro %08X", value);
-        current_macro_entry = value;
-        current_macro_code.clear();
-        return;
-    }
-
-    if (method == static_cast<u32>(BufferMethods::SetGraphMacroCodeArg)) {
-        // Append a new code word to the current macro.
-        current_macro_code.push_back(value);
-
-        // There are no more params remaining, submit the code to the 3D engine.
-        if (remaining_params == 0) {
-            maxwell_3d->SubmitMacroCode(current_macro_entry, std::move(current_macro_code));
-            current_macro_entry = InvalidGraphMacroEntry;
-            current_macro_code.clear();
-        }
-
-        return;
-    }
+    NGLOG_WARNING(HW_GPU,
+                  "Processing method {:08X} on subchannel {} value "
+                  "{:08X} remaining params {}",
+                  method, subchannel, value, remaining_params);
 
     if (method == static_cast<u32>(BufferMethods::BindObject)) {
         // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel %u to engine %u", subchannel, value);
+        NGLOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
         ASSERT(bound_engines.find(subchannel) == bound_engines.end());
         bound_engines[subchannel] = static_cast<EngineID>(value);
         return;
@@ -66,7 +43,7 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
 
     if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
         // TODO(Subv): Research and implement these methods.
-        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+        NGLOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
         return;
     }
 
@@ -90,11 +67,9 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
 }
 
 void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
-    // TODO(Subv): PhysicalToVirtualAddress is a misnomer, it converts a GPU VAddr into an
-    // application VAddr.
-    const VAddr head_address = memory_manager->PhysicalToVirtualAddress(address);
-    VAddr current_addr = head_address;
-    while (current_addr < head_address + size * sizeof(CommandHeader)) {
+    const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
+    VAddr current_addr = *head_address;
+    while (current_addr < *head_address + size * sizeof(CommandHeader)) {
         const CommandHeader header = {Memory::Read32(current_addr)};
         current_addr += sizeof(u32);
 
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 7aab163dc..9019f2504 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,71 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/textures/decoders.h"
 
 namespace Tegra {
 namespace Engines {
 
-void Fermi2D::WriteReg(u32 method, u32 value) {}
+Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
+
+void Fermi2D::WriteReg(u32 method, u32 value) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid Fermi2D register, increase the size of the Regs structure");
+
+    regs.reg_array[method] = value;
+
+    switch (method) {
+    case FERMI2D_REG_INDEX(trigger): {
+        HandleSurfaceCopy();
+        break;
+    }
+    }
+}
+
+void Fermi2D::HandleSurfaceCopy() {
+    NGLOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
+                  static_cast<u32>(regs.operation));
+
+    const GPUVAddr source = regs.src.Address();
+    const GPUVAddr dest = regs.dst.Address();
+
+    // TODO(Subv): Only same-format and same-size copies are allowed for now.
+    ASSERT(regs.src.format == regs.dst.format);
+    ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
+
+    // TODO(Subv): Only raw copies are implemented.
+    ASSERT(regs.operation == Regs::Operation::SrcCopy);
+
+    const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
+    const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
+
+    u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
+    u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
+
+    if (regs.src.linear == regs.dst.linear) {
+        // If the input layout and the output layout are the same, just perform a raw copy.
+        Memory::CopyBlock(dest_cpu, source_cpu,
+                          src_bytes_per_pixel * regs.dst.width * regs.dst.height);
+        return;
+    }
+
+    u8* src_buffer = Memory::GetPointer(source_cpu);
+    u8* dst_buffer = Memory::GetPointer(dest_cpu);
+
+    if (!regs.src.linear && regs.dst.linear) {
+        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+        Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel,
+                                  dst_bytes_per_pixel, src_buffer, dst_buffer, true,
+                                  regs.src.block_height);
+    } else {
+        // If the input is linear and the output is tiled, swizzle the input and copy it over.
+        Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel,
+                                  dst_bytes_per_pixel, dst_buffer, src_buffer, false,
+                                  regs.dst.block_height);
+    }
+}
 
 } // namespace Engines
 } // namespace Tegra
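The Address() getters on regs.src and regs.dst above recombine a 64-bit GPU virtual address that the hardware exposes as an address_high/address_low register pair. A minimal standalone sketch of that recombination (MakeGpuAddress is an illustrative name, not part of this change):

    #include <cstdint>

    using GPUVAddr = uint64_t;

    // Recombine the address_high/address_low register pair into one GPU virtual address.
    constexpr GPUVAddr MakeGpuAddress(uint32_t high, uint32_t low) {
        return (static_cast<GPUVAddr>(high) << 32) | low;
    }

    static_assert(MakeGpuAddress(0x1, 0x20) == 0x100000020ULL, "high word shifts into bits 32-63");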
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 8967ddede..0c5b413cc 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -4,19 +4,106 @@
 
 #pragma once
 
+#include <array>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
 
 namespace Tegra {
 namespace Engines {
 
+#define FERMI2D_REG_INDEX(field_name) \
+    (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
+
 class Fermi2D final {
 public:
-    Fermi2D() = default;
+    explicit Fermi2D(MemoryManager& memory_manager);
     ~Fermi2D() = default;
 
     /// Write the value to the register identified by method.
     void WriteReg(u32 method, u32 value);
+
+    struct Regs {
+        static constexpr size_t NUM_REGS = 0x258;
+
+        struct Surface {
+            RenderTargetFormat format;
+            BitField<0, 1, u32> linear;
+            union {
+                BitField<0, 4, u32> block_depth;
+                BitField<4, 4, u32> block_height;
+                BitField<8, 4, u32> block_width;
+            };
+            u32 depth;
+            u32 layer;
+            u32 pitch;
+            u32 width;
+            u32 height;
+            u32 address_high;
+            u32 address_low;
+
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                             address_low);
+            }
+        };
+        static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
+
+        enum class Operation : u32 {
+            SrcCopyAnd = 0,
+            ROPAnd = 1,
+            Blend = 2,
+            SrcCopy = 3,
+            ROP = 4,
+            SrcCopyPremult = 5,
+            BlendPremult = 6,
+        };
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS(0x80);
+
+                Surface dst;
+
+                INSERT_PADDING_WORDS(2);
+
+                Surface src;
+
+                INSERT_PADDING_WORDS(0x15);
+
+                Operation operation;
+
+                INSERT_PADDING_WORDS(0x9);
+
+                // TODO(Subv): This is only a guess.
+                u32 trigger;
+
+                INSERT_PADDING_WORDS(0x1A3);
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    MemoryManager& memory_manager;
+
+private:
+    /// Performs the copy from the source surface to the destination surface as configured in the
+    /// registers.
+    void HandleSurfaceCopy();
 };
 
+#define ASSERT_REG_POSITION(field_name, position) \
+    static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(dst, 0x80);
+ASSERT_REG_POSITION(src, 0x8C);
+ASSERT_REG_POSITION(operation, 0xAB);
+ASSERT_REG_POSITION(trigger, 0xB5);
+#undef ASSERT_REG_POSITION
+
 } // namespace Engines
 } // namespace Tegra
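FERMI2D_REG_INDEX maps a named Regs field to its method number by dividing the field's byte offset by the 4-byte register word, and the ASSERT_REG_POSITION block pins those offsets at compile time. A self-contained sketch of the same offsetof idiom, with a simplified register block (REG_INDEX and the pad member are illustrative only):

    #include <cstddef>
    #include <cstdint>

    struct Regs {
        uint32_t pad[0xB5]; // everything that precedes the trigger register
        uint32_t trigger;   // word index 0xB5, as ASSERT_REG_POSITION(trigger, 0xB5) checks
    };

    #define REG_INDEX(field) (offsetof(Regs, field) / sizeof(uint32_t))

    static_assert(REG_INDEX(trigger) == 0xB5, "trigger dispatches at method 0xB5");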
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2a3ff234a..4306b894f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,10 +22,6 @@ constexpr u32 MacroRegistersStart = 0xE00;
 Maxwell3D::Maxwell3D(MemoryManager& memory_manager)
     : memory_manager(memory_manager), macro_interpreter(*this) {}
 
-void Maxwell3D::SubmitMacroCode(u32 entry, std::vector<u32> code) {
-    uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code);
-}
-
 void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
     auto macro_code = uploaded_macros.find(method);
     // The requested macro must have been uploaded already.
@@ -37,9 +33,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
 }
 
 void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
-    ASSERT_MSG(method < Regs::NUM_REGS,
-               "Invalid Maxwell3D register, increase the size of the Regs structure");
-
     auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
 
     // It is an error to write to a register other than the current macro's ARG register before it
@@ -68,6 +61,9 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
         return;
     }
 
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid Maxwell3D register, increase the size of the Regs structure");
+
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
     }
@@ -75,6 +71,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
     regs.reg_array[method] = value;
 
     switch (method) {
+    case MAXWELL3D_REG_INDEX(macros.data): {
+        ProcessMacroUpload(value);
+        break;
+    }
     case MAXWELL3D_REG_INDEX(code_address.code_address_high):
     case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
         // Note: For some reason games (like Puyo Puyo Tetris) seem to write 0 to the CODE_ADDRESS
@@ -141,17 +141,48 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
     }
 }
 
+void Maxwell3D::ProcessMacroUpload(u32 data) {
+    // Store the uploaded macro code to interpret them when they're called.
+    auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart];
+    macro.push_back(data);
+}
+
 void Maxwell3D::ProcessQueryGet() {
     GPUVAddr sequence_address = regs.query.QueryAddress();
     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
     // VAddr before writing.
-    VAddr address = memory_manager.PhysicalToVirtualAddress(sequence_address);
+    boost::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
+
+    // TODO(Subv): Support the other query units.
+    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
+               "Units other than CROP are unimplemented");
+    ASSERT_MSG(regs.query.query_get.short_query,
+               "Writing the entire query result structure is unimplemented");
+
+    u32 value = Memory::Read32(*address);
+    u32 result = 0;
+
+    // TODO(Subv): Support the other query variables
+    switch (regs.query.query_get.select) {
+    case Regs::QuerySelect::Zero:
+        result = 0;
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented query select type %u",
+                          static_cast<u32>(regs.query.query_get.select.Value()));
+    }
+
+    // TODO(Subv): Research and implement how query sync conditions work.
 
     switch (regs.query.query_get.mode) {
-    case Regs::QueryMode::Write: {
+    case Regs::QueryMode::Write:
+    case Regs::QueryMode::Write2: {
         // Write the current query sequence to the sequence address.
         u32 sequence = regs.query.query_sequence;
-        Memory::Write32(address, sequence);
+        Memory::Write32(*address, sequence);
+
+        // TODO(Subv): Write the proper query response structure to the address when not using short
+        // mode.
         break;
     }
     default:
@@ -161,8 +192,8 @@ void Maxwell3D::ProcessQueryGet() {
 }
 
 void Maxwell3D::DrawArrays() {
-    LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(),
-              regs.vertex_buffer.count);
+    NGLOG_DEBUG(HW_GPU, "called, topology={}, count={}",
+                static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count);
     ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
 
     auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
@@ -200,10 +231,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
     // Don't allow writing past the end of the buffer.
     ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
 
-    VAddr address =
-        memory_manager.PhysicalToVirtualAddress(buffer_address + regs.const_buffer.cb_pos);
+    boost::optional<VAddr> address =
+        memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
 
-    Memory::Write32(address, value);
+    Memory::Write32(*address, value);
 
     // Increment the current buffer position.
     regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
@@ -213,10 +244,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     GPUVAddr tic_base_address = regs.tic.TICAddress();
 
     GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    VAddr tic_address_cpu = memory_manager.PhysicalToVirtualAddress(tic_address_gpu);
+    boost::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
 
     Texture::TICEntry tic_entry;
-    Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
 
     ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -243,10 +274,10 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
     GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
 
     GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    VAddr tsc_address_cpu = memory_manager.PhysicalToVirtualAddress(tsc_address_gpu);
+    boost::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
 
     Texture::TSCEntry tsc_entry;
-    Memory::ReadBlock(tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
     return tsc_entry;
 }
 
@@ -268,7 +299,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
 
         Texture::TextureHandle tex_handle{
-            Memory::Read32(memory_manager.PhysicalToVirtualAddress(current_texture))};
+            Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))};
 
         Texture::FullTextureInfo tex_info{};
         // TODO(Subv): Use the shader to determine which textures are actually accessed.
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d4fcedace..5cf62fb01 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -31,7 +31,7 @@ public:
     /// Register structure of the Maxwell3D engine.
     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
     struct Regs {
-        static constexpr size_t NUM_REGS = 0xE36;
+        static constexpr size_t NUM_REGS = 0xE00;
 
         static constexpr size_t NumRenderTargets = 8;
         static constexpr size_t NumViewports = 16;
@@ -46,6 +46,29 @@
         enum class QueryMode : u32 {
             Write = 0,
             Sync = 1,
+            // TODO(Subv): It is currently unknown what the difference between method 2 and method 0
+            // is.
+            Write2 = 2,
+        };
+
+        enum class QueryUnit : u32 {
+            VFetch = 1,
+            VP = 2,
+            Rast = 4,
+            StrmOut = 5,
+            GP = 6,
+            ZCull = 7,
+            Prop = 10,
+            Crop = 15,
+        };
+
+        enum class QuerySelect : u32 {
+            Zero = 0,
+        };
+
+        enum class QuerySyncCondition : u32 {
+            NotEqual = 0,
+            GreaterThan = 1,
         };
 
         enum class ShaderProgram : u32 {
@@ -299,7 +322,15 @@
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0x200);
+                INSERT_PADDING_WORDS(0x45);
+
+                struct {
+                    INSERT_PADDING_WORDS(1);
+                    u32 data;
+                    u32 entry;
+                } macros;
+
+                INSERT_PADDING_WORDS(0x1B8);
 
                 struct {
                     u32 address_high;
@@ -476,7 +507,10 @@
                     u32 raw;
                     BitField<0, 2, QueryMode> mode;
                     BitField<4, 1, u32> fence;
-                    BitField<12, 4, u32> unit;
+                    BitField<12, 4, QueryUnit> unit;
+                    BitField<16, 1, QuerySyncCondition> sync_cond;
+                    BitField<23, 5, QuerySelect> select;
+                    BitField<28, 1, u32> short_query;
                 } query_get;
 
                 GPUVAddr QueryAddress() const {
@@ -500,6 +534,11 @@
                     return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) |
                                                  start_low);
                 }
+
+                bool IsEnabled() const {
+                    return enable != 0 && StartAddress() != 0;
+                }
+
             } vertex_array[NumVertexArrays];
 
             Blend blend;
@@ -574,7 +613,7 @@
                 u32 size[MaxShaderStage];
             } tex_info_buffers;
 
-            INSERT_PADDING_WORDS(0x102);
+            INSERT_PADDING_WORDS(0xCC);
         };
         std::array<u32, NUM_REGS> reg_array;
     };
@@ -606,9 +645,6 @@ public:
     /// Write the value to the register identified by method.
     void WriteReg(u32 method, u32 value, u32 remaining_params);
 
-    /// Uploads the code for a GPU macro program associated with the specified entry.
-    void SubmitMacroCode(u32 entry, std::vector<u32> code);
-
     /// Returns a list of enabled textures for the specified shader stage.
     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
 
@@ -639,6 +675,9 @@ private:
      */
     void CallMacroMethod(u32 method, std::vector<u32> parameters);
 
+    /// Handles writes to the macro uploading registers.
+    void ProcessMacroUpload(u32 data);
+
     /// Handles a write to the QUERY_GET register.
     void ProcessQueryGet();
 
@@ -656,6 +695,7 @@
     static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
                   "Field " #field_name " has invalid position")
 
+ASSERT_REG_POSITION(macros, 0x45);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform[0], 0x280);
 ASSERT_REG_POSITION(viewport, 0x300);
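A quick check on the padding arithmetic introduced above: the macros block starts at word offset 0x45 and occupies three words (one pad word, then data and entry), so the 0x1B8 pad words that follow place the next field at 0x45 + 3 + 0x1B8 = 0x200, exactly where ASSERT_REG_POSITION(rt, 0x200) expects the render-target registers. The same identity as a compile-time check (illustrative, not part of the diff):

    static_assert(0x45 + 3 + 0x1B8 == 0x200,
                  "macros block plus padding must end where the rt registers begin");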
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 5a006aee5..f4d11fa5d 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -214,6 +214,20 @@ union Instruction {
         BitField<56, 1, u64> neg_b;
     } fsetp;
 
+    union {
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred;
+        BitField<43, 1, u64> neg_a;
+        BitField<44, 1, u64> abs_b;
+        BitField<45, 2, PredOperation> op;
+        BitField<48, 4, PredCondition> cond;
+        BitField<53, 1, u64> neg_b;
+        BitField<54, 1, u64> abs_a;
+        BitField<52, 1, u64> bf;
+        BitField<55, 1, u64> ftz;
+        BitField<56, 1, u64> neg_imm;
+    } fset;
+
     BitField<61, 1, u64> is_b_imm;
     BitField<60, 1, u64> is_b_gpr;
     BitField<59, 1, u64> is_c_gpr;
@@ -261,6 +275,9 @@ public:
         I2F_C,
         I2F_R,
         I2F_IMM,
+        I2I_C,
+        I2I_R,
+        I2I_IMM,
         LOP32I,
         MOV_C,
         MOV_R,
@@ -272,6 +289,9 @@ public:
         FSETP_C, // Set Predicate
         FSETP_R,
         FSETP_IMM,
+        FSET_C,
+        FSET_R,
+        FSET_IMM,
         ISETP_C,
         ISETP_IMM,
         ISETP_R,
@@ -283,8 +303,9 @@ public:
         Ffma,
         Flow,
         Memory,
-        FloatPredicate,
-        IntegerPredicate,
+        FloatSet,
+        FloatSetPredicate,
+        IntegerSetPredicate,
         Unknown,
     };
 
@@ -409,6 +430,9 @@ private:
            INST("0100110010111---", Id::I2F_C, Type::Arithmetic, "I2F_C"),
            INST("0101110010111---", Id::I2F_R, Type::Arithmetic, "I2F_R"),
            INST("0011100-10111---", Id::I2F_IMM, Type::Arithmetic, "I2F_IMM"),
+           INST("0100110011100---", Id::I2I_C, Type::Arithmetic, "I2I_C"),
+           INST("0101110011100---", Id::I2I_R, Type::Arithmetic, "I2I_R"),
+           INST("01110001-1000---", Id::I2I_IMM, Type::Arithmetic, "I2I_IMM"),
            INST("000001----------", Id::LOP32I, Type::Arithmetic, "LOP32I"),
            INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
            INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
@@ -417,12 +441,15 @@ private:
            INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"),
            INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"),
            INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"),
-           INST("010010111011----", Id::FSETP_C, Type::FloatPredicate, "FSETP_C"),
-           INST("010110111011----", Id::FSETP_R, Type::FloatPredicate, "FSETP_R"),
-           INST("0011011-1011----", Id::FSETP_IMM, Type::FloatPredicate, "FSETP_IMM"),
-           INST("010010110110----", Id::ISETP_C, Type::IntegerPredicate, "ISETP_C"),
-           INST("010110110110----", Id::ISETP_R, Type::IntegerPredicate, "ISETP_R"),
-           INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerPredicate, "ISETP_IMM"),
+           INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"),
+           INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"),
+           INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"),
+           INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"),
+           INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"),
+           INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"),
+           INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
+           INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
+           INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
        };
 #undef INST
        std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
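Each INST entry pairs a 16-character bit pattern with an opcode id: '0' and '1' are literal bits, '-' is a don't-care. A decoder commonly compiles such a pattern into a mask/expected pair and accepts an opcode when (opcode & mask) == expected. A self-contained sketch of that idea (the names, and the assumption that the pattern covers the top 16 bits of the 64-bit instruction word, are illustrative, not taken from this header):

    #include <cstdint>

    struct Matcher {
        uint64_t mask;     // 1 wherever the pattern pins a bit
        uint64_t expected; // the pinned bit values
    };

    // Compile a 16-character pattern such as "0011011-1011----".
    constexpr Matcher CompilePattern(const char (&bits)[17]) {
        Matcher m{0, 0};
        for (int i = 0; i < 16; ++i) {
            m.mask <<= 1;
            m.expected <<= 1;
            if (bits[i] != '-') {
                m.mask |= 1;
                if (bits[i] == '1')
                    m.expected |= 1;
            }
        }
        return m;
    }

    constexpr bool Matches(uint64_t opcode_top16, Matcher m) {
        return (opcode_top16 & m.mask) == m.expected;
    }

    static_assert(Matches(0b0101100010101010, CompilePattern("01011000--------")),
                  "an FSET_R-style opcode matches regardless of its don't-care bits");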
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 9463cd5d6..9eb143918 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,7 +12,7 @@ namespace Tegra {
 GPU::GPU() {
     memory_manager = std::make_unique<MemoryManager>();
     maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
-    fermi_2d = std::make_unique<Engines::Fermi2D>();
+    fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
     maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
 }
 
@@ -22,4 +22,16 @@ const Tegra::Engines::Maxwell3D& GPU::Get3DEngine() const {
     return *maxwell_3d;
 }
 
+u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
+    ASSERT(format != RenderTargetFormat::NONE);
+
+    switch (format) {
+    case RenderTargetFormat::RGBA8_UNORM:
+    case RenderTargetFormat::RGB10_A2_UNORM:
+        return 4;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented render target format %u", static_cast<u32>(format));
+    }
+}
+
 } // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 2888daedc..f168a5171 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -21,6 +21,9 @@ enum class RenderTargetFormat : u32 {
     RGBA8_SRGB = 0xD6,
 };
 
+/// Returns the number of bytes per pixel of each rendertarget format.
+u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
+
 class DebugContext;
 
 /**
@@ -86,8 +89,6 @@ public:
     }
 
 private:
-    static constexpr u32 InvalidGraphMacroEntry = 0xFFFFFFFF;
-
     /// Writes a single register in the engine bound to the specified subchannel
     void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
 
@@ -100,11 +101,6 @@ private:
     std::unique_ptr<Engines::Fermi2D> fermi_2d;
     /// Compute engine
     std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
-
-    /// Entry of the macro that is currently being uploaded
-    u32 current_macro_entry = InvalidGraphMacroEntry;
-    /// Code being uploaded for the current macro
-    std::vector<u32> current_macro_code;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 2789a4ca1..25984439d 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -2,109 +2,118 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "video_core/memory_manager.h"
 
 namespace Tegra {
 
-PAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
-    boost::optional<PAddr> paddr = FindFreeBlock(size, align);
-    ASSERT(paddr);
+GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
+    boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, align);
+    ASSERT(gpu_addr);
 
-    for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
-        PageSlot(*paddr + offset) = static_cast<u64>(PageStatus::Allocated);
+    for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
+        ASSERT(PageSlot(*gpu_addr + offset) == static_cast<u64>(PageStatus::Unmapped));
+        PageSlot(*gpu_addr + offset) = static_cast<u64>(PageStatus::Allocated);
     }
 
-    return *paddr;
+    return *gpu_addr;
 }
 
-PAddr MemoryManager::AllocateSpace(PAddr paddr, u64 size, u64 align) {
-    for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
-        if (IsPageMapped(paddr + offset)) {
-            return AllocateSpace(size, align);
-        }
-    }
-
-    for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
-        PageSlot(paddr + offset) = static_cast<u64>(PageStatus::Allocated);
+GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
+    for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
+        ASSERT(PageSlot(gpu_addr + offset) == static_cast<u64>(PageStatus::Unmapped));
+        PageSlot(gpu_addr + offset) = static_cast<u64>(PageStatus::Allocated);
     }
 
-    return paddr;
+    return gpu_addr;
 }
 
-PAddr MemoryManager::MapBufferEx(VAddr vaddr, u64 size) {
-    vaddr &= ~Memory::PAGE_MASK;
-
-    boost::optional<PAddr> paddr = FindFreeBlock(size);
-    ASSERT(paddr);
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
+    boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, PAGE_SIZE);
+    ASSERT(gpu_addr);
 
-    for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
-        PageSlot(*paddr + offset) = vaddr + offset;
+    for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
+        ASSERT(PageSlot(*gpu_addr + offset) == static_cast<u64>(PageStatus::Unmapped));
+        PageSlot(*gpu_addr + offset) = cpu_addr + offset;
     }
 
-    return *paddr;
+    MappedRegion region{cpu_addr, *gpu_addr, size};
+    mapped_regions.push_back(region);
+
+    return *gpu_addr;
 }
 
-PAddr MemoryManager::MapBufferEx(VAddr vaddr, PAddr paddr, u64 size) {
-    vaddr &= ~Memory::PAGE_MASK;
-    paddr &= ~Memory::PAGE_MASK;
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
+    ASSERT((gpu_addr & PAGE_MASK) == 0);
 
-    for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
-        if (PageSlot(paddr + offset) != static_cast<u64>(PageStatus::Allocated)) {
-            return MapBufferEx(vaddr, size);
-        }
-    }
-
-    for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
-        PageSlot(paddr + offset) = vaddr + offset;
+    for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
+        ASSERT(PageSlot(gpu_addr + offset) == static_cast<u64>(PageStatus::Allocated));
+        PageSlot(gpu_addr + offset) = cpu_addr + offset;
     }
 
-    return paddr;
+    MappedRegion region{cpu_addr, gpu_addr, size};
+    mapped_regions.push_back(region);
+
+    return gpu_addr;
 }
 
-boost::optional<PAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
-    PAddr paddr{};
-    u64 free_space{};
-    align = (align + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
+boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
+    GPUVAddr gpu_addr = 0;
+    u64 free_space = 0;
+    align = (align + PAGE_MASK) & ~PAGE_MASK;
 
-    while (paddr + free_space < MAX_ADDRESS) {
-        if (!IsPageMapped(paddr + free_space)) {
-            free_space += Memory::PAGE_SIZE;
+    while (gpu_addr + free_space < MAX_ADDRESS) {
+        if (!IsPageMapped(gpu_addr + free_space)) {
+            free_space += PAGE_SIZE;
             if (free_space >= size) {
-                return paddr;
+                return gpu_addr;
             }
         } else {
-            paddr += free_space + Memory::PAGE_SIZE;
+            gpu_addr += free_space + PAGE_SIZE;
             free_space = 0;
-            const u64 remainder{paddr % align};
-            if (!remainder) {
-                paddr = (paddr - remainder) + align;
-            }
+            gpu_addr = Common::AlignUp(gpu_addr, align);
         }
     }
 
     return {};
 }
 
-VAddr MemoryManager::PhysicalToVirtualAddress(PAddr paddr) {
-    VAddr base_addr = PageSlot(paddr);
+boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
+    VAddr base_addr = PageSlot(gpu_addr);
     ASSERT(base_addr != static_cast<u64>(PageStatus::Unmapped));
-    return base_addr + (paddr & Memory::PAGE_MASK);
+
+    if (base_addr == static_cast<u64>(PageStatus::Allocated)) {
+        return {};
+    }
+
+    return base_addr + (gpu_addr & PAGE_MASK);
+}
+
+std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
+    std::vector<GPUVAddr> results;
+    for (const auto& region : mapped_regions) {
+        if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
+            u64 offset = cpu_addr - region.cpu_addr;
+            results.push_back(region.gpu_addr + offset);
+        }
+    }
+    return results;
 }
 
-bool MemoryManager::IsPageMapped(PAddr paddr) {
-    return PageSlot(paddr) != static_cast<u64>(PageStatus::Unmapped);
+bool MemoryManager::IsPageMapped(GPUVAddr gpu_addr) {
+    return PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Unmapped);
 }
 
-VAddr& MemoryManager::PageSlot(PAddr paddr) {
-    auto& block = page_table[(paddr >> (Memory::PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
+VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
+    auto& block = page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
     if (!block) {
         block = std::make_unique<PageBlock>();
         for (unsigned index = 0; index < PAGE_BLOCK_SIZE; index++) {
             (*block)[index] = static_cast<u64>(PageStatus::Unmapped);
         }
     }
-    return (*block)[(paddr >> Memory::PAGE_BITS) & PAGE_BLOCK_MASK];
+    return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
 }
 
 } // namespace Tegra
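FindFreeBlock now rounds the next candidate address up with Common::AlignUp instead of the hand-rolled remainder arithmetic it replaces; the line above it, (align + PAGE_MASK) & ~PAGE_MASK, is the same idiom applied to the alignment itself. A minimal sketch of how such a helper typically works for power-of-two alignments (an assumption about, not a quote of, the implementation in common/alignment.h):

    #include <cstdint>

    // Round value up to the next multiple of align; align must be a power of two.
    constexpr uint64_t AlignUp(uint64_t value, uint64_t align) {
        return (value + align - 1) & ~(align - 1);
    }

    static_assert(AlignUp(0x10001, 0x10000) == 0x20000, "partial pages round up");
    static_assert(AlignUp(0x20000, 0x10000) == 0x20000, "aligned values are unchanged");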
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 47da7acd6..08140c83a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -6,8 +6,11 @@
 
 #include <array>
 #include <memory>
+#include <vector>
+
+#include <boost/optional.hpp>
+
 #include "common/common_types.h"
-#include "core/memory.h"
 
 namespace Tegra {
 
@@ -18,16 +21,21 @@ class MemoryManager final {
 public:
     MemoryManager() = default;
 
-    PAddr AllocateSpace(u64 size, u64 align);
-    PAddr AllocateSpace(PAddr paddr, u64 size, u64 align);
-    PAddr MapBufferEx(VAddr vaddr, u64 size);
-    PAddr MapBufferEx(VAddr vaddr, PAddr paddr, u64 size);
-    VAddr PhysicalToVirtualAddress(PAddr paddr);
+    GPUVAddr AllocateSpace(u64 size, u64 align);
+    GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
+    GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
+    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
+    boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
+    std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
+
+    static constexpr u64 PAGE_BITS = 16;
+    static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
+    static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
 
 private:
-    boost::optional<PAddr> FindFreeBlock(u64 size, u64 align = 1);
-    bool IsPageMapped(PAddr paddr);
-    VAddr& PageSlot(PAddr paddr);
+    boost::optional<GPUVAddr> FindFreeBlock(u64 size, u64 align = 1);
+    bool IsPageMapped(GPUVAddr gpu_addr);
+    VAddr& PageSlot(GPUVAddr gpu_addr);
 
     enum class PageStatus : u64 {
         Unmapped = 0xFFFFFFFFFFFFFFFFULL,
@@ -35,7 +43,7 @@ private:
     };
 
     static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
-    static constexpr u64 PAGE_TABLE_BITS{14};
+    static constexpr u64 PAGE_TABLE_BITS{10};
     static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
     static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
     static constexpr u64 PAGE_BLOCK_BITS{14};
@@ -44,6 +52,14 @@ private:
 
     using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
     std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
+
+    struct MappedRegion {
+        VAddr cpu_addr;
+        GPUVAddr gpu_addr;
+        u64 size;
+    };
+
+    std::vector<MappedRegion> mapped_regions;
 };
 
 } // namespace Tegra
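Taken together, the new interface models a distinct GPU virtual address space: space is reserved with AllocateSpace, backed by guest (CPU) memory with MapBufferEx, and translated in both directions, with GPU-to-CPU lookups returning boost::optional<VAddr> because an allocated page may not be backed yet. A hypothetical caller would look roughly like this (addresses and sizes are illustrative):

    Tegra::MemoryManager mm;

    // Reserve 128 KiB of GPU address space, then back it with guest memory.
    const Tegra::GPUVAddr gpu_addr = mm.AllocateSpace(0x20000, Tegra::MemoryManager::PAGE_SIZE);
    mm.MapBufferEx(/*cpu_addr=*/0x10000000, gpu_addr, 0x20000);

    // Translation can fail: an empty optional means the page is allocated but unbacked.
    if (const boost::optional<VAddr> cpu_addr = mm.GpuToCpuAddress(gpu_addr)) {
        // *cpu_addr is safe to pass to Memory::Read32/Memory::ReadBlock and friends.
    }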
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 36629dd11..f0e48a802 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@
 
 #include "common/common_types.h"
 #include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
 
 struct ScreenInfo;
 
@@ -25,14 +26,14 @@ public:
     virtual void FlushAll() = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushRegion(Tegra::GPUVAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0;
 
     /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0
     virtual bool AccelerateDisplayTransfer(const void* config) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d4a0d6db..9b3542e10 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -116,7 +116,7 @@ RasterizerOpenGL::RasterizerOpenGL() { | |||
| 116 | 116 | ||
| 117 | glEnable(GL_BLEND); | 117 | glEnable(GL_BLEND); |
| 118 | 118 | ||
| 119 | LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); | 119 | NGLOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | RasterizerOpenGL::~RasterizerOpenGL() { | 122 | RasterizerOpenGL::~RasterizerOpenGL() { |
| @@ -127,7 +127,8 @@ RasterizerOpenGL::~RasterizerOpenGL() { | |||
| 127 | } | 127 | } |
| 128 | } | 128 | } |
| 129 | 129 | ||
| 130 | void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | 130 | std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, |
| 131 | GLintptr buffer_offset) { | ||
| 131 | MICROPROFILE_SCOPE(OpenGL_VAO); | 132 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 132 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | 133 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; |
| 133 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | 134 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; |
| @@ -136,43 +137,58 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | |||
| 136 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | 137 | state.draw.vertex_buffer = stream_buffer->GetHandle(); |
| 137 | state.Apply(); | 138 | state.Apply(); |
| 138 | 139 | ||
| 139 | // TODO(bunnei): Add support for 1+ vertex arrays | 140 | // Upload all guest vertex arrays sequentially to our buffer |
| 140 | const auto& vertex_array{regs.vertex_array[0]}; | 141 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 141 | const auto& vertex_array_limit{regs.vertex_array_limit[0]}; | 142 | const auto& vertex_array = regs.vertex_array[index]; |
| 142 | ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); | 143 | if (!vertex_array.IsEnabled()) |
| 143 | ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); | 144 | continue; |
| 144 | for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { | 145 | |
| 145 | ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index); | 146 | const Tegra::GPUVAddr start = vertex_array.StartAddress(); |
| 147 | const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||
| 148 | |||
| 149 | ASSERT(end > start); | ||
| 150 | u64 size = end - start + 1; | ||
| 151 | |||
| 152 | // Copy vertex array data | ||
| 153 | res_cache.FlushRegion(start, size, nullptr); | ||
| 154 | Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); | ||
| 155 | |||
| 156 | // Bind the vertex array to the buffer at the current offset. | ||
| 157 | glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); | ||
| 158 | |||
| 159 | ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); | ||
| 160 | |||
| 161 | array_ptr += size; | ||
| 162 | buffer_offset += size; | ||
| 146 | } | 163 | } |
| 147 | 164 | ||
| 148 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | 165 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. |
| 149 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually used | 166 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually used |
| 150 | // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now | 167 | // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now |
| 151 | // to avoid OpenGL errors. | 168 | // to avoid OpenGL errors. |
| 169 | // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't | ||
| 170 | // assume every shader uses them all. | ||
| 152 | for (unsigned index = 0; index < 16; ++index) { | 171 | for (unsigned index = 0; index < 16; ++index) { |
| 153 | auto& attrib = regs.vertex_attrib_format[index]; | 172 | auto& attrib = regs.vertex_attrib_format[index]; |
| 154 | NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", | 173 | NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", |
| 155 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), | 174 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), |
| 156 | attrib.offset.Value(), attrib.IsNormalized()); | 175 | attrib.offset.Value(), attrib.IsNormalized()); |
| 157 | 176 | ||
| 158 | glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | 177 | auto& buffer = regs.vertex_array[attrib.buffer]; |
| 159 | attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, | 178 | ASSERT(buffer.IsEnabled()); |
| 160 | reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); | 179 | |
| 161 | glEnableVertexAttribArray(index); | 180 | glEnableVertexAttribArray(index); |
| 181 | glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | ||
| 182 | attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); | ||
| 183 | glVertexAttribBinding(index, attrib.buffer); | ||
| 184 | |||
| 162 | hw_vao_enabled_attributes[index] = true; | 185 | hw_vao_enabled_attributes[index] = true; |
| 163 | } | 186 | } |
| 164 | 187 | ||
| 165 | // Copy vertex array data | 188 | return {array_ptr, buffer_offset}; |
| 166 | const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1}; | ||
| 167 | const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; | ||
| 168 | res_cache.FlushRegion(data_addr, data_size, nullptr); | ||
| 169 | Memory::ReadBlock(data_addr, array_ptr, data_size); | ||
| 170 | |||
| 171 | array_ptr += data_size; | ||
| 172 | buffer_offset += data_size; | ||
| 173 | } | 189 | } |
| 174 | 190 | ||
| 175 | void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { | 191 | void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { |
| 176 | // Helper function for uploading uniform data | 192 | // Helper function for uploading uniform data |
| 177 | const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { | 193 | const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { |
| 178 | if (has_ARB_direct_state_access) { | 194 | if (has_ARB_direct_state_access) { |
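A minimal sketch of the separated attribute model that SetupVertexArrays now relies on (glVertexAttribFormat/glVertexAttribBinding, core since OpenGL 4.3): unlike the old glVertexAttribPointer path, the attribute layout is declared independently of any buffer and then pointed at whichever binding slot the guest's vertex_array registers selected. Handle and index names below are illustrative, not taken from the code above.

    #include <glad/glad.h>

    void BindGuestVertexArray(GLuint binding, GLuint stream_buffer_handle,
                              GLintptr buffer_offset, GLsizei stride,
                              GLuint attrib_index, GLint component_count,
                              GLuint relative_offset) {
        // Attach a region of the stream buffer to vertex-buffer slot `binding`.
        glBindVertexBuffer(binding, stream_buffer_handle, buffer_offset, stride);

        // Describe the attribute layout without referencing any buffer...
        glEnableVertexAttribArray(attrib_index);
        glVertexAttribFormat(attrib_index, component_count, GL_FLOAT, GL_FALSE,
                             relative_offset);

        // ...then associate the attribute with the binding slot.
        glVertexAttribBinding(attrib_index, binding);
    }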
| @@ -190,8 +206,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||
| 190 | u32 current_constbuffer_bindpoint = 0; | 206 | u32 current_constbuffer_bindpoint = 0; |
| 191 | 207 | ||
| 192 | for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { | 208 | for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { |
| 193 | ptr_pos += sizeof(GLShader::MaxwellUniformData); | ||
| 194 | |||
| 195 | auto& shader_config = gpu.regs.shader_config[index]; | 209 | auto& shader_config = gpu.regs.shader_config[index]; |
| 196 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | 210 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; |
| 197 | 211 | ||
| @@ -205,18 +219,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||
| 205 | } | 219 | } |
| 206 | 220 | ||
| 207 | // Upload uniform data as one UBO per stage | 221 | // Upload uniform data as one UBO per stage |
| 208 | const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); | 222 | const GLintptr ubo_offset = buffer_offset; |
| 209 | copy_buffer(uniform_buffers[stage].handle, ubo_offset, | 223 | copy_buffer(uniform_buffers[stage].handle, ubo_offset, |
| 210 | sizeof(GLShader::MaxwellUniformData)); | 224 | sizeof(GLShader::MaxwellUniformData)); |
| 211 | GLShader::MaxwellUniformData* ub_ptr = | 225 | GLShader::MaxwellUniformData* ub_ptr = |
| 212 | reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); | 226 | reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr); |
| 213 | ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); | 227 | ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); |
| 214 | 228 | ||
| 229 | buffer_ptr += sizeof(GLShader::MaxwellUniformData); | ||
| 230 | buffer_offset += sizeof(GLShader::MaxwellUniformData); | ||
| 231 | |||
| 215 | // Fetch program code from memory | 232 | // Fetch program code from memory |
| 216 | GLShader::ProgramCode program_code; | 233 | GLShader::ProgramCode program_code; |
| 217 | const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; | 234 | const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; |
| 218 | const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)}; | 235 | const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; |
| 219 | Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64)); | 236 | Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); |
| 220 | GLShader::ShaderSetup setup{std::move(program_code)}; | 237 | GLShader::ShaderSetup setup{std::move(program_code)}; |
| 221 | 238 | ||
| 222 | GLShader::ShaderEntries shader_resources; | 239 | GLShader::ShaderEntries shader_resources; |
| @@ -235,8 +252,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||
| 235 | break; | 252 | break; |
| 236 | } | 253 | } |
| 237 | default: | 254 | default: |
| 238 | LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index, | 255 | NGLOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset={:#010X}", |
| 239 | shader_config.enable.Value(), shader_config.offset); | 256 | index, shader_config.enable.Value(), shader_config.offset); |
| 240 | UNREACHABLE(); | 257 | UNREACHABLE(); |
| 241 | } | 258 | } |
| 242 | 259 | ||
| @@ -252,6 +269,24 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||
| 252 | shader_program_manager->UseTrivialGeometryShader(); | 269 | shader_program_manager->UseTrivialGeometryShader(); |
| 253 | } | 270 | } |
| 254 | 271 | ||
| 272 | size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||
| 273 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | ||
| 274 | |||
| 275 | size_t size = 0; | ||
| 276 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 277 | if (!regs.vertex_array[index].IsEnabled()) | ||
| 278 | continue; | ||
| 279 | |||
| 280 | const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress(); | ||
| 281 | const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||
| 282 | |||
| 283 | ASSERT(end > start); | ||
| 284 | size += end - start + 1; | ||
| 285 | } | ||
| 286 | |||
| 287 | return size; | ||
| 288 | } | ||
| 289 | |||
| 255 | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | 290 | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { |
| 256 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | 291 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; |
| 257 | DrawArrays(); | 292 | DrawArrays(); |
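A note on the size math in CalculateVertexArraysSize above: vertex_array_limit holds an inclusive end address, hence size = end - start + 1. For example, start = 0x1000 and end = 0x1FFF describe exactly 0x1000 bytes.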
| @@ -329,44 +364,49 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 329 | const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; | 364 | const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; |
| 330 | const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; | 365 | const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; |
| 331 | 366 | ||
| 332 | // TODO(bunnei): Add support for 1+ vertex arrays | ||
| 333 | vs_input_size = vertex_num * regs.vertex_array[0].stride; | ||
| 334 | |||
| 335 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | 367 | state.draw.vertex_buffer = stream_buffer->GetHandle(); |
| 336 | state.Apply(); | 368 | state.Apply(); |
| 337 | 369 | ||
| 338 | size_t buffer_size = static_cast<size_t>(vs_input_size); | 370 | size_t buffer_size = CalculateVertexArraysSize(); |
| 371 | |||
| 339 | if (is_indexed) { | 372 | if (is_indexed) { |
| 340 | buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; | 373 | buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size; |
| 341 | } | 374 | } |
| 342 | 375 | ||
| 343 | // Uniform space for the 5 shader stages | 376 | // Uniform space for the 5 shader stages |
| 344 | buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | 377 | buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + |
| 378 | sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | ||
| 345 | 379 | ||
| 346 | size_t ptr_pos = 0; | ||
| 347 | u8* buffer_ptr; | 380 | u8* buffer_ptr; |
| 348 | GLintptr buffer_offset; | 381 | GLintptr buffer_offset; |
| 349 | std::tie(buffer_ptr, buffer_offset) = | 382 | std::tie(buffer_ptr, buffer_offset) = |
| 350 | stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); | 383 | stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); |
| 351 | 384 | ||
| 352 | SetupVertexArray(buffer_ptr, buffer_offset); | 385 | u8* offseted_buffer; |
| 353 | ptr_pos += vs_input_size; | 386 | std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); |
| 387 | |||
| 388 | offseted_buffer = | ||
| 389 | reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||
| 390 | buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||
| 354 | 391 | ||
| 355 | // If indexed mode, copy the index buffer | 392 | // If indexed mode, copy the index buffer |
| 356 | GLintptr index_buffer_offset = 0; | 393 | GLintptr index_buffer_offset = 0; |
| 357 | if (is_indexed) { | 394 | if (is_indexed) { |
| 358 | ptr_pos = Common::AlignUp(ptr_pos, 4); | ||
| 359 | |||
| 360 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | 395 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; |
| 361 | const VAddr index_data_addr{ | 396 | const boost::optional<VAddr> index_data_addr{ |
| 362 | memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; | 397 | memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())}; |
| 363 | Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size); | 398 | Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size); |
| 364 | 399 | ||
| 365 | index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); | 400 | index_buffer_offset = buffer_offset; |
| 366 | ptr_pos += index_buffer_size; | 401 | offseted_buffer += index_buffer_size; |
| 402 | buffer_offset += index_buffer_size; | ||
| 367 | } | 403 | } |
| 368 | 404 | ||
| 369 | SetupShaders(buffer_ptr, buffer_offset, ptr_pos); | 405 | offseted_buffer = |
| 406 | reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||
| 407 | buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||
| 408 | |||
| 409 | SetupShaders(offseted_buffer, buffer_offset); | ||
| 370 | 410 | ||
| 371 | stream_buffer->Unmap(); | 411 | stream_buffer->Unmap(); |
| 372 | 412 | ||
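DrawArrays above carves a single mapped stream-buffer range into three consecutive regions: the packed vertex arrays, an optional index buffer, and one uniform block per shader stage, with 4-byte alignment between regions. A self-contained sketch of that layout arithmetic, assuming power-of-two alignment; all names here are illustrative:

    #include <cstddef>

    // Round `value` up to a multiple of `align` (align must be a power of two).
    constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
        return (value + align - 1) & ~(align - 1);
    }

    struct StreamLayout {
        std::size_t index_offset; // aligned start of index data (if indexed)
        std::size_t ubo_offset;   // aligned start of the per-stage uniform blocks
        std::size_t total_size;   // total bytes to request from Map()
    };

    StreamLayout ComputeStreamLayout(std::size_t vertex_bytes, std::size_t index_bytes,
                                     std::size_t ubo_bytes_per_stage, std::size_t stages) {
        StreamLayout layout{};
        layout.index_offset = AlignUp(vertex_bytes, 4);
        layout.ubo_offset = AlignUp(layout.index_offset + index_bytes, 4);
        layout.total_size = layout.ubo_offset + ubo_bytes_per_stage * stages;
        return layout;
    }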
| @@ -478,17 +518,17 @@ void RasterizerOpenGL::FlushAll() { | |||
| 478 | res_cache.FlushAll(); | 518 | res_cache.FlushAll(); |
| 479 | } | 519 | } |
| 480 | 520 | ||
| 481 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | 521 | void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { |
| 482 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 522 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 483 | res_cache.FlushRegion(addr, size); | 523 | res_cache.FlushRegion(addr, size); |
| 484 | } | 524 | } |
| 485 | 525 | ||
| 486 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 526 | void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { |
| 487 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 527 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 488 | res_cache.InvalidateRegion(addr, size, nullptr); | 528 | res_cache.InvalidateRegion(addr, size, nullptr); |
| 489 | } | 529 | } |
| 490 | 530 | ||
| 491 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 531 | void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { |
| 492 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 532 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 493 | res_cache.FlushRegion(addr, size); | 533 | res_cache.FlushRegion(addr, size); |
| 494 | res_cache.InvalidateRegion(addr, size, nullptr); | 534 | res_cache.InvalidateRegion(addr, size, nullptr); |
| @@ -519,7 +559,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu | |||
| 519 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 559 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 520 | 560 | ||
| 521 | SurfaceParams src_params; | 561 | SurfaceParams src_params; |
| 522 | src_params.addr = framebuffer_addr; | 562 | src_params.cpu_addr = framebuffer_addr; |
| 563 | src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); | ||
| 523 | src_params.width = std::min(framebuffer.width, pixel_stride); | 564 | src_params.width = std::min(framebuffer.width, pixel_stride); |
| 524 | src_params.height = framebuffer.height; | 565 | src_params.height = framebuffer.height; |
| 525 | src_params.stride = pixel_stride; | 566 | src_params.stride = pixel_stride; |
| @@ -618,9 +659,9 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr | |||
| 618 | buffer_draw_state.enabled = true; | 659 | buffer_draw_state.enabled = true; |
| 619 | buffer_draw_state.bindpoint = current_bindpoint + bindpoint; | 660 | buffer_draw_state.bindpoint = current_bindpoint + bindpoint; |
| 620 | 661 | ||
| 621 | VAddr addr = gpu.memory_manager->PhysicalToVirtualAddress(buffer.address); | 662 | boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); |
| 622 | std::vector<u8> data(used_buffer.GetSize() * sizeof(float)); | 663 | std::vector<u8> data(used_buffer.GetSize() * sizeof(float)); |
| 623 | Memory::ReadBlock(addr, data.data(), data.size()); | 664 | Memory::ReadBlock(*addr, data.data(), data.size()); |
| 624 | 665 | ||
| 625 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); | 666 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); |
| 626 | glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); | 667 | glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); |
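A recurring pattern throughout this change: Tegra::MemoryManager::GpuToCpuAddress returns boost::optional&lt;VAddr&gt; rather than a raw address, and call sites such as SetupConstBuffers above dereference it before touching guest memory. A hedged sketch of the guarded form of that pattern; the helper below is hypothetical, and the real call sites simply dereference with *:

    #include <cstddef>
    #include <boost/optional.hpp>
    #include "common/assert.h"
    #include "core/memory.h"
    #include "video_core/memory_manager.h"

    // Hypothetical guarded read; asserts instead of dereferencing blindly.
    void ReadGuestBlock(Tegra::MemoryManager& memory_manager,
                        Tegra::GPUVAddr gpu_addr, u8* dest, std::size_t size) {
        const boost::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
        ASSERT_MSG(cpu_addr != boost::none, "unmapped GPU address");
        Memory::ReadBlock(*cpu_addr, dest, size);
    }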
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 03e02b52a..9709e595e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <glad/glad.h> | 11 | #include <glad/glad.h> |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/maxwell_3d.h" | 13 | #include "video_core/engines/maxwell_3d.h" |
| 14 | #include "video_core/memory_manager.h" | ||
| 14 | #include "video_core/rasterizer_interface.h" | 15 | #include "video_core/rasterizer_interface.h" |
| 15 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 16 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| @@ -29,9 +30,9 @@ public: | |||
| 29 | void DrawArrays() override; | 30 | void DrawArrays() override; |
| 30 | void NotifyMaxwellRegisterChanged(u32 method) override; | 31 | void NotifyMaxwellRegisterChanged(u32 method) override; |
| 31 | void FlushAll() override; | 32 | void FlushAll() override; |
| 32 | void FlushRegion(VAddr addr, u64 size) override; | 33 | void FlushRegion(Tegra::GPUVAddr addr, u64 size) override; |
| 33 | void InvalidateRegion(VAddr addr, u64 size) override; | 34 | void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; |
| 34 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 35 | void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; |
| 35 | bool AccelerateDisplayTransfer(const void* config) override; | 36 | bool AccelerateDisplayTransfer(const void* config) override; |
| 36 | bool AccelerateTextureCopy(const void* config) override; | 37 | bool AccelerateTextureCopy(const void* config) override; |
| 37 | bool AccelerateFill(const void* config) override; | 38 | bool AccelerateFill(const void* config) override; |
| @@ -148,13 +149,13 @@ private: | |||
| 148 | static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; | 149 | static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; |
| 149 | std::unique_ptr<OGLStreamBuffer> stream_buffer; | 150 | std::unique_ptr<OGLStreamBuffer> stream_buffer; |
| 150 | 151 | ||
| 151 | GLsizeiptr vs_input_size; | 152 | size_t CalculateVertexArraysSize() const; |
| 152 | 153 | ||
| 153 | void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); | 154 | std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); |
| 154 | 155 | ||
| 155 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; | 156 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; |
| 156 | 157 | ||
| 157 | void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); | 158 | void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); |
| 158 | 159 | ||
| 159 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 160 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 160 | AccelDraw accelerate_draw; | 161 | AccelDraw accelerate_draw; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 7410471cc..501d15e98 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -41,18 +41,15 @@ struct FormatTuple { | |||
| 41 | GLenum format; | 41 | GLenum format; |
| 42 | GLenum type; | 42 | GLenum type; |
| 43 | bool compressed; | 43 | bool compressed; |
| 44 | // How many pixels in the original texture are equivalent to one pixel in the compressed | ||
| 45 | // texture. | ||
| 46 | u32 compression_factor; | ||
| 47 | }; | 44 | }; |
| 48 | 45 | ||
| 49 | static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ | 46 | static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ |
| 50 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8 | 47 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8 |
| 51 | {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5 | 48 | {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5 |
| 52 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false, 1}, // A2B10G10R10 | 49 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10 |
| 53 | {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1 | 50 | {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 |
| 54 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23 | 51 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 |
| 55 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45 | 52 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 |
| 56 | }}; | 53 | }}; |
| 57 | 54 | ||
| 58 | static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { | 55 | static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { |
| @@ -83,26 +80,30 @@ static u16 GetResolutionScaleFactor() { | |||
| 83 | } | 80 | } |
| 84 | 81 | ||
| 85 | template <bool morton_to_gl, PixelFormat format> | 82 | template <bool morton_to_gl, PixelFormat format> |
| 86 | void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start, | 83 | void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, |
| 87 | VAddr end) { | 84 | Tegra::GPUVAddr start, Tegra::GPUVAddr end) { |
| 88 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; | 85 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
| 89 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | 86 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); |
| 87 | const auto& gpu = Core::System::GetInstance().GPU(); | ||
| 90 | 88 | ||
| 91 | if (morton_to_gl) { | 89 | if (morton_to_gl) { |
| 92 | auto data = Tegra::Texture::UnswizzleTexture( | 90 | auto data = Tegra::Texture::UnswizzleTexture( |
| 93 | base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, | 91 | *gpu.memory_manager->GpuToCpuAddress(base), |
| 94 | block_height); | 92 | SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); |
| 95 | std::memcpy(gl_buffer, data.data(), data.size()); | 93 | std::memcpy(gl_buffer, data.data(), data.size()); |
| 96 | } else { | 94 | } else { |
| 97 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check | 95 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check |
| 98 | // the configuration for this and perform more generic un/swizzle | 96 | // the configuration for this and perform more generic un/swizzle |
| 99 | LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); | 97 | NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); |
| 100 | VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, | 98 | VideoCore::MortonCopyPixels128( |
| 101 | Memory::GetPointer(base), gl_buffer, morton_to_gl); | 99 | stride, height, bytes_per_pixel, gl_bytes_per_pixel, |
| 100 | Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, | ||
| 101 | morton_to_gl); | ||
| 102 | } | 102 | } |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), | 105 | static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, |
| 106 | Tegra::GPUVAddr), | ||
| 106 | SurfaceParams::MaxPixelFormat> | 107 | SurfaceParams::MaxPixelFormat> |
| 107 | morton_to_gl_fns = { | 108 | morton_to_gl_fns = { |
| 108 | MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, | 109 | MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, |
| @@ -110,7 +111,8 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), | |||
| 110 | MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, | 111 | MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, |
| 111 | }; | 112 | }; |
| 112 | 113 | ||
| 113 | static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), | 114 | static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, |
| 115 | Tegra::GPUVAddr), | ||
| 114 | SurfaceParams::MaxPixelFormat> | 116 | SurfaceParams::MaxPixelFormat> |
| 115 | gl_to_morton_fns = { | 117 | gl_to_morton_fns = { |
| 116 | MortonCopy<false, PixelFormat::ABGR8>, | 118 | MortonCopy<false, PixelFormat::ABGR8>, |
| @@ -219,9 +221,9 @@ SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { | |||
| 219 | SurfaceParams params = *this; | 221 | SurfaceParams params = *this; |
| 220 | const u32 tiled_size = is_tiled ? 8 : 1; | 222 | const u32 tiled_size = is_tiled ? 8 : 1; |
| 221 | const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); | 223 | const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); |
| 222 | VAddr aligned_start = | 224 | Tegra::GPUVAddr aligned_start = |
| 223 | addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); | 225 | addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); |
| 224 | VAddr aligned_end = | 226 | Tegra::GPUVAddr aligned_end = |
| 225 | addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); | 227 | addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); |
| 226 | 228 | ||
| 227 | if (aligned_end - aligned_start > stride_tiled_bytes) { | 229 | if (aligned_end - aligned_start > stride_tiled_bytes) { |
| @@ -342,6 +344,13 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { | |||
| 342 | return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); | 344 | return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); |
| 343 | } | 345 | } |
| 344 | 346 | ||
| 347 | VAddr SurfaceParams::GetCpuAddr() const { | ||
| 348 | // When this function is used, only cpu_addr or (GPU) addr should be set, not both | ||
| 349 | ASSERT(!(cpu_addr && addr)); | ||
| 350 | const auto& gpu = Core::System::GetInstance().GPU(); | ||
| 351 | return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr)); | ||
| 352 | } | ||
| 353 | |||
| 345 | bool CachedSurface::CanFill(const SurfaceParams& dest_surface, | 354 | bool CachedSurface::CanFill(const SurfaceParams& dest_surface, |
| 346 | SurfaceInterval fill_interval) const { | 355 | SurfaceInterval fill_interval) const { |
| 347 | if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && | 356 | if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && |
| @@ -349,9 +358,9 @@ bool CachedSurface::CanFill(const SurfaceParams& dest_surface, | |||
| 349 | boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range | 358 | boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range |
| 350 | dest_surface.FromInterval(fill_interval).GetInterval() == | 359 | dest_surface.FromInterval(fill_interval).GetInterval() == |
| 351 | fill_interval) { // make sure interval is a rectangle in dest surface | 360 | fill_interval) { // make sure interval is a rectangle in dest surface |
| 352 | if (fill_size * 8 != dest_surface.GetFormatBpp()) { | 361 | if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) { |
| 353 | // Check if bits repeat for our fill_size | 362 | // Check if bits repeat for our fill_size |
| 354 | const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); | 363 | const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u); |
| 355 | std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); | 364 | std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); |
| 356 | 365 | ||
| 357 | for (u32 i = 0; i < dest_bytes_per_pixel; ++i) | 366 | for (u32 i = 0; i < dest_bytes_per_pixel; ++i) |
| @@ -456,15 +465,15 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac | |||
| 456 | } | 465 | } |
| 457 | 466 | ||
| 458 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); | 467 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); |
| 459 | void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { | 468 | void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { |
| 460 | ASSERT(type != SurfaceType::Fill); | 469 | ASSERT(type != SurfaceType::Fill); |
| 461 | 470 | ||
| 462 | u8* const texture_src_data = Memory::GetPointer(addr); | 471 | u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); |
| 463 | if (texture_src_data == nullptr) | 472 | if (texture_src_data == nullptr) |
| 464 | return; | 473 | return; |
| 465 | 474 | ||
| 466 | if (gl_buffer == nullptr) { | 475 | if (gl_buffer == nullptr) { |
| 467 | gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); | 476 | gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format); |
| 468 | gl_buffer.reset(new u8[gl_buffer_size]); | 477 | gl_buffer.reset(new u8[gl_buffer_size]); |
| 469 | } | 478 | } |
| 470 | 479 | ||
| @@ -479,14 +488,15 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { | |||
| 479 | std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, | 488 | std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, |
| 480 | bytes_per_pixel * width * height); | 489 | bytes_per_pixel * width * height); |
| 481 | } else { | 490 | } else { |
| 482 | morton_to_gl_fns[static_cast<size_t>(pixel_format)]( | 491 | morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height, |
| 483 | stride, block_height, height, &gl_buffer[0], addr, load_start, load_end); | 492 | GetActualHeight(), &gl_buffer[0], addr, |
| 493 | load_start, load_end); | ||
| 484 | } | 494 | } |
| 485 | } | 495 | } |
| 486 | 496 | ||
| 487 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); | 497 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); |
| 488 | void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { | 498 | void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { |
| 489 | u8* const dst_buffer = Memory::GetPointer(addr); | 499 | u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); |
| 490 | if (dst_buffer == nullptr) | 500 | if (dst_buffer == nullptr) |
| 491 | return; | 501 | return; |
| 492 | 502 | ||
| @@ -536,7 +546,8 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint | |||
| 536 | 546 | ||
| 537 | MICROPROFILE_SCOPE(OpenGL_TextureUL); | 547 | MICROPROFILE_SCOPE(OpenGL_TextureUL); |
| 538 | 548 | ||
| 539 | ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); | 549 | ASSERT(gl_buffer_size == |
| 550 | GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format)); | ||
| 540 | 551 | ||
| 541 | // Load data from memory to the surface | 552 | // Load data from memory to the surface |
| 542 | GLint x0 = static_cast<GLint>(rect.left); | 553 | GLint x0 = static_cast<GLint>(rect.left); |
| @@ -571,11 +582,9 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint | |||
| 571 | glActiveTexture(GL_TEXTURE0); | 582 | glActiveTexture(GL_TEXTURE0); |
| 572 | if (tuple.compressed) { | 583 | if (tuple.compressed) { |
| 573 | glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, | 584 | glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, |
| 574 | static_cast<GLsizei>(rect.GetWidth()), | 585 | static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), |
| 575 | static_cast<GLsizei>(rect.GetHeight()), 0, | 586 | static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, |
| 576 | rect.GetWidth() * rect.GetHeight() * | 587 | size, &gl_buffer[buffer_offset]); |
| 577 | GetGLBytesPerPixel(pixel_format) / tuple.compression_factor, | ||
| 578 | &gl_buffer[buffer_offset]); | ||
| 579 | } else { | 588 | } else { |
| 580 | glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), | 589 | glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), |
| 581 | static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, | 590 | static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, |
| @@ -945,6 +954,33 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc | |||
| 945 | return surface; | 954 | return surface; |
| 946 | } | 955 | } |
| 947 | 956 | ||
| 957 | boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress( | ||
| 958 | VAddr cpu_addr) const { | ||
| 959 | // Tries to find the GPU address of a framebuffer based on the CPU address. This is because | ||
| 960 | // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU | ||
| 961 | // addresses. We iterate through all cached framebuffers, and compare their starting CPU address | ||
| 962 | // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps | ||
| 963 | // surfaces. | ||
| 964 | |||
| 965 | std::vector<Tegra::GPUVAddr> gpu_addresses; | ||
| 966 | for (const auto& pair : surface_cache) { | ||
| 967 | for (const auto& surface : pair.second) { | ||
| 968 | const VAddr surface_cpu_addr = surface->GetCpuAddr(); | ||
| 969 | if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) { | ||
| 970 | ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); | ||
| 971 | gpu_addresses.push_back(surface->addr); | ||
| 972 | } | ||
| 973 | } | ||
| 974 | } | ||
| 975 | |||
| 976 | if (gpu_addresses.empty()) { | ||
| 977 | return {}; | ||
| 978 | } | ||
| 979 | |||
| 980 | ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported"); | ||
| 981 | return gpu_addresses[0]; | ||
| 982 | } | ||
| 983 | |||
| 948 | SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, | 984 | SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, |
| 949 | ScaleMatch match_res_scale, | 985 | ScaleMatch match_res_scale, |
| 950 | bool load_if_create) { | 986 | bool load_if_create) { |
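Usage sketch for TryFindFramebufferGpuAddress above, mirroring the AccelerateDisplay call site: the final output framebuffer is named by CPU address, so the cache is walked in reverse (a linear scan over every cached surface) and the result falls back to 0 when no surface claims the address:

    SurfaceParams src_params;
    src_params.cpu_addr = framebuffer_addr;
    src_params.addr =
        res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0);

The two ASSERT_MSGs encode the current limitations: a hit must start exactly at the framebuffer's CPU address, and at most one surface may match.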
| @@ -1028,11 +1064,11 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu | |||
| 1028 | auto& gpu = Core::System::GetInstance().GPU(); | 1064 | auto& gpu = Core::System::GetInstance().GPU(); |
| 1029 | 1065 | ||
| 1030 | SurfaceParams params; | 1066 | SurfaceParams params; |
| 1031 | params.addr = gpu.memory_manager->PhysicalToVirtualAddress(config.tic.Address()); | 1067 | params.addr = config.tic.Address(); |
| 1032 | params.width = config.tic.Width(); | ||
| 1033 | params.height = config.tic.Height(); | ||
| 1034 | params.is_tiled = config.tic.IsTiled(); | 1068 | params.is_tiled = config.tic.IsTiled(); |
| 1035 | params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); | 1069 | params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); |
| 1070 | params.width = config.tic.Width() / params.GetCompresssionFactor(); | ||
| 1071 | params.height = config.tic.Height() / params.GetCompresssionFactor(); | ||
| 1036 | 1072 | ||
| 1037 | // TODO(Subv): Different types per component are not supported. | 1073 | // TODO(Subv): Different types per component are not supported. |
| 1038 | ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && | 1074 | ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && |
| @@ -1045,7 +1081,7 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu | |||
| 1045 | params.block_height = config.tic.BlockHeight(); | 1081 | params.block_height = config.tic.BlockHeight(); |
| 1046 | } else { | 1082 | } else { |
| 1047 | // Use the texture-provided stride value if the texture isn't tiled. | 1083 | // Use the texture-provided stride value if the texture isn't tiled. |
| 1048 | params.stride = params.PixelsInBytes(config.tic.Pitch()); | 1084 | params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch())); |
| 1049 | } | 1085 | } |
| 1050 | 1086 | ||
| 1051 | params.UpdateParams(); | 1087 | params.UpdateParams(); |
| @@ -1073,11 +1109,10 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu | |||
| 1073 | SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( | 1109 | SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( |
| 1074 | bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { | 1110 | bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { |
| 1075 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | 1111 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; |
| 1076 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | ||
| 1077 | const auto& config = regs.rt[0]; | 1112 | const auto& config = regs.rt[0]; |
| 1078 | 1113 | ||
| 1079 | // TODO(bunnei): This is hard coded to use just the first render buffer | 1114 | // TODO(bunnei): This is hard coded to use just the first render buffer |
| 1080 | LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); | 1115 | NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); |
| 1081 | 1116 | ||
| 1082 | // update resolution_scale_factor and reset cache if changed | 1117 | // update resolution_scale_factor and reset cache if changed |
| 1083 | // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We | 1118 | // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We |
| @@ -1106,7 +1141,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( | |||
| 1106 | color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; | 1141 | color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; |
| 1107 | SurfaceParams depth_params = color_params; | 1142 | SurfaceParams depth_params = color_params; |
| 1108 | 1143 | ||
| 1109 | color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); | 1144 | color_params.addr = config.Address(); |
| 1110 | color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); | 1145 | color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); |
| 1111 | color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); | 1146 | color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); |
| 1112 | color_params.UpdateParams(); | 1147 | color_params.UpdateParams(); |
| @@ -1122,8 +1157,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( | |||
| 1122 | // Make sure that framebuffers don't overlap if both color and depth are being used | 1157 | // Make sure that framebuffers don't overlap if both color and depth are being used |
| 1123 | if (using_color_fb && using_depth_fb && | 1158 | if (using_color_fb && using_depth_fb && |
| 1124 | boost::icl::length(color_vp_interval & depth_vp_interval)) { | 1159 | boost::icl::length(color_vp_interval & depth_vp_interval)) { |
| 1125 | LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " | 1160 | NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " |
| 1126 | "overlapping framebuffers not supported!"); | 1161 | "overlapping framebuffers not supported!"); |
| 1127 | using_depth_fb = false; | 1162 | using_depth_fb = false; |
| 1128 | } | 1163 | } |
| 1129 | 1164 | ||
| @@ -1222,7 +1257,8 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, | |||
| 1222 | } | 1257 | } |
| 1223 | } | 1258 | } |
| 1224 | 1259 | ||
| 1225 | void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, u64 size) { | 1260 | void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, |
| 1261 | u64 size) { | ||
| 1226 | if (size == 0) | 1262 | if (size == 0) |
| 1227 | return; | 1263 | return; |
| 1228 | 1264 | ||
| @@ -1261,7 +1297,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, | |||
| 1261 | } | 1297 | } |
| 1262 | } | 1298 | } |
| 1263 | 1299 | ||
| 1264 | void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surface) { | 1300 | void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { |
| 1265 | if (size == 0) | 1301 | if (size == 0) |
| 1266 | return; | 1302 | return; |
| 1267 | 1303 | ||
| @@ -1297,7 +1333,8 @@ void RasterizerCacheOpenGL::FlushAll() { | |||
| 1297 | FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); | 1333 | FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); |
| 1298 | } | 1334 | } |
| 1299 | 1335 | ||
| 1300 | void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) { | 1336 | void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, |
| 1337 | const Surface& region_owner) { | ||
| 1301 | if (size == 0) | 1338 | if (size == 0) |
| 1302 | return; | 1339 | return; |
| 1303 | 1340 | ||
| @@ -1390,10 +1427,10 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { | |||
| 1390 | surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); | 1427 | surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); |
| 1391 | } | 1428 | } |
| 1392 | 1429 | ||
| 1393 | void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | 1430 | void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { |
| 1394 | const u64 num_pages = | 1431 | const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - |
| 1395 | ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; | 1432 | (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; |
| 1396 | const u64 page_start = addr >> Memory::PAGE_BITS; | 1433 | const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; |
| 1397 | const u64 page_end = page_start + num_pages; | 1434 | const u64 page_end = page_start + num_pages; |
| 1398 | 1435 | ||
| 1399 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to | 1436 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to |
| @@ -1406,8 +1443,10 @@ void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int del | |||
| 1406 | const auto interval = pair.first & pages_interval; | 1443 | const auto interval = pair.first & pages_interval; |
| 1407 | const int count = pair.second; | 1444 | const int count = pair.second; |
| 1408 | 1445 | ||
| 1409 | const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; | 1446 | const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) |
| 1410 | const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; | 1447 | << Tegra::MemoryManager::PAGE_BITS; |
| 1448 | const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) | ||
| 1449 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 1411 | const u64 interval_size = interval_end_addr - interval_start_addr; | 1450 | const u64 interval_size = interval_end_addr - interval_start_addr; |
| 1412 | 1451 | ||
| 1413 | if (delta > 0 && count == delta) | 1452 | if (delta > 0 && count == delta) |
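A worked example of the page-count formula in UpdatePagesCachedCount above; PAGE_BITS == 12 (4 KiB pages) is assumed here purely for illustration and need not match Tegra::MemoryManager::PAGE_BITS:

    #include <cstdint>

    using u64 = std::uint64_t;
    constexpr u64 PAGE_BITS = 12;

    constexpr u64 NumPages(u64 addr, u64 size) {
        return ((addr + size - 1) >> PAGE_BITS) - (addr >> PAGE_BITS) + 1;
    }

    static_assert(NumPages(0x1000, 0x1000) == 1, "exactly one page");
    static_assert(NumPages(0x1FFF, 2) == 2, "two bytes straddling a page boundary");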
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index e4cb3390f..55f1bdee8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -17,12 +17,14 @@ | |||
| 17 | #ifdef __GNUC__ | 17 | #ifdef __GNUC__ |
| 18 | #pragma GCC diagnostic pop | 18 | #pragma GCC diagnostic pop |
| 19 | #endif | 19 | #endif |
| 20 | #include <boost/optional.hpp> | ||
| 20 | #include <glad/glad.h> | 21 | #include <glad/glad.h> |
| 21 | #include "common/assert.h" | 22 | #include "common/assert.h" |
| 22 | #include "common/common_funcs.h" | 23 | #include "common/common_funcs.h" |
| 23 | #include "common/common_types.h" | 24 | #include "common/common_types.h" |
| 24 | #include "common/math_util.h" | 25 | #include "common/math_util.h" |
| 25 | #include "video_core/gpu.h" | 26 | #include "video_core/gpu.h" |
| 27 | #include "video_core/memory_manager.h" | ||
| 26 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 27 | #include "video_core/textures/texture.h" | 29 | #include "video_core/textures/texture.h" |
| 28 | 30 | ||
| @@ -30,9 +32,9 @@ struct CachedSurface; | |||
| 30 | using Surface = std::shared_ptr<CachedSurface>; | 32 | using Surface = std::shared_ptr<CachedSurface>; |
| 31 | using SurfaceSet = std::set<Surface>; | 33 | using SurfaceSet = std::set<Surface>; |
| 32 | 34 | ||
| 33 | using SurfaceRegions = boost::icl::interval_set<VAddr>; | 35 | using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>; |
| 34 | using SurfaceMap = boost::icl::interval_map<VAddr, Surface>; | 36 | using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>; |
| 35 | using SurfaceCache = boost::icl::interval_map<VAddr, SurfaceSet>; | 37 | using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>; |
| 36 | 38 | ||
| 37 | using SurfaceInterval = SurfaceCache::interval_type; | 39 | using SurfaceInterval = SurfaceCache::interval_type; |
| 38 | static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && | 40 | static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && |
| @@ -82,23 +84,49 @@ struct SurfaceParams { | |||
| 82 | Invalid = 4, | 84 | Invalid = 4, |
| 83 | }; | 85 | }; |
| 84 | 86 | ||
| 85 | static constexpr unsigned int GetFormatBpp(PixelFormat format) { | 87 | /** |
| 88 | * Gets the compression factor for the specified PixelFormat. This applies to just the | ||
| 89 | * "compressed width" and "compressed height", not the overall compression factor of a | ||
| 90 | * compressed image. This is used for maintaining proper surface sizes for compressed texture | ||
| 91 | * formats. | ||
| 92 | */ | ||
| 93 | static constexpr u32 GetCompresssionFactor(PixelFormat format) { | ||
| 86 | if (format == PixelFormat::Invalid) | 94 | if (format == PixelFormat::Invalid) |
| 87 | return 0; | 95 | return 0; |
| 88 | 96 | ||
| 89 | constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = { | 97 | constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ |
| 98 | 1, // ABGR8 | ||
| 99 | 1, // B5G6R5 | ||
| 100 | 1, // A2B10G10R10 | ||
| 101 | 4, // DXT1 | ||
| 102 | 4, // DXT23 | ||
| 103 | 4, // DXT45 | ||
| 104 | }}; | ||
| 105 | |||
| 106 | ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); | ||
| 107 | return compression_factor_table[static_cast<size_t>(format)]; | ||
| 108 | } | ||
| 109 | u32 GetCompresssionFactor() const { | ||
| 110 | return GetCompresssionFactor(pixel_format); | ||
| 111 | } | ||
| 112 | |||
| 113 | static constexpr u32 GetFormatBpp(PixelFormat format) { | ||
| 114 | if (format == PixelFormat::Invalid) | ||
| 115 | return 0; | ||
| 116 | |||
| 117 | constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | ||
| 90 | 32, // ABGR8 | 118 | 32, // ABGR8 |
| 91 | 16, // B5G6R5 | 119 | 16, // B5G6R5 |
| 92 | 32, // A2B10G10R10 | 120 | 32, // A2B10G10R10 |
| 93 | 64, // DXT1 | 121 | 64, // DXT1 |
| 94 | 128, // DXT23 | 122 | 128, // DXT23 |
| 95 | 128, // DXT45 | 123 | 128, // DXT45 |
| 96 | }; | 124 | }}; |
| 97 | 125 | ||
| 98 | ASSERT(static_cast<size_t>(format) < bpp_table.size()); | 126 | ASSERT(static_cast<size_t>(format) < bpp_table.size()); |
| 99 | return bpp_table[static_cast<size_t>(format)]; | 127 | return bpp_table[static_cast<size_t>(format)]; |
| 100 | } | 128 | } |
| 101 | unsigned int GetFormatBpp() const { | 129 | u32 GetFormatBpp() const { |
| 102 | return GetFormatBpp(pixel_format); | 130 | return GetFormatBpp(pixel_format); |
| 103 | } | 131 | } |
| 104 | 132 | ||
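Worked example of the compression-factor bookkeeping introduced above, for a 64x64 DXT1 surface (DXT blocks cover 4x4 texels, so the factor is 4; see GetActualWidth/GetActualHeight further down):

    tracked width   = 64 / 4 = 16 block columns
    tracked height  = 64 / 4 = 16 block rows
    GetActualWidth  = 16 * 4 = 64 texels
    GetActualHeight = 16 * 4 = 64 texels
    bytes per block = GetFormatBpp(DXT1) / CHAR_BIT = 64 / 8 = 8
    total bytes     = 16 * 16 * 8 = 2048

This is also why GetFormatBpp reports 64/128 bits for the DXT formats: it is bits per tracked (block) pixel, not per texel.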
| @@ -106,6 +134,8 @@ struct SurfaceParams { | |||
| 106 | switch (format) { | 134 | switch (format) { |
| 107 | case Tegra::RenderTargetFormat::RGBA8_UNORM: | 135 | case Tegra::RenderTargetFormat::RGBA8_UNORM: |
| 108 | return PixelFormat::ABGR8; | 136 | return PixelFormat::ABGR8; |
| 137 | case Tegra::RenderTargetFormat::RGB10_A2_UNORM: | ||
| 138 | return PixelFormat::A2B10G10R10; | ||
| 109 | default: | 139 | default: |
| 110 | NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); | 140 | NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); |
| 111 | UNREACHABLE(); | 141 | UNREACHABLE(); |
| @@ -251,6 +281,24 @@ struct SurfaceParams { | |||
| 251 | // Returns the region of the biggest valid rectangle within the interval | 281 | // Returns the region of the biggest valid rectangle within the interval |
| 252 | SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; | 282 | SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; |
| 253 | 283 | ||
| 284 | /** | ||
| 285 | * Gets the actual width (in pixels) of the surface. This is provided because `width` is used | ||
| 286 | * for tracking the surface region in memory, which may be compressed for certain formats. In | ||
| 287 | * this scenario, `width` is actually the compressed width. | ||
| 288 | */ | ||
| 289 | u32 GetActualWidth() const { | ||
| 290 | return width * GetCompresssionFactor(); | ||
| 291 | } | ||
| 292 | |||
| 293 | /** | ||
| 294 | * Gets the actual height (in pixels) of the surface. This is provided because `height` is used | ||
| 295 | * for tracking the surface region in memory, which may be compressed for certain formats. In | ||
| 296 | * this scenario, `height` is actually the compressed height. | ||
| 297 | */ | ||
| 298 | u32 GetActualHeight() const { | ||
| 299 | return height * GetCompresssionFactor(); | ||
| 300 | } | ||
| 301 | |||
| 254 | u32 GetScaledWidth() const { | 302 | u32 GetScaledWidth() const { |
| 255 | return width * res_scale; | 303 | return width * res_scale; |
| 256 | } | 304 | } |
| @@ -275,6 +323,8 @@ struct SurfaceParams { | |||
| 275 | return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; | 323 | return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; |
| 276 | } | 324 | } |
| 277 | 325 | ||
| 326 | VAddr GetCpuAddr() const; | ||
| 327 | |||
| 278 | bool ExactMatch(const SurfaceParams& other_surface) const; | 328 | bool ExactMatch(const SurfaceParams& other_surface) const; |
| 279 | bool CanSubRect(const SurfaceParams& sub_surface) const; | 329 | bool CanSubRect(const SurfaceParams& sub_surface) const; |
| 280 | bool CanExpand(const SurfaceParams& expanded_surface) const; | 330 | bool CanExpand(const SurfaceParams& expanded_surface) const; |
| @@ -283,8 +333,9 @@ struct SurfaceParams { | |||
| 283 | MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; | 333 | MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; |
| 284 | MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; | 334 | MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; |
| 285 | 335 | ||
| 286 | VAddr addr = 0; | 336 | Tegra::GPUVAddr addr = 0; |
| 287 | VAddr end = 0; | 337 | Tegra::GPUVAddr end = 0; |
| 338 | boost::optional<VAddr> cpu_addr; | ||
| 288 | u64 size = 0; | 339 | u64 size = 0; |
| 289 | 340 | ||
| 290 | u32 width = 0; | 341 | u32 width = 0; |
| @@ -323,15 +374,15 @@ struct CachedSurface : SurfaceParams { | |||
| 323 | if (format == PixelFormat::Invalid) | 374 | if (format == PixelFormat::Invalid) |
| 324 | return 0; | 375 | return 0; |
| 325 | 376 | ||
| 326 | return SurfaceParams::GetFormatBpp(format) / 8; | 377 | return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
| 327 | } | 378 | } |
| 328 | 379 | ||
| 329 | std::unique_ptr<u8[]> gl_buffer; | 380 | std::unique_ptr<u8[]> gl_buffer; |
| 330 | size_t gl_buffer_size = 0; | 381 | size_t gl_buffer_size = 0; |
| 331 | 382 | ||
| 332 | // Read/Write data in Switch memory to/from gl_buffer | 383 | // Read/Write data in Switch memory to/from gl_buffer |
| 333 | void LoadGLBuffer(VAddr load_start, VAddr load_end); | 384 | void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); |
| 334 | void FlushGLBuffer(VAddr flush_start, VAddr flush_end); | 385 | void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); |
| 335 | 386 | ||
| 336 | // Upload/Download data in gl_buffer in/to this surface's texture | 387 | // Upload/Download data in gl_buffer in/to this surface's texture |
| 337 | void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, | 388 | void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, |
| @@ -360,6 +411,9 @@ public: | |||
| 360 | Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, | 411 | Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, |
| 361 | bool load_if_create); | 412 | bool load_if_create); |
| 362 | 413 | ||
| 414 | /// Tries to find a framebuffer GPU address based on the provided CPU address | ||
| 415 | boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const; | ||
| 416 | |||
| 363 | /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from | 417 | /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from |
| 364 | /// Switch memory to OpenGL and caches it (if not already cached) | 418 | /// Switch memory to OpenGL and caches it (if not already cached) |
| 365 | SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, | 419 | SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, |
| @@ -379,10 +433,10 @@ public: | |||
| 379 | SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); | 433 | SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); |
| 380 | 434 | ||
| 381 | /// Write any cached resources overlapping the region back to memory (if dirty) | 435 | /// Write any cached resources overlapping the region back to memory (if dirty) |
| 382 | void FlushRegion(VAddr addr, u64 size, Surface flush_surface = nullptr); | 436 | void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); |
| 383 | 437 | ||
| 384 | /// Mark region as being invalidated by region_owner (nullptr if Switch memory) | 438 | /// Mark region as being invalidated by region_owner (nullptr if Switch memory) |
| 385 | void InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner); | 439 | void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); |
| 386 | 440 | ||
| 387 | /// Flush all cached resources tracked by this cache manager | 441 | /// Flush all cached resources tracked by this cache manager |
| 388 | void FlushAll(); | 442 | void FlushAll(); |
| @@ -391,7 +445,7 @@ private: | |||
| 391 | void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); | 445 | void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); |
| 392 | 446 | ||
| 393 | /// Update surface's texture for given region when necessary | 447 | /// Update surface's texture for given region when necessary |
| 394 | void ValidateSurface(const Surface& surface, VAddr addr, u64 size); | 448 | void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size); |
| 395 | 449 | ||
| 396 | /// Create a new surface | 450 | /// Create a new surface |
| 397 | Surface CreateSurface(const SurfaceParams& params); | 451 | Surface CreateSurface(const SurfaceParams& params); |
| @@ -403,7 +457,7 @@ private: | |||
| 403 | void UnregisterSurface(const Surface& surface); | 457 | void UnregisterSurface(const Surface& surface); |
| 404 | 458 | ||
| 405 | /// Increase/decrease the number of surface in pages touching the specified region | 459 | /// Increase/decrease the number of surface in pages touching the specified region |
| 406 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta); | 460 | void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); |
| 407 | 461 | ||
| 408 | SurfaceCache surface_cache; | 462 | SurfaceCache surface_cache; |
| 409 | PageMap cached_pages; | 463 | PageMap cached_pages; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 086424395..3dffb205d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -519,7 +519,7 @@ private: | |||
| 519 | } | 519 | } |
| 520 | break; | 520 | break; |
| 521 | } | 521 | } |
| 522 | case OpCode::Type::FloatPredicate: { | 522 | case OpCode::Type::FloatSetPredicate: { |
| 523 | std::string op_a = instr.fsetp.neg_a ? "-" : ""; | 523 | std::string op_a = instr.fsetp.neg_a ? "-" : ""; |
| 524 | op_a += GetRegister(instr.gpr8); | 524 | op_a += GetRegister(instr.gpr8); |
| 525 | 525 | ||
| @@ -570,6 +570,59 @@ private: | |||
| 570 | } | 570 | } |
| 571 | break; | 571 | break; |
| 572 | } | 572 | } |
| 573 | case OpCode::Type::FloatSet: { | ||
| 574 | std::string dest = GetRegister(instr.gpr0); | ||
| 575 | std::string op_a = instr.fset.neg_a ? "-" : ""; | ||
| 576 | op_a += GetRegister(instr.gpr8); | ||
| 577 | |||
| 578 | if (instr.fset.abs_a) { | ||
| 579 | op_a = "abs(" + op_a + ')'; | ||
| 580 | } | ||
| 581 | |||
| 582 | std::string op_b = instr.fset.neg_b ? "-" : ""; | ||
| 583 | |||
| 584 | if (instr.is_b_imm) { | ||
| 585 | std::string imm = GetImmediate19(instr); | ||
| 586 | if (instr.fset.neg_imm) | ||
| 587 | op_b += "(-" + imm + ')'; | ||
| 588 | else | ||
| 589 | op_b += imm; | ||
| 590 | } else { | ||
| 591 | if (instr.is_b_gpr) { | ||
| 592 | op_b += GetRegister(instr.gpr20); | ||
| 593 | } else { | ||
| 594 | op_b += GetUniform(instr.uniform); | ||
| 595 | } | ||
| 596 | } | ||
| 597 | |||
| 598 | if (instr.fset.abs_b) { | ||
| 599 | op_b = "abs(" + op_b + ")"; | ||
| 600 | } | ||
| 601 | |||
| 602 | using Tegra::Shader::Pred; | ||
| 603 | ASSERT_MSG(instr.fset.pred39 == static_cast<u64>(Pred::UnusedIndex), | ||
| 604 | "Compound predicates are not implemented"); | ||
| 605 | |||
| 606 | // The fset instruction sets a register to 1.0 if the condition is true, and to 0 | ||
| 607 | // otherwise. | ||
| 608 | using Tegra::Shader::PredCondition; | ||
| 609 | switch (instr.fset.cond) { | ||
| 610 | case PredCondition::LessThan: | ||
| 611 | SetDest(0, dest, "((" + op_a + ") < (" + op_b + ")) ? 1.0 : 0", 1, 1); | ||
| 612 | break; | ||
| 613 | case PredCondition::Equal: | ||
| 614 | SetDest(0, dest, "((" + op_a + ") == (" + op_b + ")) ? 1.0 : 0", 1, 1); | ||
| 615 | break; | ||
| 616 | case PredCondition::GreaterThan: | ||
| 617 | SetDest(0, dest, "((" + op_a + ") > (" + op_b + ")) ? 1.0 : 0", 1, 1); | ||
| 618 | break; | ||
| 619 | default: | ||
| 620 | NGLOG_CRITICAL(HW_GPU, "Unhandled predicate condition: {} (a: {}, b: {})", | ||
| 621 | static_cast<unsigned>(instr.fset.cond.Value()), op_a, op_b); | ||
| 622 | UNREACHABLE(); | ||
| 623 | } | ||
| 624 | break; | ||
| 625 | } | ||
| 573 | default: { | 626 | default: { |
| 574 | switch (opcode->GetId()) { | 627 | switch (opcode->GetId()) { |
| 575 | case OpCode::Id::EXIT: { | 628 | case OpCode::Id::EXIT: { |
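The FloatSet case added above emits a GLSL ternary that writes 1.0 or 0 into the destination register. The same semantics can be summarized with a host-side C++ sketch; the names are illustrative, and only two of the instruction's abs/neg operand modifiers are shown:

```cpp
#include <cmath>
#include <iostream>

enum class PredCondition { LessThan, Equal, GreaterThan };

// Sketch of FSET semantics: 1.0f when the comparison holds, 0.0f otherwise.
float FSet(float a, float b, PredCondition cond, bool abs_a = false, bool neg_b = false) {
    if (abs_a)
        a = std::fabs(a);
    if (neg_b)
        b = -b;
    switch (cond) {
    case PredCondition::LessThan:
        return a < b ? 1.0f : 0.0f;
    case PredCondition::Equal:
        return a == b ? 1.0f : 0.0f;
    case PredCondition::GreaterThan:
        return a > b ? 1.0f : 0.0f;
    }
    return 0.0f;
}

int main() {
    std::cout << FSet(-2.0f, 1.0f, PredCondition::LessThan) << '\n';                  // 1
    std::cout << FSet(-2.0f, 1.0f, PredCondition::LessThan, /*abs_a=*/true) << '\n';  // 0
}
```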
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index ab0acb20a..77d1692f4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -152,7 +152,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
| 152 | screen_info.display_texture = screen_info.texture.resource.handle; | 152 | screen_info.display_texture = screen_info.texture.resource.handle; |
| 153 | screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); | 153 | screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); |
| 154 | 154 | ||
| 155 | Rasterizer()->FlushRegion(framebuffer_addr, size_in_bytes); | 155 | Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, |
| 156 | Memory::FlushMode::Flush); | ||
| 156 | 157 | ||
| 157 | VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, | 158 | VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, |
| 158 | Memory::GetPointer(framebuffer_addr), | 159 | Memory::GetPointer(framebuffer_addr), |
| @@ -269,10 +270,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | |||
| 269 | GLint internal_format; | 270 | GLint internal_format; |
| 270 | switch (framebuffer.pixel_format) { | 271 | switch (framebuffer.pixel_format) { |
| 271 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: | 272 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: |
| 272 | // Use RGBA8 and swap in the fragment shader | ||
| 273 | internal_format = GL_RGBA; | 273 | internal_format = GL_RGBA; |
| 274 | texture.gl_format = GL_RGBA; | 274 | texture.gl_format = GL_RGBA; |
| 275 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8; | 275 | texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
| 276 | gl_framebuffer_data.resize(texture.width * texture.height * 4); | 276 | gl_framebuffer_data.resize(texture.width * texture.height * 4); |
| 277 | break; | 277 | break; |
| 278 | default: | 278 | default: |
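The switch from GL_UNSIGNED_INT_8_8_8_8 to GL_UNSIGNED_INT_8_8_8_8_REV is what lets the renderer drop the fragment-shader swizzle noted in the removed comment: the packed type reads components from a 32-bit word high-byte-first, while the _REV variant reads them low-byte-first. A small endianness sketch, with illustrative values and assuming a little-endian host:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    // Framebuffer bytes in memory order: R, G, B, A.
    const std::uint8_t pixel_bytes[4] = {0x11, 0x22, 0x33, 0x44};
    std::uint32_t word;
    std::memcpy(&word, pixel_bytes, sizeof(word)); // 0x44332211 on little-endian
    // GL_UNSIGNED_INT_8_8_8_8     takes components from bits 31..24, 23..16, 15..8, 7..0
    //   -> R=0x44 G=0x33 B=0x22 A=0x11 (channels reversed)
    // GL_UNSIGNED_INT_8_8_8_8_REV takes components from bits 7..0, 15..8, 23..16, 31..24
    //   -> R=0x11 G=0x22 B=0x33 A=0x44 (matches the byte stream)
    std::printf("packed word: 0x%08X\n", static_cast<unsigned>(word));
}
```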
| @@ -295,17 +295,18 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 295 | const auto& texcoords = screen_info.display_texcoords; | 295 | const auto& texcoords = screen_info.display_texcoords; |
| 296 | auto left = texcoords.left; | 296 | auto left = texcoords.left; |
| 297 | auto right = texcoords.right; | 297 | auto right = texcoords.right; |
| 298 | if (framebuffer_transform_flags != Tegra::FramebufferConfig::TransformFlags::Unset) | 298 | if (framebuffer_transform_flags != Tegra::FramebufferConfig::TransformFlags::Unset) { |
| 299 | if (framebuffer_transform_flags == Tegra::FramebufferConfig::TransformFlags::FlipV) { | 299 | if (framebuffer_transform_flags == Tegra::FramebufferConfig::TransformFlags::FlipV) { |
| 300 | // Flip the framebuffer vertically | 300 | // Flip the framebuffer vertically |
| 301 | left = texcoords.right; | 301 | left = texcoords.right; |
| 302 | right = texcoords.left; | 302 | right = texcoords.left; |
| 303 | } else { | 303 | } else { |
| 304 | // Other transformations are unsupported | 304 | // Other transformations are unsupported |
| 305 | LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags=%d", | 305 | NGLOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}", |
| 306 | framebuffer_transform_flags); | 306 | static_cast<u32>(framebuffer_transform_flags)); |
| 307 | UNIMPLEMENTED(); | 307 | UNIMPLEMENTED(); |
| 308 | } | 308 | } |
| 309 | } | ||
| 309 | 310 | ||
| 310 | std::array<ScreenRectVertex, 4> vertices = {{ | 311 | std::array<ScreenRectVertex, 4> vertices = {{ |
| 311 | ScreenRectVertex(x, y, texcoords.top, left), | 312 | ScreenRectVertex(x, y, texcoords.top, left), |
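The FlipV branch above mirrors the image purely by swapping the texture coordinates fed to the screen quad; the pixel data is untouched. A minimal sketch of the idea, with illustrative names:

```cpp
// Swapping the two coordinates of an axis reverses the direction the
// texture is sampled in, flipping the image along that axis.
struct TexCoordAxis {
    float a; // coordinate at the first vertex
    float b; // coordinate at the opposite vertex
};

TexCoordAxis Flip(TexCoordAxis axis) {
    return {axis.b, axis.a}; // the same swap as left/right in DrawScreenTriangles
}
```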
| @@ -427,9 +428,9 @@ bool RendererOpenGL::Init() { | |||
| 427 | const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; | 428 | const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; |
| 428 | const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; | 429 | const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; |
| 429 | 430 | ||
| 430 | LOG_INFO(Render_OpenGL, "GL_VERSION: %s", gl_version); | 431 | NGLOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); |
| 431 | LOG_INFO(Render_OpenGL, "GL_VENDOR: %s", gpu_vendor); | 432 | NGLOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); |
| 432 | LOG_INFO(Render_OpenGL, "GL_RENDERER: %s", gpu_model); | 433 | NGLOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); |
| 433 | 434 | ||
| 434 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); | 435 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); |
| 435 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); | 436 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index e0509f0ce..8b39b2bdf 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "core/memory.h" | ||
| 7 | #include "video_core/textures/decoders.h" | 8 | #include "video_core/textures/decoders.h" |
| 8 | #include "video_core/textures/texture.h" | 9 | #include "video_core/textures/texture.h" |
| 9 | 10 | ||
| @@ -26,9 +27,8 @@ static u32 GetSwizzleOffset(u32 x, u32 y, u32 image_width, u32 bytes_per_pixel, | |||
| 26 | return address; | 27 | return address; |
| 27 | } | 28 | } |
| 28 | 29 | ||
| 29 | static void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, | 30 | void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, |
| 30 | u8* swizzled_data, u8* unswizzled_data, bool unswizzle, | 31 | u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) { |
| 31 | u32 block_height) { | ||
| 32 | u8* data_ptrs[2]; | 32 | u8* data_ptrs[2]; |
| 33 | for (unsigned y = 0; y < height; ++y) { | 33 | for (unsigned y = 0; y < height; ++y) { |
| 34 | for (unsigned x = 0; x < width; ++x) { | 34 | for (unsigned x = 0; x < width; ++x) { |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index a700911cf..2562c4b06 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -17,6 +17,10 @@ namespace Texture { | |||
| 17 | std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, | 17 | std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, |
| 18 | u32 block_height = TICEntry::DefaultBlockHeight); | 18 | u32 block_height = TICEntry::DefaultBlockHeight); |
| 19 | 19 | ||
| 20 | /// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. | ||
| 21 | void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, | ||
| 22 | u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height); | ||
| 23 | |||
| 20 | /** | 24 | /** |
| 21 | * Decodes an unswizzled texture into an A8R8G8B8 texture. | 25 | * Decodes an unswizzled texture into an A8R8G8B8 texture. |
| 22 | */ | 26 | */ |
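With CopySwizzledData now exported from decoders.h, other parts of video_core can tile or untile buffers directly. A hypothetical caller follows, assuming this header's declarations live under Tegra::Texture; the buffer size and source pointer are illustrative:

```cpp
#include <vector>
#include "video_core/textures/decoders.h"

// Untile (unswizzle) a 64x64 RGBA8 texture from GPU-tiled memory into a
// linear buffer. Passing unswizzle=true selects the tiled -> linear direction.
std::vector<u8> UntileExample(u8* tiled_data) {
    constexpr u32 width = 64, height = 64, bytes_per_pixel = 4;
    std::vector<u8> linear(width * height * bytes_per_pixel);
    Tegra::Texture::CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel,
                                     tiled_data, linear.data(), /*unswizzle=*/true,
                                     Tegra::Texture::TICEntry::DefaultBlockHeight);
    return linear;
}
```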
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 289140f31..89dc8ed1e 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -24,9 +24,9 @@ bool Init(EmuWindow* emu_window) { | |||
| 24 | g_renderer = std::make_unique<RendererOpenGL>(); | 24 | g_renderer = std::make_unique<RendererOpenGL>(); |
| 25 | g_renderer->SetWindow(g_emu_window); | 25 | g_renderer->SetWindow(g_emu_window); |
| 26 | if (g_renderer->Init()) { | 26 | if (g_renderer->Init()) { |
| 27 | LOG_DEBUG(Render, "initialized OK"); | 27 | NGLOG_DEBUG(Render, "initialized OK"); |
| 28 | } else { | 28 | } else { |
| 29 | LOG_CRITICAL(Render, "initialization failed !"); | 29 | NGLOG_CRITICAL(Render, "initialization failed!"); |
| 30 | return false; | 30 | return false; |
| 31 | } | 31 | } |
| 32 | return true; | 32 | return true; |
| @@ -36,7 +36,7 @@ bool Init(EmuWindow* emu_window) { | |||
| 36 | void Shutdown() { | 36 | void Shutdown() { |
| 37 | g_renderer.reset(); | 37 | g_renderer.reset(); |
| 38 | 38 | ||
| 39 | LOG_DEBUG(Render, "shutdown OK"); | 39 | NGLOG_DEBUG(Render, "shutdown OK"); |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | } // namespace VideoCore | 42 | } // namespace VideoCore |