diff options
| author | 2022-11-05 22:26:38 +0100 | |
|---|---|---|
| committer | 2022-11-24 20:35:44 +0100 | |
| commit | 957840be9151e7c3b97b638cc0d10d73173c4036 (patch) | |
| tree | bf3f3aa7b612265fd19db8297ee09d71c819abe7 /src | |
| parent | Merge pull request #9299 from lioncash/cast (diff) | |
| download | yuzu-957840be9151e7c3b97b638cc0d10d73173c4036.tar.gz yuzu-957840be9151e7c3b97b638cc0d10d73173c4036.tar.xz yuzu-957840be9151e7c3b97b638cc0d10d73173c4036.zip | |
Fermi2D: Rework blit engine and add a software blitter.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/video_core/control/channel_state.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 9 | ||||
| -rw-r--r-- | src/video_core/engines/sw_blitter/blitter.cpp | 213 | ||||
| -rw-r--r-- | src/video_core/engines/sw_blitter/blitter.h | 27 | ||||
| -rw-r--r-- | src/video_core/engines/sw_blitter/converter.cpp | 1097 | ||||
| -rw-r--r-- | src/video_core/engines/sw_blitter/converter.h | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 29 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 8 |
12 files changed, 1431 insertions, 18 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d7f7d336c..b03a30992 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -28,6 +28,10 @@ add_library(video_core STATIC | |||
| 28 | dirty_flags.h | 28 | dirty_flags.h |
| 29 | dma_pusher.cpp | 29 | dma_pusher.cpp |
| 30 | dma_pusher.h | 30 | dma_pusher.h |
| 31 | engines/sw_blitter/blitter.cpp | ||
| 32 | engines/sw_blitter/blitter.h | ||
| 33 | engines/sw_blitter/converter.cpp | ||
| 34 | engines/sw_blitter/converter.h | ||
| 31 | engines/const_buffer_info.h | 35 | engines/const_buffer_info.h |
| 32 | engines/engine_interface.h | 36 | engines/engine_interface.h |
| 33 | engines/engine_upload.cpp | 37 | engines/engine_upload.cpp |
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index cdecc3a91..832025d75 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp | |||
| @@ -20,7 +20,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) { | |||
| 20 | ASSERT(memory_manager); | 20 | ASSERT(memory_manager); |
| 21 | dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this); | 21 | dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this); |
| 22 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager); | 22 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager); |
| 23 | fermi_2d = std::make_unique<Engines::Fermi2D>(); | 23 | fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager); |
| 24 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager); | 24 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager); |
| 25 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); | 25 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); |
| 26 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); | 26 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 453e0fb01..2c722c778 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -3,17 +3,25 @@ | |||
| 3 | 3 | ||
| 4 | #include "common/assert.h" | 4 | #include "common/assert.h" |
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "common/microprofile.h" | ||
| 6 | #include "video_core/engines/fermi_2d.h" | 7 | #include "video_core/engines/fermi_2d.h" |
| 7 | #include "video_core/memory_manager.h" | 8 | #include "video_core/engines/sw_blitter/blitter.h" |
| 8 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 9 | #include "video_core/surface.h" | 10 | #include "video_core/surface.h" |
| 11 | #include "video_core/textures/decoders.h" | ||
| 12 | |||
| 13 | MICROPROFILE_DECLARE(GPU_BlitEngine); | ||
| 14 | MICROPROFILE_DEFINE(GPU_BlitEngine, "GPU", "Blit Engine", MP_RGB(224, 224, 128)); | ||
| 10 | 15 | ||
| 11 | using VideoCore::Surface::BytesPerBlock; | 16 | using VideoCore::Surface::BytesPerBlock; |
| 12 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | 17 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| 13 | 18 | ||
| 14 | namespace Tegra::Engines { | 19 | namespace Tegra::Engines { |
| 15 | 20 | ||
| 16 | Fermi2D::Fermi2D() { | 21 | using namespace Texture; |
| 22 | |||
| 23 | Fermi2D::Fermi2D(MemoryManager& memory_manager_) { | ||
| 24 | sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_); | ||
| 17 | // Nvidia's OpenGL driver seems to assume these values | 25 | // Nvidia's OpenGL driver seems to assume these values |
| 18 | regs.src.depth = 1; | 26 | regs.src.depth = 1; |
| 19 | regs.dst.depth = 1; | 27 | regs.dst.depth = 1; |
| @@ -42,6 +50,7 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 | |||
| 42 | } | 50 | } |
| 43 | 51 | ||
| 44 | void Fermi2D::Blit() { | 52 | void Fermi2D::Blit() { |
| 53 | MICROPROFILE_SCOPE(GPU_BlitEngine); | ||
| 45 | LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", | 54 | LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", |
| 46 | regs.src.Address(), regs.dst.Address()); | 55 | regs.src.Address(), regs.dst.Address()); |
| 47 | 56 | ||
| @@ -52,9 +61,12 @@ void Fermi2D::Blit() { | |||
| 52 | UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); | 61 | UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); |
| 53 | 62 | ||
| 54 | const auto& args = regs.pixels_from_memory; | 63 | const auto& args = regs.pixels_from_memory; |
| 64 | constexpr s64 null_derivate = 1ULL << 32; | ||
| 55 | Config config{ | 65 | Config config{ |
| 56 | .operation = regs.operation, | 66 | .operation = regs.operation, |
| 57 | .filter = args.sample_mode.filter, | 67 | .filter = args.sample_mode.filter, |
| 68 | .must_accelerate = args.du_dx != null_derivate || args.dv_dy != null_derivate || | ||
| 69 | args.sample_mode.filter == Filter::Bilinear, | ||
| 58 | .dst_x0 = args.dst_x0, | 70 | .dst_x0 = args.dst_x0, |
| 59 | .dst_y0 = args.dst_y0, | 71 | .dst_y0 = args.dst_y0, |
| 60 | .dst_x1 = args.dst_x0 + args.dst_width, | 72 | .dst_x1 = args.dst_x0 + args.dst_width, |
| @@ -78,8 +90,9 @@ void Fermi2D::Blit() { | |||
| 78 | config.src_x1 -= config.src_x0; | 90 | config.src_x1 -= config.src_x0; |
| 79 | config.src_x0 = 0; | 91 | config.src_x0 = 0; |
| 80 | } | 92 | } |
| 93 | |||
| 81 | if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { | 94 | if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { |
| 82 | UNIMPLEMENTED(); | 95 | sw_blitter->Blit(src, regs.dst, config); |
| 83 | } | 96 | } |
| 84 | } | 97 | } |
| 85 | 98 | ||
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 1229aa35b..24b518cb5 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <memory> | ||
| 8 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 9 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| @@ -21,6 +22,10 @@ class RasterizerInterface; | |||
| 21 | 22 | ||
| 22 | namespace Tegra::Engines { | 23 | namespace Tegra::Engines { |
| 23 | 24 | ||
| 25 | namespace Blitter { | ||
| 26 | class SoftwareBlitEngine; | ||
| 27 | } | ||
| 28 | |||
| 24 | /** | 29 | /** |
| 25 | * This Engine is known as G80_2D. Documentation can be found in: | 30 | * This Engine is known as G80_2D. Documentation can be found in: |
| 26 | * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml | 31 | * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml |
| @@ -32,7 +37,7 @@ namespace Tegra::Engines { | |||
| 32 | 37 | ||
| 33 | class Fermi2D final : public EngineInterface { | 38 | class Fermi2D final : public EngineInterface { |
| 34 | public: | 39 | public: |
| 35 | explicit Fermi2D(); | 40 | explicit Fermi2D(MemoryManager& memory_manager_); |
| 36 | ~Fermi2D() override; | 41 | ~Fermi2D() override; |
| 37 | 42 | ||
| 38 | /// Binds a rasterizer to this engine. | 43 | /// Binds a rasterizer to this engine. |
| @@ -286,6 +291,7 @@ public: | |||
| 286 | struct Config { | 291 | struct Config { |
| 287 | Operation operation; | 292 | Operation operation; |
| 288 | Filter filter; | 293 | Filter filter; |
| 294 | bool must_accelerate; | ||
| 289 | s32 dst_x0; | 295 | s32 dst_x0; |
| 290 | s32 dst_y0; | 296 | s32 dst_y0; |
| 291 | s32 dst_x1; | 297 | s32 dst_x1; |
| @@ -298,6 +304,7 @@ public: | |||
| 298 | 304 | ||
| 299 | private: | 305 | private: |
| 300 | VideoCore::RasterizerInterface* rasterizer = nullptr; | 306 | VideoCore::RasterizerInterface* rasterizer = nullptr; |
| 307 | std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter; | ||
| 301 | 308 | ||
| 302 | /// Performs the copy from the source surface to the destination surface as configured in the | 309 | /// Performs the copy from the source surface to the destination surface as configured in the |
| 303 | /// registers. | 310 | /// registers. |
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp new file mode 100644 index 000000000..caf51cbe3 --- /dev/null +++ b/src/video_core/engines/sw_blitter/blitter.cpp | |||
| @@ -0,0 +1,213 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include <vector> | ||
| 5 | |||
| 6 | #include "video_core/engines/sw_blitter/blitter.h" | ||
| 7 | #include "video_core/engines/sw_blitter/converter.h" | ||
| 8 | #include "video_core/memory_manager.h" | ||
| 9 | #include "video_core/surface.h" | ||
| 10 | #include "video_core/textures/decoders.h" | ||
| 11 | // Forward declaration of the memory manager type; the complete type is presumably supplied by video_core/memory_manager.h included above. | ||
| 12 | namespace Tegra { | ||
| 13 | class MemoryManager; | ||
| 14 | } | ||
| 15 | |||
| 16 | using VideoCore::Surface::BytesPerBlock; | ||
| 17 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 18 | |||
| 19 | namespace Tegra::Engines::Blitter { | ||
| 20 | |||
| 21 | using namespace Texture; | ||
| 22 | |||
| 23 | namespace { | ||
| 24 | // Nearest-neighbour rescale of a tightly packed src_width x src_height image of bpp-byte pixels (input) into a packed dst_width x dst_height image (output). | ||
| 25 | void NeighrestNeighbor(std::span<u8> input, std::span<u8> output, u32 src_width, u32 src_height, | ||
| 26 | u32 dst_width, u32 dst_height, size_t bpp) { | ||
| 27 | const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32)); // 32.32 fixed-point source step per destination column | ||
| 28 | const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32)); // 32.32 fixed-point source step per destination row | ||
| 29 | size_t src_y = 0; | ||
| 30 | for (u32 y = 0; y < dst_height; y++) { | ||
| 31 | size_t src_x = 0; | ||
| 32 | for (u32 x = 0; x < dst_width; x++) { | ||
| 33 | // Truncate row and column separately: shifting their fixed-point sum lets the fractional row spill into the column and can index past the end of the source. | ||
| 34 | const size_t read_from = ((src_y >> 32) * src_width + (src_x >> 32)) * bpp; | ||
| 35 | const size_t write_to = (static_cast<size_t>(y) * dst_width + x) * bpp; | ||
| 36 | std::memcpy(&output[write_to], &input[read_from], bpp); | ||
| 37 | src_x += dx_du; | ||
| 38 | } | ||
| 39 | src_y += dy_dv; | ||
| 40 | } | ||
| 41 | } | ||
| 42 | // Nearest-neighbour rescale for the intermediate representation: every pixel is four consecutive f32 values in both input and output. | ||
| 43 | void NeighrestNeighborFast(std::span<f32> input, std::span<f32> output, u32 src_width, | ||
| 44 | u32 src_height, u32 dst_width, u32 dst_height) { | ||
| 45 | const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32)); // 32.32 fixed-point source step per destination column | ||
| 46 | const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32)); // 32.32 fixed-point source step per destination row | ||
| 47 | size_t src_y = 0; | ||
| 48 | for (u32 y = 0; y < dst_height; y++) { | ||
| 49 | size_t src_x = 0; | ||
| 50 | for (u32 x = 0; x < dst_width; x++) { | ||
| 51 | // Truncate row and column separately: shifting their fixed-point sum lets the fractional row spill into the column and can index past the end of the source. | ||
| 52 | const size_t read_from = ((src_y >> 32) * src_width + (src_x >> 32)) * 4; | ||
| 53 | const size_t write_to = (static_cast<size_t>(y) * dst_width + x) * 4; | ||
| 54 | std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * 4); | ||
| 55 | src_x += dx_du; | ||
| 56 | } | ||
| 57 | src_y += dy_dv; | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 61 | /* | ||
| 62 | void Bilinear(std::span<f32> input, std::span<f32> output, size_t src_width, | ||
| 63 | size_t src_height, size_t dst_width, size_t dst_height) { | ||
| 64 | const auto inv_lerp = [](u32 coord, u32 end) { return | ||
| 65 | static_cast<f32>(std::min(std::max(static_cast<s32>(coord), 0), end - 1)) / (end); }; | ||
| 66 | |||
| 67 | |||
| 68 | for (u32 y = 0; y < dst_height; y++) { | ||
| 69 | const f32 ty_0 = inv_lerp(y, dst_extent_y); | ||
| 70 | const f32 ty_1 = inv_lerp(y + 1, dst_extent_y); | ||
| 71 | for (u32 x = 0; x < dst_width; x++) { | ||
| 72 | const f32 tx_0 = inv_lerp(x, dst_extent_x); | ||
| 73 | const f32 tx_1 = inv_lerp(x + 1, dst_extent_x); | ||
| 74 | const std::array<f32, 4> get_pixel = [&](f32 tx, f32 ty, u32 width, u32 height) { | ||
| 75 | std::array<f32, 4> result{}; | ||
| 76 | |||
| 77 | return (std::llround(width * tx) + std::llround(height * ty) * width) * 4; | ||
| 78 | }; | ||
| 79 | std::array<f32, 4> result{}; | ||
| 80 | |||
| 81 | const size_t read_from = get_pixel(src_width, src_height); | ||
| 82 | const size_t write_to = get_pixel(tx_0, ty_0, dst_width, dst_height); | ||
| 83 | |||
| 84 | std::memcpy(&output[write_to], &input[read_from], bpp); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | } | ||
| 88 | */ | ||
| 89 | |||
| 90 | } // namespace | ||
| 91 | // Private state of SoftwareBlitEngine: scratch buffers that Blit() resizes and reuses, plus the format converter factory. | ||
| 92 | struct SoftwareBlitEngine::BlitEngineImpl { | ||
| 93 | std::vector<u8> tmp_buffer; // bytes of a whole surface, filled by ReadBlock and handed to WriteBlock in Blit() | ||
| 94 | std::vector<u8> src_buffer; // tightly packed source rectangle extracted from tmp_buffer | ||
| 95 | std::vector<u8> dst_buffer; // tightly packed destination rectangle before it is placed into the surface | ||
| 96 | std::vector<f32> intermediate_src; // source rectangle as four f32 values per pixel (used when formats differ) | ||
| 97 | std::vector<f32> intermediate_dst; // rescaled rectangle as four f32 values per pixel (used when formats differ) | ||
| 98 | ConverterFactory converter_factory; // queried through GetFormatConverter() for the source and destination formats | ||
| 99 | }; | ||
| 100 | // Keeps a reference to the given memory manager and allocates the BlitEngineImpl scratch state. | ||
| 101 | SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_) | ||
| 102 | : memory_manager{memory_manager_} { | ||
| 103 | impl = std::make_unique<BlitEngineImpl>(); | ||
| 104 | } | ||
| 105 | // Defaulted in this file, after BlitEngineImpl is defined, so the std::unique_ptr member destroys a complete type. | ||
| 106 | SoftwareBlitEngine::~SoftwareBlitEngine() = default; | ||
| 107 | // Software blit: copies the config source rectangle of src into the config destination rectangle of dst, rescaling with nearest neighbour and converting formats when they differ. Always returns true. | ||
| 108 | bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | ||
| 109 | Fermi2D::Config& config) { | ||
| 110 | UNIMPLEMENTED_IF(config.filter == Fermi2D::Filter::Bilinear); | ||
| 111 | // Byte size of a whole surface: CalculateSize() for block-linear layouts, pitch * height otherwise. | ||
| 112 | const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) { | ||
| 113 | if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||
| 114 | return CalculateSize(true, bytes_per_pixel, surface.width, surface.height, | ||
| 115 | surface.depth, surface.block_height, surface.block_depth); | ||
| 116 | } | ||
| 117 | return static_cast<size_t>(surface.pitch * surface.height); | ||
| 118 | }; | ||
| 119 | // Copies an extent_x x extent_y rectangle between a pitch-linear surface (rectangle origin x0, y0) and a tightly packed buffer; unpack selects packed -> surface. Packed rows count from 0, surface rows are offset by y0. | ||
| 120 | const auto process_pitch_linear = [](bool unpack, std::span<u8> input, std::span<u8> output, | ||
| 121 | u32 extent_x, u32 extent_y, u32 pitch, u32 x0, u32 y0, size_t bpp) { | ||
| 122 | const size_t base_offset = x0 * bpp; | ||
| 123 | const size_t copy_size = extent_x * bpp; | ||
| 124 | for (size_t y = 0; y < extent_y; y++) { | ||
| 125 | const size_t first_offset = (y + y0) * pitch + base_offset; | ||
| 126 | const size_t second_offset = y * extent_x * bpp; | ||
| 127 | u8* write_to = unpack ? &output[first_offset] : &output[second_offset]; | ||
| 128 | const u8* read_from = unpack ? &input[second_offset] : &input[first_offset]; | ||
| 129 | std::memcpy(write_to, read_from, copy_size); | ||
| 130 | } | ||
| 131 | }; | ||
| 132 | |||
| 133 | const u32 src_extent_x = config.src_x1 - config.src_x0; | ||
| 134 | const u32 src_extent_y = config.src_y1 - config.src_y0; | ||
| 135 | |||
| 136 | const u32 dst_extent_x = config.dst_x1 - config.dst_x0; | ||
| 137 | const u32 dst_extent_y = config.dst_y1 - config.dst_y0; | ||
| 138 | const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); | ||
| 139 | const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); | ||
| 140 | const size_t src_size = get_surface_size(src, src_bytes_per_pixel); | ||
| 141 | impl->tmp_buffer.resize(src_size); | ||
| 142 | memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size); | ||
| 143 | |||
| 144 | const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; | ||
| 145 | |||
| 146 | const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; | ||
| 147 | |||
| 148 | impl->src_buffer.resize(src_copy_size); | ||
| 149 | |||
| 150 | const bool no_passthrough = | ||
| 151 | src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y; | ||
| 152 | // Same format, different extents: rescale the raw pixel bytes directly. | ||
| 153 | const auto convertion_phase_same_format = [&]() { | ||
| 154 | NeighrestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y, | ||
| 155 | dst_extent_x, dst_extent_y, dst_bytes_per_pixel); | ||
| 156 | }; | ||
| 157 | // Different formats: convert to four f32 values per pixel, rescale, then convert into the destination format. | ||
| 158 | const auto convertion_phase_ir = [&]() { | ||
| 159 | auto* input_converter = impl->converter_factory.GetFormatConverter(src.format); | ||
| 160 | impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * 4); | ||
| 161 | impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * 4); | ||
| 162 | input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src); | ||
| 163 | |||
| 164 | NeighrestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x, | ||
| 165 | src_extent_y, dst_extent_x, dst_extent_y); | ||
| 166 | |||
| 167 | auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format); | ||
| 168 | output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer); | ||
| 169 | }; | ||
| 170 | |||
| 171 | // Do the actual blit: first extract the source rectangle into the tightly packed src_buffer. | ||
| 172 | |||
| 173 | impl->dst_buffer.resize(dst_copy_size); | ||
| 174 | if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||
| 175 | UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, | ||
| 176 | src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, | ||
| 177 | src_extent_y, src.block_height, src.block_depth, | ||
| 178 | src_extent_x * src_bytes_per_pixel); | ||
| 179 | } else { | ||
| 180 | process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, | ||
| 181 | src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); | ||
| 182 | } | ||
| 183 | |||
| 184 | // Conversion Phase | ||
| 185 | if (no_passthrough) { | ||
| 186 | if (src.format != dst.format) { | ||
| 187 | convertion_phase_ir(); | ||
| 188 | } else { | ||
| 189 | convertion_phase_same_format(); | ||
| 190 | } | ||
| 191 | } else { | ||
| 192 | impl->dst_buffer.swap(impl->src_buffer); | ||
| 193 | } | ||
| 194 | // Read the current destination surface, overwrite the target rectangle in it and write the whole surface back. | ||
| 195 | const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); | ||
| 196 | impl->tmp_buffer.resize(dst_size); | ||
| 197 | memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); | ||
| 198 | |||
| 199 | if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||
| 200 | SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, | ||
| 201 | dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, | ||
| 202 | dst_extent_y, dst.block_height, dst.block_depth, | ||
| 203 | dst_extent_x * dst_bytes_per_pixel); | ||
| 204 | } else { | ||
| 205 | process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, | ||
| 206 | dst.pitch, config.dst_x0, config.dst_y0, | ||
| 207 | static_cast<size_t>(dst_bytes_per_pixel)); | ||
| 208 | } | ||
| 209 | memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); | ||
| 210 | return true; | ||
| 211 | } | ||
| 212 | |||
| 213 | } // namespace Tegra::Engines::Blitter | ||
diff --git a/src/video_core/engines/sw_blitter/blitter.h b/src/video_core/engines/sw_blitter/blitter.h new file mode 100644 index 000000000..3edf40c3e --- /dev/null +++ b/src/video_core/engines/sw_blitter/blitter.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "video_core/engines/fermi_2d.h" | ||
| 7 | // Forward declaration: this header only holds a MemoryManager reference, so the full definition is not needed here. | ||
| 8 | namespace Tegra { | ||
| 9 | class MemoryManager; | ||
| 10 | } | ||
| 11 | |||
| 12 | namespace Tegra::Engines::Blitter { | ||
| 13 | // Software implementation of Fermi2D surface blits; buffers and converters are hidden behind the private BlitEngineImpl. | ||
| 14 | class SoftwareBlitEngine { | ||
| 15 | public: | ||
| 16 | explicit SoftwareBlitEngine(MemoryManager& memory_manager_); // explicit, matching the Fermi2D constructor; the referenced manager is stored, not copied | ||
| 17 | ~SoftwareBlitEngine(); | ||
| 18 | // Copies the rectangle described by copy_config from src to dst through memory_manager; returns true. | ||
| 19 | bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config); | ||
| 20 | |||
| 21 | private: | ||
| 22 | MemoryManager& memory_manager; | ||
| 23 | struct BlitEngineImpl; | ||
| 24 | std::unique_ptr<BlitEngineImpl> impl; | ||
| 25 | }; | ||
| 26 | |||
| 27 | } // namespace Tegra::Engines::Blitter | ||
diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp new file mode 100644 index 000000000..2e376f430 --- /dev/null +++ b/src/video_core/engines/sw_blitter/converter.cpp | |||
| @@ -0,0 +1,1097 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include <array> | ||
| 5 | #include <bit> | ||
| 6 | #include <cmath> | ||
| 7 | #include <span> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "video_core/engines/sw_blitter/converter.h" | ||
| 12 | #include "video_core/surface.h" | ||
| 13 | #include "video_core/textures/decoders.h" | ||
| 14 | |||
| 15 | #ifdef _MSC_VER | ||
| 16 | #define FORCE_INLINE __forceinline | ||
| 17 | #else | ||
| 18 | #define FORCE_INLINE inline __attribute__((always_inline)) | ||
| 19 | #endif | ||
| 20 | |||
| 21 | namespace Tegra::Engines::Blitter { | ||
| 22 | // Colour channel selector: R, G, B, A carry the values 0-3 and None takes the next value after A. | ||
| 23 | enum class Swizzle : size_t { | ||
| 24 | R = 0, | ||
| 25 | G = 1, | ||
| 26 | B = 2, | ||
| 27 | A = 3, | ||
| 28 | None, | ||
| 29 | }; | ||
| 30 | // Per-component data type tag, stored in the component_types arrays of the format traits structs. | ||
| 31 | enum class ComponentType : u32 { | ||
| 32 | SNORM = 1, | ||
| 33 | UNORM = 2, | ||
| 34 | SINT = 3, | ||
| 35 | UINT = 4, | ||
| 36 | SNORM_FORCE_FP16 = 5, | ||
| 37 | UNORM_FORCE_FP16 = 6, | ||
| 38 | FLOAT = 7, | ||
| 39 | SRGB = 8, | ||
| 40 | }; | ||
| 41 | |||
| 42 | namespace { | ||
| 43 | // 256-entry table rising from 0 to 1; entry 1 equals 1 / (255 * 12.92), so entry i looks like the sRGB-to-linear transfer function at i / 255 (TODO confirm against the code that indexes it). | ||
| 44 | constexpr std::array<f32, 256> SRGB_TO_RGB_LUT = { | ||
| 45 | 0.000000e+00f, 3.035270e-04f, 6.070540e-04f, 9.105810e-04f, 1.214108e-03f, 1.517635e-03f, | ||
| 46 | 1.821162e-03f, 2.124689e-03f, 2.428216e-03f, 2.731743e-03f, 3.035270e-03f, 3.346536e-03f, | ||
| 47 | 3.676507e-03f, 4.024717e-03f, 4.391442e-03f, 4.776953e-03f, 5.181517e-03f, 5.605392e-03f, | ||
| 48 | 6.048833e-03f, 6.512091e-03f, 6.995410e-03f, 7.499032e-03f, 8.023193e-03f, 8.568126e-03f, | ||
| 49 | 9.134059e-03f, 9.721218e-03f, 1.032982e-02f, 1.096009e-02f, 1.161224e-02f, 1.228649e-02f, | ||
| 50 | 1.298303e-02f, 1.370208e-02f, 1.444384e-02f, 1.520851e-02f, 1.599629e-02f, 1.680738e-02f, | ||
| 51 | 1.764195e-02f, 1.850022e-02f, 1.938236e-02f, 2.028856e-02f, 2.121901e-02f, 2.217389e-02f, | ||
| 52 | 2.315337e-02f, 2.415763e-02f, 2.518686e-02f, 2.624122e-02f, 2.732089e-02f, 2.842604e-02f, | ||
| 53 | 2.955684e-02f, 3.071344e-02f, 3.189603e-02f, 3.310477e-02f, 3.433981e-02f, 3.560131e-02f, | ||
| 54 | 3.688945e-02f, 3.820437e-02f, 3.954624e-02f, 4.091520e-02f, 4.231141e-02f, 4.373503e-02f, | ||
| 55 | 4.518620e-02f, 4.666509e-02f, 4.817183e-02f, 4.970657e-02f, 5.126946e-02f, 5.286065e-02f, | ||
| 56 | 5.448028e-02f, 5.612849e-02f, 5.780543e-02f, 5.951124e-02f, 6.124605e-02f, 6.301001e-02f, | ||
| 57 | 6.480327e-02f, 6.662594e-02f, 6.847817e-02f, 7.036009e-02f, 7.227185e-02f, 7.421357e-02f, | ||
| 58 | 7.618538e-02f, 7.818742e-02f, 8.021982e-02f, 8.228271e-02f, 8.437621e-02f, 8.650046e-02f, | ||
| 59 | 8.865558e-02f, 9.084171e-02f, 9.305897e-02f, 9.530747e-02f, 9.758735e-02f, 9.989873e-02f, | ||
| 60 | 1.022417e-01f, 1.046165e-01f, 1.070231e-01f, 1.094617e-01f, 1.119324e-01f, 1.144354e-01f, | ||
| 61 | 1.169707e-01f, 1.195384e-01f, 1.221388e-01f, 1.247718e-01f, 1.274377e-01f, 1.301365e-01f, | ||
| 62 | 1.328683e-01f, 1.356333e-01f, 1.384316e-01f, 1.412633e-01f, 1.441285e-01f, 1.470273e-01f, | ||
| 63 | 1.499598e-01f, 1.529261e-01f, 1.559265e-01f, 1.589608e-01f, 1.620294e-01f, 1.651322e-01f, | ||
| 64 | 1.682694e-01f, 1.714411e-01f, 1.746474e-01f, 1.778884e-01f, 1.811642e-01f, 1.844750e-01f, | ||
| 65 | 1.878208e-01f, 1.912017e-01f, 1.946178e-01f, 1.980693e-01f, 2.015563e-01f, 2.050787e-01f, | ||
| 66 | 2.086369e-01f, 2.122308e-01f, 2.158605e-01f, 2.195262e-01f, 2.232280e-01f, 2.269659e-01f, | ||
| 67 | 2.307401e-01f, 2.345506e-01f, 2.383976e-01f, 2.422811e-01f, 2.462013e-01f, 2.501583e-01f, | ||
| 68 | 2.541521e-01f, 2.581829e-01f, 2.622507e-01f, 2.663556e-01f, 2.704978e-01f, 2.746773e-01f, | ||
| 69 | 2.788943e-01f, 2.831487e-01f, 2.874408e-01f, 2.917706e-01f, 2.961383e-01f, 3.005438e-01f, | ||
| 70 | 3.049873e-01f, 3.094689e-01f, 3.139887e-01f, 3.185468e-01f, 3.231432e-01f, 3.277781e-01f, | ||
| 71 | 3.324515e-01f, 3.371636e-01f, 3.419144e-01f, 3.467041e-01f, 3.515326e-01f, 3.564001e-01f, | ||
| 72 | 3.613068e-01f, 3.662526e-01f, 3.712377e-01f, 3.762621e-01f, 3.813260e-01f, 3.864294e-01f, | ||
| 73 | 3.915725e-01f, 3.967552e-01f, 4.019778e-01f, 4.072402e-01f, 4.125426e-01f, 4.178851e-01f, | ||
| 74 | 4.232677e-01f, 4.286905e-01f, 4.341536e-01f, 4.396572e-01f, 4.452012e-01f, 4.507858e-01f, | ||
| 75 | 4.564110e-01f, 4.620770e-01f, 4.677838e-01f, 4.735315e-01f, 4.793202e-01f, 4.851499e-01f, | ||
| 76 | 4.910209e-01f, 4.969330e-01f, 5.028865e-01f, 5.088813e-01f, 5.149177e-01f, 5.209956e-01f, | ||
| 77 | 5.271151e-01f, 5.332764e-01f, 5.394795e-01f, 5.457245e-01f, 5.520114e-01f, 5.583404e-01f, | ||
| 78 | 5.647115e-01f, 5.711249e-01f, 5.775805e-01f, 5.840784e-01f, 5.906188e-01f, 5.972018e-01f, | ||
| 79 | 6.038274e-01f, 6.104956e-01f, 6.172066e-01f, 6.239604e-01f, 6.307572e-01f, 6.375968e-01f, | ||
| 80 | 6.444797e-01f, 6.514056e-01f, 6.583748e-01f, 6.653873e-01f, 6.724432e-01f, 6.795425e-01f, | ||
| 81 | 6.866853e-01f, 6.938717e-01f, 7.011019e-01f, 7.083758e-01f, 7.156935e-01f, 7.230551e-01f, | ||
| 82 | 7.304608e-01f, 7.379104e-01f, 7.454042e-01f, 7.529422e-01f, 7.605245e-01f, 7.681512e-01f, | ||
| 83 | 7.758222e-01f, 7.835378e-01f, 7.912979e-01f, 7.991027e-01f, 8.069522e-01f, 8.148466e-01f, | ||
| 84 | 8.227857e-01f, 8.307699e-01f, 8.387990e-01f, 8.468732e-01f, 8.549926e-01f, 8.631572e-01f, | ||
| 85 | 8.713671e-01f, 8.796224e-01f, 8.879231e-01f, 8.962694e-01f, 9.046612e-01f, 9.130986e-01f, | ||
| 86 | 9.215819e-01f, 9.301109e-01f, 9.386857e-01f, 9.473065e-01f, 9.559733e-01f, 9.646863e-01f, | ||
| 87 | 9.734453e-01f, 9.822506e-01f, 9.911021e-01f, 1.000000e+00f}; | ||
| 88 | // 256-entry table rising from 0 to 1; entry 1 matches 1.055 * (1 / 255)^(1 / 2.4) - 0.055, so entry i looks like the linear-to-sRGB transfer function at i / 255 (TODO confirm against the code that indexes it). | ||
| 89 | constexpr std::array<f32, 256> RGB_TO_SRGB_LUT = { | ||
| 90 | 0.000000e+00f, 4.984009e-02f, 8.494473e-02f, 1.107021e-01f, 1.318038e-01f, 1.500052e-01f, | ||
| 91 | 1.661857e-01f, 1.808585e-01f, 1.943532e-01f, 2.068957e-01f, 2.186491e-01f, 2.297351e-01f, | ||
| 92 | 2.402475e-01f, 2.502604e-01f, 2.598334e-01f, 2.690152e-01f, 2.778465e-01f, 2.863614e-01f, | ||
| 93 | 2.945889e-01f, 3.025538e-01f, 3.102778e-01f, 3.177796e-01f, 3.250757e-01f, 3.321809e-01f, | ||
| 94 | 3.391081e-01f, 3.458689e-01f, 3.524737e-01f, 3.589320e-01f, 3.652521e-01f, 3.714419e-01f, | ||
| 95 | 3.775084e-01f, 3.834581e-01f, 3.892968e-01f, 3.950301e-01f, 4.006628e-01f, 4.061998e-01f, | ||
| 96 | 4.116451e-01f, 4.170030e-01f, 4.222770e-01f, 4.274707e-01f, 4.325873e-01f, 4.376298e-01f, | ||
| 97 | 4.426010e-01f, 4.475037e-01f, 4.523403e-01f, 4.571131e-01f, 4.618246e-01f, 4.664766e-01f, | ||
| 98 | 4.710712e-01f, 4.756104e-01f, 4.800958e-01f, 4.845292e-01f, 4.889122e-01f, 4.932462e-01f, | ||
| 99 | 4.975329e-01f, 5.017734e-01f, 5.059693e-01f, 5.101216e-01f, 5.142317e-01f, 5.183006e-01f, | ||
| 100 | 5.223295e-01f, 5.263194e-01f, 5.302714e-01f, 5.341862e-01f, 5.380651e-01f, 5.419087e-01f, | ||
| 101 | 5.457181e-01f, 5.494938e-01f, 5.532369e-01f, 5.569480e-01f, 5.606278e-01f, 5.642771e-01f, | ||
| 102 | 5.678965e-01f, 5.714868e-01f, 5.750484e-01f, 5.785821e-01f, 5.820884e-01f, 5.855680e-01f, | ||
| 103 | 5.890211e-01f, 5.924487e-01f, 5.958509e-01f, 5.992285e-01f, 6.025819e-01f, 6.059114e-01f, | ||
| 104 | 6.092176e-01f, 6.125010e-01f, 6.157619e-01f, 6.190008e-01f, 6.222180e-01f, 6.254140e-01f, | ||
| 105 | 6.285890e-01f, 6.317436e-01f, 6.348780e-01f, 6.379926e-01f, 6.410878e-01f, 6.441637e-01f, | ||
| 106 | 6.472208e-01f, 6.502595e-01f, 6.532799e-01f, 6.562824e-01f, 6.592672e-01f, 6.622347e-01f, | ||
| 107 | 6.651851e-01f, 6.681187e-01f, 6.710356e-01f, 6.739363e-01f, 6.768209e-01f, 6.796897e-01f, | ||
| 108 | 6.825429e-01f, 6.853807e-01f, 6.882034e-01f, 6.910111e-01f, 6.938041e-01f, 6.965826e-01f, | ||
| 109 | 6.993468e-01f, 7.020969e-01f, 7.048331e-01f, 7.075556e-01f, 7.102645e-01f, 7.129600e-01f, | ||
| 110 | 7.156424e-01f, 7.183118e-01f, 7.209683e-01f, 7.236121e-01f, 7.262435e-01f, 7.288625e-01f, | ||
| 111 | 7.314693e-01f, 7.340640e-01f, 7.366470e-01f, 7.392181e-01f, 7.417776e-01f, 7.443256e-01f, | ||
| 112 | 7.468624e-01f, 7.493880e-01f, 7.519025e-01f, 7.544061e-01f, 7.568989e-01f, 7.593810e-01f, | ||
| 113 | 7.618526e-01f, 7.643137e-01f, 7.667645e-01f, 7.692052e-01f, 7.716358e-01f, 7.740564e-01f, | ||
| 114 | 7.764671e-01f, 7.788681e-01f, 7.812595e-01f, 7.836413e-01f, 7.860138e-01f, 7.883768e-01f, | ||
| 115 | 7.907307e-01f, 7.930754e-01f, 7.954110e-01f, 7.977377e-01f, 8.000556e-01f, 8.023647e-01f, | ||
| 116 | 8.046651e-01f, 8.069569e-01f, 8.092403e-01f, 8.115152e-01f, 8.137818e-01f, 8.160402e-01f, | ||
| 117 | 8.182903e-01f, 8.205324e-01f, 8.227665e-01f, 8.249926e-01f, 8.272109e-01f, 8.294214e-01f, | ||
| 118 | 8.316242e-01f, 8.338194e-01f, 8.360070e-01f, 8.381871e-01f, 8.403597e-01f, 8.425251e-01f, | ||
| 119 | 8.446831e-01f, 8.468339e-01f, 8.489776e-01f, 8.511142e-01f, 8.532437e-01f, 8.553662e-01f, | ||
| 120 | 8.574819e-01f, 8.595907e-01f, 8.616927e-01f, 8.637881e-01f, 8.658767e-01f, 8.679587e-01f, | ||
| 121 | 8.700342e-01f, 8.721032e-01f, 8.741657e-01f, 8.762218e-01f, 8.782716e-01f, 8.803151e-01f, | ||
| 122 | 8.823524e-01f, 8.843835e-01f, 8.864085e-01f, 8.884274e-01f, 8.904402e-01f, 8.924471e-01f, | ||
| 123 | 8.944480e-01f, 8.964431e-01f, 8.984324e-01f, 9.004158e-01f, 9.023935e-01f, 9.043654e-01f, | ||
| 124 | 9.063318e-01f, 9.082925e-01f, 9.102476e-01f, 9.121972e-01f, 9.141413e-01f, 9.160800e-01f, | ||
| 125 | 9.180133e-01f, 9.199412e-01f, 9.218637e-01f, 9.237810e-01f, 9.256931e-01f, 9.276000e-01f, | ||
| 126 | 9.295017e-01f, 9.313982e-01f, 9.332896e-01f, 9.351761e-01f, 9.370575e-01f, 9.389339e-01f, | ||
| 127 | 9.408054e-01f, 9.426719e-01f, 9.445336e-01f, 9.463905e-01f, 9.482424e-01f, 9.500897e-01f, | ||
| 128 | 9.519322e-01f, 9.537700e-01f, 9.556032e-01f, 9.574316e-01f, 9.592555e-01f, 9.610748e-01f, | ||
| 129 | 9.628896e-01f, 9.646998e-01f, 9.665055e-01f, 9.683068e-01f, 9.701037e-01f, 9.718961e-01f, | ||
| 130 | 9.736842e-01f, 9.754679e-01f, 9.772474e-01f, 9.790225e-01f, 9.807934e-01f, 9.825601e-01f, | ||
| 131 | 9.843225e-01f, 9.860808e-01f, 9.878350e-01f, 9.895850e-01f, 9.913309e-01f, 9.930727e-01f, | ||
| 132 | 9.948106e-01f, 9.965444e-01f, 9.982741e-01f, 1.000000e+00f}; | ||
| 133 | |||
| 134 | } // namespace | ||
| 135 | |||
| 136 | struct R32B32G32A32_FLOATTraits { | ||
| 137 | static constexpr size_t num_components = 4; | ||
| 138 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 139 | ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 140 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; | ||
| 141 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 142 | Swizzle::A, Swizzle::G, Swizzle::B, Swizzle::R}; | ||
| 143 | }; | ||
| 144 | |||
| 145 | struct R32G32B32A32_SINTTraits { | ||
| 146 | static constexpr size_t num_components = 4; | ||
| 147 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 148 | ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; | ||
| 149 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; | ||
| 150 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 151 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 152 | }; | ||
| 153 | |||
| 154 | struct R32G32B32A32_UINTTraits { | ||
| 155 | static constexpr size_t num_components = 4; | ||
| 156 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 157 | ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; | ||
| 158 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; | ||
| 159 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 160 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 161 | }; | ||
| 162 | |||
| 163 | struct R16G16B16A16_UNORMTraits { | ||
| 164 | static constexpr size_t num_components = 4; | ||
| 165 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 166 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 167 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 168 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 169 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 170 | }; | ||
| 171 | |||
| 172 | struct R16G16B16A16_SNORMTraits { | ||
| 173 | static constexpr size_t num_components = 4; | ||
| 174 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 175 | ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; | ||
| 176 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 177 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 178 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 179 | }; | ||
| 180 | |||
| 181 | struct R16G16B16A16_SINTTraits { | ||
| 182 | static constexpr size_t num_components = 4; | ||
| 183 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 184 | ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; | ||
| 185 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 186 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 187 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 188 | }; | ||
| 189 | |||
| 190 | struct R16G16B16A16_UINTTraits { | ||
| 191 | static constexpr size_t num_components = 4; | ||
| 192 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 193 | ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; | ||
| 194 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 195 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 196 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 197 | }; | ||
| 198 | |||
| 199 | struct R16G16B16A16_FLOATTraits { | ||
| 200 | static constexpr size_t num_components = 4; | ||
| 201 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 202 | ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 203 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 204 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 205 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 206 | }; | ||
| 207 | |||
| 208 | struct R32G32_FLOATTraits { | ||
| 209 | static constexpr size_t num_components = 2; | ||
| 210 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 211 | ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 212 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32}; | ||
| 213 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 214 | Swizzle::R}; | ||
| 215 | }; | ||
| 216 | |||
| 217 | struct R32G32_SINTTraits { | ||
| 218 | static constexpr size_t num_components = 2; | ||
| 219 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 220 | ComponentType::SINT, ComponentType::SINT}; | ||
| 221 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32}; | ||
| 222 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 223 | Swizzle::R}; | ||
| 224 | }; | ||
| 225 | |||
| 226 | struct R32G32_UINTTraits { | ||
| 227 | static constexpr size_t num_components = 2; | ||
| 228 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 229 | ComponentType::UINT, ComponentType::UINT}; | ||
| 230 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32}; | ||
| 231 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 232 | Swizzle::R}; | ||
| 233 | }; | ||
| 234 | |||
| 235 | struct R16G16B16X16_FLOATTraits { | ||
| 236 | static constexpr size_t num_components = 4; | ||
| 237 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 238 | ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 239 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 240 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 241 | Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 242 | }; | ||
| 243 | |||
| 244 | struct A8R8G8B8_UNORMTraits { | ||
| 245 | static constexpr size_t num_components = 4; | ||
| 246 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 247 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 248 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 249 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 250 | Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A}; | ||
| 251 | }; | ||
| 252 | |||
| 253 | struct A8R8G8B8_SRGBTraits { | ||
| 254 | static constexpr size_t num_components = 4; | ||
| 255 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 256 | ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; | ||
| 257 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 258 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 259 | Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A}; | ||
| 260 | }; | ||
| 261 | |||
| 262 | struct A2B10G10R10_UNORMTraits { | ||
| 263 | static constexpr size_t num_components = 4; | ||
| 264 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 265 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 266 | static constexpr std::array<size_t, num_components> component_sizes = {10, 10, 10, 2}; | ||
| 267 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 268 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 269 | }; | ||
| 270 | |||
| 271 | struct A2B10G10R10_UINTTraits { | ||
| 272 | static constexpr size_t num_components = 4; | ||
| 273 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 274 | ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; | ||
| 275 | static constexpr std::array<size_t, num_components> component_sizes = {10, 10, 10, 2}; | ||
| 276 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 277 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 278 | }; | ||
| 279 | |||
| 280 | struct A8B8G8R8_UNORMTraits { | ||
| 281 | static constexpr size_t num_components = 4; | ||
| 282 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 283 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 284 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 285 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 286 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 287 | }; | ||
| 288 | |||
| 289 | struct A8B8G8R8_SRGBTraits { | ||
| 290 | static constexpr size_t num_components = 4; | ||
| 291 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 292 | ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; | ||
| 293 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 294 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 295 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 296 | }; | ||
| 297 | |||
| 298 | struct A8B8G8R8_SNORMTraits { | ||
| 299 | static constexpr size_t num_components = 4; | ||
| 300 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 301 | ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; | ||
| 302 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 303 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 304 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 305 | }; | ||
| 306 | |||
| 307 | struct A8B8G8R8_SINTTraits { | ||
| 308 | static constexpr size_t num_components = 4; | ||
| 309 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 310 | ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; | ||
| 311 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 312 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 313 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 314 | }; | ||
| 315 | |||
| 316 | struct A8B8G8R8_UINTTraits { | ||
| 317 | static constexpr size_t num_components = 4; | ||
| 318 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 319 | ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; | ||
| 320 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 321 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 322 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 323 | }; | ||
| 324 | |||
| 325 | struct R16G16_UNORMTraits { | ||
| 326 | static constexpr size_t num_components = 2; | ||
| 327 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 328 | ComponentType::UNORM, ComponentType::UNORM}; | ||
| 329 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 330 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 331 | Swizzle::R}; | ||
| 332 | }; | ||
| 333 | |||
| 334 | struct R16G16_SNORMTraits { | ||
| 335 | static constexpr size_t num_components = 2; | ||
| 336 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 337 | ComponentType::SNORM, ComponentType::SNORM}; | ||
| 338 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 339 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 340 | Swizzle::R}; | ||
| 341 | }; | ||
| 342 | |||
| 343 | struct R16G16_SINTTraits { | ||
| 344 | static constexpr size_t num_components = 2; | ||
| 345 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 346 | ComponentType::SINT, ComponentType::SINT}; | ||
| 347 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 348 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 349 | Swizzle::R}; | ||
| 350 | }; | ||
| 351 | |||
| 352 | struct R16G16_UINTTraits { | ||
| 353 | static constexpr size_t num_components = 2; | ||
| 354 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 355 | ComponentType::UINT, ComponentType::UINT}; | ||
| 356 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 357 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 358 | Swizzle::R}; | ||
| 359 | }; | ||
| 360 | |||
| 361 | struct R16G16_FLOATTraits { | ||
| 362 | static constexpr size_t num_components = 2; | ||
| 363 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 364 | ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 365 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 366 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 367 | Swizzle::R}; | ||
| 368 | }; | ||
| 369 | |||
| 370 | struct B10G11R11_FLOATTraits { | ||
| 371 | static constexpr size_t num_components = 3; | ||
| 372 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 373 | ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 374 | static constexpr std::array<size_t, num_components> component_sizes = {11, 11, 10}; | ||
| 375 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 376 | Swizzle::R, Swizzle::G, Swizzle::B}; | ||
| 377 | }; | ||
| 378 | |||
| 379 | struct R32_SINTTraits { | ||
| 380 | static constexpr size_t num_components = 1; | ||
| 381 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 382 | ComponentType::SINT}; | ||
| 383 | static constexpr std::array<size_t, num_components> component_sizes = {32}; | ||
| 384 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 385 | }; | ||
| 386 | |||
| 387 | struct R32_UINTTraits { | ||
| 388 | static constexpr size_t num_components = 1; | ||
| 389 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 390 | ComponentType::UINT}; | ||
| 391 | static constexpr std::array<size_t, num_components> component_sizes = {32}; | ||
| 392 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 393 | }; | ||
| 394 | |||
| 395 | struct R32_FLOATTraits { | ||
| 396 | static constexpr size_t num_components = 1; | ||
| 397 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 398 | ComponentType::FLOAT}; | ||
| 399 | static constexpr std::array<size_t, num_components> component_sizes = {32}; | ||
| 400 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 401 | }; | ||
| 402 | |||
| 403 | struct R5G6B5_UNORMTraits { | ||
| 404 | static constexpr size_t num_components = 3; | ||
| 405 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 406 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 407 | static constexpr std::array<size_t, num_components> component_sizes = {5, 6, 5}; | ||
| 408 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 409 | Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 410 | }; | ||
| 411 | |||
| 412 | struct A1R5G5B5_UNORMTraits { | ||
| 413 | static constexpr size_t num_components = 4; | ||
| 414 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 415 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 416 | static constexpr std::array<size_t, num_components> component_sizes = {5, 5, 5, 1}; | ||
| 417 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 418 | Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A}; | ||
| 419 | }; | ||
| 420 | |||
| 421 | struct R8G8_UNORMTraits { | ||
| 422 | static constexpr size_t num_components = 2; | ||
| 423 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 424 | ComponentType::UNORM, ComponentType::UNORM}; | ||
| 425 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; | ||
| 426 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 427 | Swizzle::R}; | ||
| 428 | }; | ||
| 429 | |||
| 430 | struct R8G8_SNORMTraits { | ||
| 431 | static constexpr size_t num_components = 2; | ||
| 432 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 433 | ComponentType::SNORM, ComponentType::SNORM}; | ||
| 434 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; | ||
| 435 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 436 | Swizzle::R}; | ||
| 437 | }; | ||
| 438 | |||
| 439 | struct R8G8_SINTTraits { | ||
| 440 | static constexpr size_t num_components = 2; | ||
| 441 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 442 | ComponentType::SINT, ComponentType::SINT}; | ||
| 443 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; | ||
| 444 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 445 | Swizzle::R}; | ||
| 446 | }; | ||
| 447 | |||
| 448 | struct R8G8_UINTTraits { | ||
| 449 | static constexpr size_t num_components = 2; | ||
| 450 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 451 | ComponentType::UINT, ComponentType::UINT}; | ||
| 452 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; | ||
| 453 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G, | ||
| 454 | Swizzle::R}; | ||
| 455 | }; | ||
| 456 | |||
| 457 | struct R16_UNORMTraits { | ||
| 458 | static constexpr size_t num_components = 1; | ||
| 459 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 460 | ComponentType::UNORM}; | ||
| 461 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 462 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 463 | }; | ||
| 464 | |||
| 465 | struct R16_SNORMTraits { | ||
| 466 | static constexpr size_t num_components = 1; | ||
| 467 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 468 | ComponentType::SNORM}; | ||
| 469 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 470 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 471 | }; | ||
| 472 | |||
| 473 | struct R16_SINTTraits { | ||
| 474 | static constexpr size_t num_components = 1; | ||
| 475 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 476 | ComponentType::SINT}; | ||
| 477 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 478 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 479 | }; | ||
| 480 | |||
| 481 | struct R16_UINTTraits { | ||
| 482 | static constexpr size_t num_components = 1; | ||
| 483 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 484 | ComponentType::UINT}; | ||
| 485 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 486 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 487 | }; | ||
| 488 | |||
| 489 | struct R16_FLOATTraits { | ||
| 490 | static constexpr size_t num_components = 1; | ||
| 491 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 492 | ComponentType::FLOAT}; | ||
| 493 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 494 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 495 | }; | ||
| 496 | |||
| 497 | struct R8_UNORMTraits { | ||
| 498 | static constexpr size_t num_components = 1; | ||
| 499 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 500 | ComponentType::UNORM}; | ||
| 501 | static constexpr std::array<size_t, num_components> component_sizes = {8}; | ||
| 502 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 503 | }; | ||
| 504 | |||
| 505 | struct R8_SNORMTraits { | ||
| 506 | static constexpr size_t num_components = 1; | ||
| 507 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 508 | ComponentType::SNORM}; | ||
| 509 | static constexpr std::array<size_t, num_components> component_sizes = {8}; | ||
| 510 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 511 | }; | ||
| 512 | |||
| 513 | struct R8_SINTTraits { | ||
| 514 | static constexpr size_t num_components = 1; | ||
| 515 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 516 | ComponentType::SINT}; | ||
| 517 | static constexpr std::array<size_t, num_components> component_sizes = {8}; | ||
| 518 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 519 | }; | ||
| 520 | |||
| 521 | struct R8_UINTTraits { | ||
| 522 | static constexpr size_t num_components = 1; | ||
| 523 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 524 | ComponentType::UINT}; | ||
| 525 | static constexpr std::array<size_t, num_components> component_sizes = {8}; | ||
| 526 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 527 | }; | ||
| 528 | |||
| 529 | template <class ConverterTraits> | ||
| 530 | class ConverterImpl : public Converter { | ||
| 531 | private: | ||
| 532 | static constexpr size_t num_components = ConverterTraits::num_components; | ||
| 533 | static constexpr std::array<ComponentType, num_components> component_types = | ||
| 534 | ConverterTraits::component_types; | ||
| 535 | static constexpr std::array<size_t, num_components> component_sizes = | ||
| 536 | ConverterTraits::component_sizes; | ||
| 537 | static constexpr std::array<Swizzle, num_components> component_swizzle = | ||
| 538 | ConverterTraits::component_swizzle; | ||
| 539 | |||
| 540 | static constexpr size_t CalculateByteSize() { | ||
| 541 | size_t size = 0; | ||
| 542 | for (const size_t component_size : component_sizes) { | ||
| 543 | size += component_size; | ||
| 544 | } | ||
| 545 | const size_t power = (sizeof(size_t) * 8) - std::countl_zero(size) - 1ULL; | ||
| 546 | const size_t base_size = 1ULL << power; | ||
| 547 | const size_t mask = base_size - 1ULL; | ||
| 548 | return ((size & mask) != 0 ? base_size << 1ULL : base_size) / 8; | ||
| 549 | } | ||
| 550 | |||
| 551 | static constexpr size_t total_bytes_per_pixel = CalculateByteSize(); | ||
| 552 | static constexpr size_t total_words_per_pixel = | ||
| 553 | (total_bytes_per_pixel + sizeof(u32) - 1U) / sizeof(u32); | ||
| 554 | static constexpr size_t components_per_ir_rep = 4; | ||
| 555 | |||
| 556 | template <bool get_offsets> | ||
| 557 | static constexpr std::array<size_t, num_components> GetBoundWordsOffsets() { | ||
| 558 | std::array<size_t, num_components> result; | ||
| 559 | result.fill(0); | ||
| 560 | constexpr size_t total_bits_per_word = sizeof(u32) * 8; | ||
| 561 | size_t accumulated_size = 0; | ||
| 562 | size_t count = 0; | ||
| 563 | for (size_t i = 0; i < num_components; i++) { | ||
| 564 | if constexpr (get_offsets) { | ||
| 565 | result[i] = accumulated_size; | ||
| 566 | } else { | ||
| 567 | result[i] = count; | ||
| 568 | } | ||
| 569 | accumulated_size += component_sizes[i]; | ||
| 570 | if (accumulated_size > total_bits_per_word) { | ||
| 571 | if constexpr (get_offsets) { | ||
| 572 | result[i] = 0; | ||
| 573 | } else { | ||
| 574 | result[i]++; | ||
| 575 | } | ||
| 576 | count++; | ||
| 577 | accumulated_size = component_sizes[i]; | ||
| 578 | } | ||
| 579 | } | ||
| 580 | return result; | ||
| 581 | } | ||
| 582 | |||
| 583 | static constexpr std::array<size_t, num_components> bound_words = GetBoundWordsOffsets<false>(); | ||
| 584 | static constexpr std::array<size_t, num_components> bound_offsets = | ||
| 585 | GetBoundWordsOffsets<true>(); | ||
| 586 | |||
| 587 | static constexpr std::array<u32, num_components> GetComponentsMask() { | ||
| 588 | std::array<u32, num_components> result; | ||
| 589 | for (size_t i = 0; i < num_components; i++) { | ||
| 590 | result[i] = (((u32)~0) >> (8 * sizeof(u32) - component_sizes[i])) << bound_offsets[i]; | ||
| 591 | } | ||
| 592 | return result; | ||
| 593 | } | ||
| 594 | |||
| 595 | static constexpr std::array<u32, num_components> component_mask = GetComponentsMask(); | ||
| 596 | |||
| 597 | // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function | ||
| 598 | // calls, it may fail to detect the benefit of inlining. | ||
| 599 | template <size_t which_component> | ||
| 600 | FORCE_INLINE void ConvertToComponent(u32 which_word, f32& out_component) { | ||
| 601 | const u32 value = (which_word >> bound_offsets[which_component]) & | ||
| 602 | static_cast<u32>((1ULL << component_sizes[which_component]) - 1ULL); | ||
| 603 | const auto sign_extend = [](u32 base_value, size_t bits) { | ||
| 604 | const size_t shift_amount = sizeof(u32) * 8 - bits; | ||
| 605 | s32 shifted_value = static_cast<s32>(base_value << shift_amount); | ||
| 606 | return shifted_value >> shift_amount; | ||
| 607 | }; | ||
| 608 | const auto force_to_fp16 = [](f32 base_value) { | ||
| 609 | u32 tmp = std::bit_cast<u32>(base_value); | ||
| 610 | constexpr size_t fp32_mantissa_bits = 23; | ||
| 611 | constexpr size_t fp16_mantissa_bits = 10; | ||
| 612 | constexpr size_t mantissa_mask = | ||
| 613 | ~((1ULL << (fp32_mantissa_bits - fp16_mantissa_bits)) - 1ULL); | ||
| 614 | tmp = tmp & mantissa_mask; | ||
| 615 | // TODO: force the exponent within the range of half float. Not needed in UNORM / SNORM | ||
| 616 | return std::bit_cast<f32>(tmp); | ||
| 617 | }; | ||
| 618 | const auto from_fp_n = [&sign_extend](u32 base_value, size_t bits, size_t mantissa) { | ||
| 619 | constexpr size_t fp32_mantissa_bits = 23; | ||
| 620 | size_t shift_towards = fp32_mantissa_bits - mantissa; | ||
| 621 | const u32 new_value = | ||
| 622 | static_cast<u32>(sign_extend(base_value, bits) << shift_towards) & (~(1U << 31)); | ||
| 623 | return std::bit_cast<f32>(new_value); | ||
| 624 | }; | ||
| 625 | const auto calculate_snorm = [&]() { | ||
| 626 | return static_cast<f32>( | ||
| 627 | static_cast<f64>(sign_extend(value, component_sizes[which_component])) / | ||
| 628 | ((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); | ||
| 629 | }; | ||
| 630 | const auto calculate_unorm = [&]() { | ||
| 631 | return static_cast<f32>(static_cast<f32>(value) / | ||
| 632 | ((1ULL << (component_sizes[which_component])) - 1ULL)); | ||
| 633 | }; | ||
| 634 | if constexpr (component_types[which_component] == ComponentType::SNORM) { | ||
| 635 | out_component = calculate_snorm(); | ||
| 636 | } else if constexpr (component_types[which_component] == ComponentType::UNORM) { | ||
| 637 | out_component = calculate_unorm(); | ||
| 638 | } else if constexpr (component_types[which_component] == ComponentType::SINT) { | ||
| 639 | out_component = static_cast<f32>( | ||
| 640 | static_cast<s32>(sign_extend(value, component_sizes[which_component]))); | ||
| 641 | } else if constexpr (component_types[which_component] == ComponentType::UINT) { | ||
| 642 | out_component = static_cast<f32>( | ||
| 643 | static_cast<s32>(sign_extend(value, component_sizes[which_component]))); | ||
| 644 | } else if constexpr (component_types[which_component] == ComponentType::SNORM_FORCE_FP16) { | ||
| 645 | out_component = calculate_snorm(); | ||
| 646 | out_component = force_to_fp16(out_component); | ||
| 647 | } else if constexpr (component_types[which_component] == ComponentType::UNORM_FORCE_FP16) { | ||
| 648 | out_component = calculate_unorm(); | ||
| 649 | out_component = force_to_fp16(out_component); | ||
| 650 | } else if constexpr (component_types[which_component] == ComponentType::FLOAT) { | ||
| 651 | if constexpr (component_sizes[which_component] == 32) { | ||
| 652 | out_component = std::bit_cast<f32>(value); | ||
| 653 | } else if constexpr (component_sizes[which_component] == 16) { | ||
| 654 | static constexpr u32 sign_mask = 0x8000; | ||
| 655 | static constexpr u32 mantissa_mask = 0x8000; | ||
| 656 | out_component = std::bit_cast<f32>(((value & sign_mask) << 16) | | ||
| 657 | (((value & 0x7c00) + 0x1C000) << 13) | | ||
| 658 | ((value & mantissa_mask) << 13)); | ||
| 659 | } else { | ||
| 660 | out_component = from_fp_n(value, component_sizes[which_component], | ||
| 661 | component_sizes[which_component] - 5); | ||
| 662 | } | ||
| 663 | } else if constexpr (component_types[which_component] == ComponentType::SRGB) { | ||
| 664 | if constexpr (component_swizzle[which_component] == Swizzle::A) { | ||
| 665 | out_component = calculate_unorm(); | ||
| 666 | } else if constexpr (component_sizes[which_component] == 8) { | ||
| 667 | out_component = SRGB_TO_RGB_LUT[value]; | ||
| 668 | } else { | ||
| 669 | out_component = calculate_unorm(); | ||
| 670 | UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented", | ||
| 671 | component_sizes[which_component]); | ||
| 672 | } | ||
| 673 | } | ||
| 674 | } | ||
| 675 | |||
| 676 | // We are forcing inline so the compiler can SIMD the conversions, since it may do 4 function | ||
| 677 | // calls, it may fail to detect the benefit of inlining. | ||
| 678 | template <size_t which_component> | ||
| 679 | FORCE_INLINE void ConvertFromComponent(u32& which_word, f32 in_component) { | ||
| 680 | const auto insert_to_word = [&]<typename T>(T new_word) { | ||
| 681 | which_word |= (static_cast<u32>(new_word) << bound_offsets[which_component]) & | ||
| 682 | component_mask[which_component]; | ||
| 683 | }; | ||
| 684 | const auto to_fp_n = [](f32 base_value, size_t bits, size_t mantissa) { | ||
| 685 | constexpr size_t fp32_mantissa_bits = 23; | ||
| 686 | u32 tmp_value = std::bit_cast<u32>(std::max(base_value, 0.0f)); | ||
| 687 | size_t shift_towards = fp32_mantissa_bits - mantissa; | ||
| 688 | return tmp_value >> shift_towards; | ||
| 689 | }; | ||
| 690 | const auto calculate_unorm = [&]() { | ||
| 691 | return static_cast<u32>(static_cast<f32>(in_component) * | ||
| 692 | ((1ULL << (component_sizes[which_component])) - 1ULL)); | ||
| 693 | }; | ||
| 694 | if constexpr (component_types[which_component] == ComponentType::SNORM || | ||
| 695 | component_types[which_component] == ComponentType::SNORM_FORCE_FP16) { | ||
| 696 | s32 tmp_word = | ||
| 697 | static_cast<s32>(static_cast<f64>(in_component) * | ||
| 698 | ((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); | ||
| 699 | insert_to_word(tmp_word); | ||
| 700 | |||
| 701 | } else if constexpr (component_types[which_component] == ComponentType::UNORM || | ||
| 702 | component_types[which_component] == ComponentType::UNORM_FORCE_FP16) { | ||
| 703 | u32 tmp_word = calculate_unorm(); | ||
| 704 | insert_to_word(tmp_word); | ||
| 705 | } else if constexpr (component_types[which_component] == ComponentType::SINT) { | ||
| 706 | s32 tmp_word = static_cast<s32>(in_component); | ||
| 707 | insert_to_word(tmp_word); | ||
| 708 | } else if constexpr (component_types[which_component] == ComponentType::UINT) { | ||
| 709 | u32 tmp_word = static_cast<u32>(in_component); | ||
| 710 | insert_to_word(tmp_word); | ||
| 711 | } else if constexpr (component_types[which_component] == ComponentType::FLOAT) { | ||
| 712 | if constexpr (component_sizes[which_component] == 32) { | ||
| 713 | u32 tmp_word = std::bit_cast<u32>(in_component); | ||
| 714 | insert_to_word(tmp_word); | ||
| 715 | } else if constexpr (component_sizes[which_component] == 16) { | ||
| 716 | static constexpr u32 sign_mask = 0x8000; | ||
| 717 | static constexpr u32 mantissa_mask = 0x8000; | ||
| 718 | const u32 tmp_word = std::bit_cast<u32>(in_component); | ||
| 719 | const u32 half = ((tmp_word >> 16) & sign_mask) | | ||
| 720 | ((((tmp_word & 0x7f800000) - 0x38000000) >> 13) & 0x7c00) | | ||
| 721 | ((tmp_word >> 13) & 0x03ff); | ||
| 722 | insert_to_word(half); | ||
| 723 | } else { | ||
| 724 | insert_to_word(to_fp_n(in_component, component_sizes[which_component], | ||
| 725 | component_sizes[which_component] - 5)); | ||
| 726 | } | ||
| 727 | } else if constexpr (component_types[which_component] == ComponentType::SRGB) { | ||
| 728 | if constexpr (component_swizzle[which_component] != Swizzle::A) { | ||
| 729 | if constexpr (component_sizes[which_component] == 8) { | ||
| 730 | const u32 index = calculate_unorm(); | ||
| 731 | in_component = RGB_TO_SRGB_LUT[index]; | ||
| 732 | } else { | ||
| 733 | UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented", | ||
| 734 | component_sizes[which_component]); | ||
| 735 | } | ||
| 736 | } | ||
| 737 | const u32 tmp_word = calculate_unorm(); | ||
| 738 | insert_to_word(tmp_word); | ||
| 739 | } | ||
| 740 | } | ||
| 741 | |||
| 742 | public: | ||
| 743 | void ConvertTo(std::span<u8> input, std::span<f32> output) override { | ||
| 744 | const size_t num_pixels = output.size() / components_per_ir_rep; | ||
| 745 | for (size_t pixel = 0; pixel < num_pixels; pixel++) { | ||
| 746 | std::array<u32, total_words_per_pixel> words{}; | ||
| 747 | |||
| 748 | std::memcpy(words.data(), &input[pixel * total_bytes_per_pixel], total_bytes_per_pixel); | ||
| 749 | std::span<f32> new_components(&output[pixel * components_per_ir_rep], | ||
| 750 | components_per_ir_rep); | ||
| 751 | if constexpr (component_swizzle[0] != Swizzle::None) { | ||
| 752 | ConvertToComponent<0>(words[bound_words[0]], | ||
| 753 | new_components[static_cast<size_t>(component_swizzle[0])]); | ||
| 754 | } else { | ||
| 755 | new_components[0] = 0.0f; | ||
| 756 | } | ||
| 757 | if constexpr (num_components >= 2) { | ||
| 758 | if constexpr (component_swizzle[1] != Swizzle::None) { | ||
| 759 | ConvertToComponent<1>( | ||
| 760 | words[bound_words[1]], | ||
| 761 | new_components[static_cast<size_t>(component_swizzle[1])]); | ||
| 762 | } else { | ||
| 763 | new_components[1] = 0.0f; | ||
| 764 | } | ||
| 765 | } else { | ||
| 766 | new_components[1] = 0.0f; | ||
| 767 | } | ||
| 768 | if constexpr (num_components >= 3) { | ||
| 769 | if constexpr (component_swizzle[2] != Swizzle::None) { | ||
| 770 | ConvertToComponent<2>( | ||
| 771 | words[bound_words[2]], | ||
| 772 | new_components[static_cast<size_t>(component_swizzle[2])]); | ||
| 773 | } else { | ||
| 774 | new_components[2] = 0.0f; | ||
| 775 | } | ||
| 776 | } else { | ||
| 777 | new_components[2] = 0.0f; | ||
| 778 | } | ||
| 779 | if constexpr (num_components >= 4) { | ||
| 780 | if constexpr (component_swizzle[3] != Swizzle::None) { | ||
| 781 | ConvertToComponent<3>( | ||
| 782 | words[bound_words[3]], | ||
| 783 | new_components[static_cast<size_t>(component_swizzle[3])]); | ||
| 784 | } else { | ||
| 785 | new_components[3] = 0.0f; | ||
| 786 | } | ||
| 787 | } else { | ||
| 788 | new_components[3] = 0.0f; | ||
| 789 | } | ||
| 790 | } | ||
| 791 | } | ||
| 792 | |||
| 793 | void ConvertFrom(std::span<f32> input, std::span<u8> output) override { | ||
| 794 | const size_t num_pixels = output.size() / total_bytes_per_pixel; | ||
| 795 | for (size_t pixel = 0; pixel < num_pixels; pixel++) { | ||
| 796 | std::span<f32> old_components(&input[pixel * components_per_ir_rep], | ||
| 797 | components_per_ir_rep); | ||
| 798 | std::array<u32, total_words_per_pixel> words{}; | ||
| 799 | if constexpr (component_swizzle[0] != Swizzle::None) { | ||
| 800 | ConvertFromComponent<0>(words[bound_words[0]], | ||
| 801 | old_components[static_cast<size_t>(component_swizzle[0])]); | ||
| 802 | } | ||
| 803 | if constexpr (num_components >= 2) { | ||
| 804 | if constexpr (component_swizzle[1] != Swizzle::None) { | ||
| 805 | ConvertFromComponent<1>( | ||
| 806 | words[bound_words[1]], | ||
| 807 | old_components[static_cast<size_t>(component_swizzle[1])]); | ||
| 808 | } | ||
| 809 | } | ||
| 810 | if constexpr (num_components >= 3) { | ||
| 811 | if constexpr (component_swizzle[2] != Swizzle::None) { | ||
| 812 | ConvertFromComponent<2>( | ||
| 813 | words[bound_words[2]], | ||
| 814 | old_components[static_cast<size_t>(component_swizzle[2])]); | ||
| 815 | } | ||
| 816 | } | ||
| 817 | if constexpr (num_components >= 4) { | ||
| 818 | if constexpr (component_swizzle[3] != Swizzle::None) { | ||
| 819 | ConvertFromComponent<3>( | ||
| 820 | words[bound_words[3]], | ||
| 821 | old_components[static_cast<size_t>(component_swizzle[3])]); | ||
| 822 | } | ||
| 823 | } | ||
| 824 | std::memcpy(&output[pixel * total_bytes_per_pixel], words.data(), | ||
| 825 | total_bytes_per_pixel); | ||
| 826 | } | ||
| 827 | } | ||
| 828 | |||
// Construction and destruction need no custom logic; both are compiler-generated.
ConverterImpl() = default;
~ConverterImpl() = default;
| 831 | }; | ||
| 832 | |||
// Private state of ConverterFactory. The header only forward-declares this type.
struct ConverterFactory::ConverterFactoryImpl {
    // Converters built so far, keyed by render target format. The map owns them.
    std::unordered_map<RenderTargetFormat, std::unique_ptr<Converter>> converters_cache;
};
| 836 | |||
| 837 | ConverterFactory::ConverterFactory() { | ||
| 838 | impl = std::make_unique<ConverterFactoryImpl>(); | ||
| 839 | } | ||
| 840 | |||
// Defined here rather than in the header: destroying `impl` needs the complete
// ConverterFactoryImpl type, which the header only forward-declares.
ConverterFactory::~ConverterFactory() = default;
| 842 | |||
| 843 | Converter* ConverterFactory::GetFormatConverter(RenderTargetFormat format) { | ||
| 844 | auto it = impl->converters_cache.find(format); | ||
| 845 | if (it == impl->converters_cache.end()) [[unlikely]] { | ||
| 846 | return BuildConverter(format); | ||
| 847 | } | ||
| 848 | return it->second.get(); | ||
| 849 | } | ||
| 850 | |||
| 851 | class NullConverter : public Converter { | ||
| 852 | public: | ||
| 853 | void ConvertTo([[maybe_unused]] std::span<u8> input, std::span<f32> output) override { | ||
| 854 | std::fill(output.begin(), output.end(), 0.0f); | ||
| 855 | } | ||
| 856 | void ConvertFrom([[maybe_unused]] std::span<f32> input, std::span<u8> output) override { | ||
| 857 | const u8 fill_value = 0U; | ||
| 858 | std::fill(output.begin(), output.end(), fill_value); | ||
| 859 | } | ||
| 860 | }; | ||
| 861 | |||
| 862 | Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { | ||
| 863 | switch (format) { | ||
| 864 | case RenderTargetFormat::R32B32G32A32_FLOAT: | ||
| 865 | return impl->converters_cache | ||
| 866 | .emplace(format, std::make_unique<ConverterImpl<R32B32G32A32_FLOATTraits>>()) | ||
| 867 | .first->second.get(); | ||
| 868 | break; | ||
| 869 | case RenderTargetFormat::R32G32B32A32_SINT: | ||
| 870 | return impl->converters_cache | ||
| 871 | .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_SINTTraits>>()) | ||
| 872 | .first->second.get(); | ||
| 873 | break; | ||
| 874 | case RenderTargetFormat::R32G32B32A32_UINT: | ||
| 875 | return impl->converters_cache | ||
| 876 | .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_UINTTraits>>()) | ||
| 877 | .first->second.get(); | ||
| 878 | break; | ||
| 879 | case RenderTargetFormat::R16G16B16A16_UNORM: | ||
| 880 | return impl->converters_cache | ||
| 881 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UNORMTraits>>()) | ||
| 882 | .first->second.get(); | ||
| 883 | break; | ||
| 884 | case RenderTargetFormat::R16G16B16A16_SNORM: | ||
| 885 | return impl->converters_cache | ||
| 886 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SNORMTraits>>()) | ||
| 887 | .first->second.get(); | ||
| 888 | break; | ||
| 889 | case RenderTargetFormat::R16G16B16A16_SINT: | ||
| 890 | return impl->converters_cache | ||
| 891 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SINTTraits>>()) | ||
| 892 | .first->second.get(); | ||
| 893 | break; | ||
| 894 | case RenderTargetFormat::R16G16B16A16_UINT: | ||
| 895 | return impl->converters_cache | ||
| 896 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UINTTraits>>()) | ||
| 897 | .first->second.get(); | ||
| 898 | break; | ||
| 899 | case RenderTargetFormat::R16G16B16A16_FLOAT: | ||
| 900 | return impl->converters_cache | ||
| 901 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_FLOATTraits>>()) | ||
| 902 | .first->second.get(); | ||
| 903 | break; | ||
| 904 | case RenderTargetFormat::R32G32_FLOAT: | ||
| 905 | return impl->converters_cache | ||
| 906 | .emplace(format, std::make_unique<ConverterImpl<R32G32_FLOATTraits>>()) | ||
| 907 | .first->second.get(); | ||
| 908 | break; | ||
| 909 | case RenderTargetFormat::R32G32_SINT: | ||
| 910 | return impl->converters_cache | ||
| 911 | .emplace(format, std::make_unique<ConverterImpl<R32G32_SINTTraits>>()) | ||
| 912 | .first->second.get(); | ||
| 913 | break; | ||
| 914 | case RenderTargetFormat::R32G32_UINT: | ||
| 915 | return impl->converters_cache | ||
| 916 | .emplace(format, std::make_unique<ConverterImpl<R32G32_UINTTraits>>()) | ||
| 917 | .first->second.get(); | ||
| 918 | break; | ||
| 919 | case RenderTargetFormat::R16G16B16X16_FLOAT: | ||
| 920 | return impl->converters_cache | ||
| 921 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16X16_FLOATTraits>>()) | ||
| 922 | .first->second.get(); | ||
| 923 | break; | ||
| 924 | case RenderTargetFormat::A8R8G8B8_UNORM: | ||
| 925 | return impl->converters_cache | ||
| 926 | .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_UNORMTraits>>()) | ||
| 927 | .first->second.get(); | ||
| 928 | break; | ||
| 929 | case RenderTargetFormat::A8R8G8B8_SRGB: | ||
| 930 | return impl->converters_cache | ||
| 931 | .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_SRGBTraits>>()) | ||
| 932 | .first->second.get(); | ||
| 933 | break; | ||
| 934 | case RenderTargetFormat::A2B10G10R10_UNORM: | ||
| 935 | return impl->converters_cache | ||
| 936 | .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UNORMTraits>>()) | ||
| 937 | .first->second.get(); | ||
| 938 | break; | ||
| 939 | case RenderTargetFormat::A2B10G10R10_UINT: | ||
| 940 | return impl->converters_cache | ||
| 941 | .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UINTTraits>>()) | ||
| 942 | .first->second.get(); | ||
| 943 | break; | ||
| 944 | case RenderTargetFormat::A8B8G8R8_UNORM: | ||
| 945 | return impl->converters_cache | ||
| 946 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UNORMTraits>>()) | ||
| 947 | .first->second.get(); | ||
| 948 | break; | ||
| 949 | case RenderTargetFormat::A8B8G8R8_SRGB: | ||
| 950 | return impl->converters_cache | ||
| 951 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SRGBTraits>>()) | ||
| 952 | .first->second.get(); | ||
| 953 | break; | ||
| 954 | case RenderTargetFormat::A8B8G8R8_SNORM: | ||
| 955 | return impl->converters_cache | ||
| 956 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SNORMTraits>>()) | ||
| 957 | .first->second.get(); | ||
| 958 | break; | ||
| 959 | case RenderTargetFormat::A8B8G8R8_SINT: | ||
| 960 | return impl->converters_cache | ||
| 961 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SINTTraits>>()) | ||
| 962 | .first->second.get(); | ||
| 963 | break; | ||
| 964 | case RenderTargetFormat::A8B8G8R8_UINT: | ||
| 965 | return impl->converters_cache | ||
| 966 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UINTTraits>>()) | ||
| 967 | .first->second.get(); | ||
| 968 | break; | ||
| 969 | case RenderTargetFormat::R16G16_UNORM: | ||
| 970 | return impl->converters_cache | ||
| 971 | .emplace(format, std::make_unique<ConverterImpl<R16G16_UNORMTraits>>()) | ||
| 972 | .first->second.get(); | ||
| 973 | break; | ||
| 974 | case RenderTargetFormat::R16G16_SNORM: | ||
| 975 | return impl->converters_cache | ||
| 976 | .emplace(format, std::make_unique<ConverterImpl<R16G16_SNORMTraits>>()) | ||
| 977 | .first->second.get(); | ||
| 978 | break; | ||
| 979 | case RenderTargetFormat::R16G16_SINT: | ||
| 980 | return impl->converters_cache | ||
| 981 | .emplace(format, std::make_unique<ConverterImpl<R16G16_SINTTraits>>()) | ||
| 982 | .first->second.get(); | ||
| 983 | break; | ||
| 984 | case RenderTargetFormat::R16G16_UINT: | ||
| 985 | return impl->converters_cache | ||
| 986 | .emplace(format, std::make_unique<ConverterImpl<R16G16_UINTTraits>>()) | ||
| 987 | .first->second.get(); | ||
| 988 | break; | ||
| 989 | case RenderTargetFormat::R16G16_FLOAT: | ||
| 990 | return impl->converters_cache | ||
| 991 | .emplace(format, std::make_unique<ConverterImpl<R16G16_FLOATTraits>>()) | ||
| 992 | .first->second.get(); | ||
| 993 | break; | ||
| 994 | case RenderTargetFormat::B10G11R11_FLOAT: | ||
| 995 | return impl->converters_cache | ||
| 996 | .emplace(format, std::make_unique<ConverterImpl<B10G11R11_FLOATTraits>>()) | ||
| 997 | .first->second.get(); | ||
| 998 | break; | ||
| 999 | case RenderTargetFormat::R32_SINT: | ||
| 1000 | return impl->converters_cache | ||
| 1001 | .emplace(format, std::make_unique<ConverterImpl<R32_SINTTraits>>()) | ||
| 1002 | .first->second.get(); | ||
| 1003 | break; | ||
| 1004 | case RenderTargetFormat::R32_UINT: | ||
| 1005 | return impl->converters_cache | ||
| 1006 | .emplace(format, std::make_unique<ConverterImpl<R32_UINTTraits>>()) | ||
| 1007 | .first->second.get(); | ||
| 1008 | break; | ||
| 1009 | case RenderTargetFormat::R32_FLOAT: | ||
| 1010 | return impl->converters_cache | ||
| 1011 | .emplace(format, std::make_unique<ConverterImpl<R32_FLOATTraits>>()) | ||
| 1012 | .first->second.get(); | ||
| 1013 | break; | ||
| 1014 | case RenderTargetFormat::R5G6B5_UNORM: | ||
| 1015 | return impl->converters_cache | ||
| 1016 | .emplace(format, std::make_unique<ConverterImpl<R5G6B5_UNORMTraits>>()) | ||
| 1017 | .first->second.get(); | ||
| 1018 | break; | ||
| 1019 | case RenderTargetFormat::A1R5G5B5_UNORM: | ||
| 1020 | return impl->converters_cache | ||
| 1021 | .emplace(format, std::make_unique<ConverterImpl<A1R5G5B5_UNORMTraits>>()) | ||
| 1022 | .first->second.get(); | ||
| 1023 | break; | ||
| 1024 | case RenderTargetFormat::R8G8_UNORM: | ||
| 1025 | return impl->converters_cache | ||
| 1026 | .emplace(format, std::make_unique<ConverterImpl<R8G8_UNORMTraits>>()) | ||
| 1027 | .first->second.get(); | ||
| 1028 | break; | ||
| 1029 | case RenderTargetFormat::R8G8_SNORM: | ||
| 1030 | return impl->converters_cache | ||
| 1031 | .emplace(format, std::make_unique<ConverterImpl<R8G8_SNORMTraits>>()) | ||
| 1032 | .first->second.get(); | ||
| 1033 | break; | ||
| 1034 | case RenderTargetFormat::R8G8_SINT: | ||
| 1035 | return impl->converters_cache | ||
| 1036 | .emplace(format, std::make_unique<ConverterImpl<R8G8_SINTTraits>>()) | ||
| 1037 | .first->second.get(); | ||
| 1038 | break; | ||
| 1039 | case RenderTargetFormat::R8G8_UINT: | ||
| 1040 | return impl->converters_cache | ||
| 1041 | .emplace(format, std::make_unique<ConverterImpl<R8G8_UINTTraits>>()) | ||
| 1042 | .first->second.get(); | ||
| 1043 | break; | ||
| 1044 | case RenderTargetFormat::R16_UNORM: | ||
| 1045 | return impl->converters_cache | ||
| 1046 | .emplace(format, std::make_unique<ConverterImpl<R16_UNORMTraits>>()) | ||
| 1047 | .first->second.get(); | ||
| 1048 | break; | ||
| 1049 | case RenderTargetFormat::R16_SNORM: | ||
| 1050 | return impl->converters_cache | ||
| 1051 | .emplace(format, std::make_unique<ConverterImpl<R16_SNORMTraits>>()) | ||
| 1052 | .first->second.get(); | ||
| 1053 | break; | ||
| 1054 | case RenderTargetFormat::R16_SINT: | ||
| 1055 | return impl->converters_cache | ||
| 1056 | .emplace(format, std::make_unique<ConverterImpl<R16_SINTTraits>>()) | ||
| 1057 | .first->second.get(); | ||
| 1058 | break; | ||
| 1059 | case RenderTargetFormat::R16_UINT: | ||
| 1060 | return impl->converters_cache | ||
| 1061 | .emplace(format, std::make_unique<ConverterImpl<R16_UINTTraits>>()) | ||
| 1062 | .first->second.get(); | ||
| 1063 | break; | ||
| 1064 | case RenderTargetFormat::R16_FLOAT: | ||
| 1065 | return impl->converters_cache | ||
| 1066 | .emplace(format, std::make_unique<ConverterImpl<R16_FLOATTraits>>()) | ||
| 1067 | .first->second.get(); | ||
| 1068 | break; | ||
| 1069 | case RenderTargetFormat::R8_UNORM: | ||
| 1070 | return impl->converters_cache | ||
| 1071 | .emplace(format, std::make_unique<ConverterImpl<R8_UNORMTraits>>()) | ||
| 1072 | .first->second.get(); | ||
| 1073 | break; | ||
| 1074 | case RenderTargetFormat::R8_SNORM: | ||
| 1075 | return impl->converters_cache | ||
| 1076 | .emplace(format, std::make_unique<ConverterImpl<R8_SNORMTraits>>()) | ||
| 1077 | .first->second.get(); | ||
| 1078 | break; | ||
| 1079 | case RenderTargetFormat::R8_SINT: | ||
| 1080 | return impl->converters_cache | ||
| 1081 | .emplace(format, std::make_unique<ConverterImpl<R8_SINTTraits>>()) | ||
| 1082 | .first->second.get(); | ||
| 1083 | break; | ||
| 1084 | case RenderTargetFormat::R8_UINT: | ||
| 1085 | return impl->converters_cache | ||
| 1086 | .emplace(format, std::make_unique<ConverterImpl<R8_UINTTraits>>()) | ||
| 1087 | .first->second.get(); | ||
| 1088 | break; | ||
| 1089 | default: { | ||
| 1090 | UNIMPLEMENTED_MSG("This format {} converter is not implemented", format); | ||
| 1091 | return impl->converters_cache.emplace(format, std::make_unique<NullConverter>()) | ||
| 1092 | .first->second.get(); | ||
| 1093 | } | ||
| 1094 | } | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | } // namespace Tegra::Engines::Blitter | ||
diff --git a/src/video_core/engines/sw_blitter/converter.h b/src/video_core/engines/sw_blitter/converter.h new file mode 100644 index 000000000..03337e906 --- /dev/null +++ b/src/video_core/engines/sw_blitter/converter.h | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include <memory> | ||
| 5 | #include <span> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | #pragma once | ||
| 10 | |||
| 11 | #include "video_core/gpu.h" | ||
| 12 | |||
| 13 | namespace Tegra::Engines::Blitter { | ||
| 14 | |||
| 15 | class Converter { | ||
| 16 | public: | ||
| 17 | virtual void ConvertTo(std::span<u8> input, std::span<f32> output) = 0; | ||
| 18 | virtual void ConvertFrom(std::span<f32> input, std::span<u8> output) = 0; | ||
| 19 | }; | ||
| 20 | |||
/// Hands out Converter instances per render target format and caches them for reuse.
class ConverterFactory {
public:
    ConverterFactory();
    ~ConverterFactory();

    /// Returns the converter for `format`, creating it on first request.
    /// The returned pointer is owned by the factory.
    Converter* GetFormatConverter(RenderTargetFormat format);

private:
    /// Creates the converter for `format` and stores it in the cache.
    Converter* BuildConverter(RenderTargetFormat format);

    // Implementation details live in the source file; only a forward declaration is needed here.
    struct ConverterFactoryImpl;
    std::unique_ptr<ConverterFactoryImpl> impl;
};
| 34 | |||
| 35 | } // namespace Tegra::Engines::Blitter | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 354c6e429..f71a316b6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -466,8 +466,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf | |||
| 466 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 466 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 467 | MICROPROFILE_SCOPE(OpenGL_Blits); | 467 | MICROPROFILE_SCOPE(OpenGL_Blits); |
| 468 | std::scoped_lock lock{texture_cache.mutex}; | 468 | std::scoped_lock lock{texture_cache.mutex}; |
| 469 | texture_cache.BlitImage(dst, src, copy_config); | 469 | return texture_cache.BlitImage(dst, src, copy_config); |
| 470 | return true; | ||
| 471 | } | 470 | } |
| 472 | 471 | ||
| 473 | Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { | 472 | Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 12b13cc59..d8ad8815c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -542,8 +542,7 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf | |||
| 542 | const Tegra::Engines::Fermi2D::Surface& dst, | 542 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 543 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 543 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 544 | std::scoped_lock lock{texture_cache.mutex}; | 544 | std::scoped_lock lock{texture_cache.mutex}; |
| 545 | texture_cache.BlitImage(dst, src, copy_config); | 545 | return texture_cache.BlitImage(dst, src, copy_config); |
| 546 | return true; | ||
| 547 | } | 546 | } |
| 548 | 547 | ||
| 549 | Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { | 548 | Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8ef75fe73..8e68a2e53 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -506,10 +506,14 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz | |||
| 506 | } | 506 | } |
| 507 | 507 | ||
| 508 | template <class P> | 508 | template <class P> |
| 509 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | 509 | bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 510 | const Tegra::Engines::Fermi2D::Surface& src, | 510 | const Tegra::Engines::Fermi2D::Surface& src, |
| 511 | const Tegra::Engines::Fermi2D::Config& copy) { | 511 | const Tegra::Engines::Fermi2D::Config& copy) { |
| 512 | const BlitImages images = GetBlitImages(dst, src, copy); | 512 | const auto result = GetBlitImages(dst, src, copy); |
| 513 | if (!result) { | ||
| 514 | return false; | ||
| 515 | } | ||
| 516 | const BlitImages images = *result; | ||
| 513 | const ImageId dst_id = images.dst_id; | 517 | const ImageId dst_id = images.dst_id; |
| 514 | const ImageId src_id = images.src_id; | 518 | const ImageId src_id = images.src_id; |
| 515 | 519 | ||
| @@ -596,6 +600,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | |||
| 596 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, | 600 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, |
| 597 | copy.operation); | 601 | copy.operation); |
| 598 | } | 602 | } |
| 603 | return true; | ||
| 599 | } | 604 | } |
| 600 | 605 | ||
| 601 | template <class P> | 606 | template <class P> |
| @@ -1133,7 +1138,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1133 | } | 1138 | } |
| 1134 | 1139 | ||
| 1135 | template <class P> | 1140 | template <class P> |
| 1136 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | 1141 | std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImages( |
| 1137 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, | 1142 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, |
| 1138 | const Tegra::Engines::Fermi2D::Config& copy) { | 1143 | const Tegra::Engines::Fermi2D::Config& copy) { |
| 1139 | 1144 | ||
| @@ -1154,6 +1159,20 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | |||
| 1154 | has_deleted_images = false; | 1159 | has_deleted_images = false; |
| 1155 | src_id = FindImage(src_info, src_addr, try_options); | 1160 | src_id = FindImage(src_info, src_addr, try_options); |
| 1156 | dst_id = FindImage(dst_info, dst_addr, try_options); | 1161 | dst_id = FindImage(dst_info, dst_addr, try_options); |
| 1162 | if (!copy.must_accelerate) { | ||
| 1163 | do { | ||
| 1164 | if (!src_id && !dst_id) { | ||
| 1165 | return std::nullopt; | ||
| 1166 | } | ||
| 1167 | if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) { | ||
| 1168 | break; | ||
| 1169 | } | ||
| 1170 | if (dst_id && True(slot_images[dst_id].flags & ImageFlagBits::GpuModified)) { | ||
| 1171 | break; | ||
| 1172 | } | ||
| 1173 | return std::nullopt; | ||
| 1174 | } while (false); | ||
| 1175 | } | ||
| 1157 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | 1176 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; |
| 1158 | if (src_image && src_image->info.num_samples > 1) { | 1177 | if (src_image && src_image->info.num_samples > 1) { |
| 1159 | RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; | 1178 | RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; |
| @@ -1194,12 +1213,12 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | |||
| 1194 | dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); | 1213 | dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); |
| 1195 | } while (has_deleted_images); | 1214 | } while (has_deleted_images); |
| 1196 | } | 1215 | } |
| 1197 | return BlitImages{ | 1216 | return {BlitImages{ |
| 1198 | .dst_id = dst_id, | 1217 | .dst_id = dst_id, |
| 1199 | .src_id = src_id, | 1218 | .src_id = src_id, |
| 1200 | .dst_format = dst_info.format, | 1219 | .dst_format = dst_info.format, |
| 1201 | .src_format = src_info.format, | 1220 | .src_format = src_info.format, |
| 1202 | }; | 1221 | }}; |
| 1203 | } | 1222 | } |
| 1204 | 1223 | ||
| 1205 | template <class P> | 1224 | template <class P> |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2fa8445eb..9db7195bf 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -174,7 +174,7 @@ public: | |||
| 174 | void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); | 174 | void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); |
| 175 | 175 | ||
| 176 | /// Blit an image with the given parameters | 176 | /// Blit an image with the given parameters |
| 177 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | 177 | bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 178 | const Tegra::Engines::Fermi2D::Surface& src, | 178 | const Tegra::Engines::Fermi2D::Surface& src, |
| 179 | const Tegra::Engines::Fermi2D::Config& copy); | 179 | const Tegra::Engines::Fermi2D::Config& copy); |
| 180 | 180 | ||
| @@ -285,9 +285,9 @@ private: | |||
| 285 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | 285 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); |
| 286 | 286 | ||
| 287 | /// Return a blit image pair from the given guest blit parameters | 287 | /// Return a blit image pair from the given guest blit parameters |
| 288 | [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, | 288 | [[nodiscard]] std::optional<BlitImages> GetBlitImages( |
| 289 | const Tegra::Engines::Fermi2D::Surface& src, | 289 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, |
| 290 | const Tegra::Engines::Fermi2D::Config& copy); | 290 | const Tegra::Engines::Fermi2D::Config& copy); |
| 291 | 291 | ||
| 292 | /// Find or create a sampler from a guest descriptor sampler | 292 | /// Find or create a sampler from a guest descriptor sampler |
| 293 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); | 293 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); |