summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2022-11-05 22:26:38 +0100
committer: Fernando Sahmkow, 2022-11-24 20:35:44 +0100
commit: 957840be9151e7c3b97b638cc0d10d73173c4036 (patch)
tree: bf3f3aa7b612265fd19db8297ee09d71c819abe7 /src
parent: Merge pull request #9299 from lioncash/cast (diff)
download: yuzu-957840be9151e7c3b97b638cc0d10d73173c4036.tar.gz
download: yuzu-957840be9151e7c3b97b638cc0d10d73173c4036.tar.xz
download: yuzu-957840be9151e7c3b97b638cc0d10d73173c4036.zip
Fermi2D: Rework blit engine and add a software blitter.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/CMakeLists.txt | 4
-rw-r--r-- src/video_core/control/channel_state.cpp | 2
-rw-r--r-- src/video_core/engines/fermi_2d.cpp | 19
-rw-r--r-- src/video_core/engines/fermi_2d.h | 9
-rw-r--r-- src/video_core/engines/sw_blitter/blitter.cpp | 213
-rw-r--r-- src/video_core/engines/sw_blitter/blitter.h | 27
-rw-r--r-- src/video_core/engines/sw_blitter/converter.cpp | 1097
-rw-r--r-- src/video_core/engines/sw_blitter/converter.h | 35
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 3
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 29
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h | 8
12 files changed, 1431 insertions, 18 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d7f7d336c..b03a30992 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -28,6 +28,10 @@ add_library(video_core STATIC
28 dirty_flags.h 28 dirty_flags.h
29 dma_pusher.cpp 29 dma_pusher.cpp
30 dma_pusher.h 30 dma_pusher.h
31 engines/sw_blitter/blitter.cpp
32 engines/sw_blitter/blitter.h
33 engines/sw_blitter/converter.cpp
34 engines/sw_blitter/converter.h
31 engines/const_buffer_info.h 35 engines/const_buffer_info.h
32 engines/engine_interface.h 36 engines/engine_interface.h
33 engines/engine_upload.cpp 37 engines/engine_upload.cpp
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index cdecc3a91..832025d75 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -20,7 +20,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) {
20 ASSERT(memory_manager); 20 ASSERT(memory_manager);
21 dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this); 21 dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
22 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager); 22 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
23 fermi_2d = std::make_unique<Engines::Fermi2D>(); 23 fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
24 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager); 24 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
25 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); 25 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
26 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); 26 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 453e0fb01..2c722c778 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -3,17 +3,25 @@
3 3
4#include "common/assert.h" 4#include "common/assert.h"
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "common/microprofile.h"
6#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
7#include "video_core/memory_manager.h" 8#include "video_core/engines/sw_blitter/blitter.h"
8#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
9#include "video_core/surface.h" 10#include "video_core/surface.h"
11#include "video_core/textures/decoders.h"
12
13MICROPROFILE_DECLARE(GPU_BlitEngine);
14MICROPROFILE_DEFINE(GPU_BlitEngine, "GPU", "Blit Engine", MP_RGB(224, 224, 128));
10 15
11using VideoCore::Surface::BytesPerBlock; 16using VideoCore::Surface::BytesPerBlock;
12using VideoCore::Surface::PixelFormatFromRenderTargetFormat; 17using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
13 18
14namespace Tegra::Engines { 19namespace Tegra::Engines {
15 20
16Fermi2D::Fermi2D() { 21using namespace Texture;
22
23Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
24 sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
17 // Nvidia's OpenGL driver seems to assume these values 25 // Nvidia's OpenGL driver seems to assume these values
18 regs.src.depth = 1; 26 regs.src.depth = 1;
19 regs.dst.depth = 1; 27 regs.dst.depth = 1;
@@ -42,6 +50,7 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
42} 50}
43 51
44void Fermi2D::Blit() { 52void Fermi2D::Blit() {
53 MICROPROFILE_SCOPE(GPU_BlitEngine);
45 LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", 54 LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
46 regs.src.Address(), regs.dst.Address()); 55 regs.src.Address(), regs.dst.Address());
47 56
@@ -52,9 +61,12 @@ void Fermi2D::Blit() {
52 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); 61 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
53 62
54 const auto& args = regs.pixels_from_memory; 63 const auto& args = regs.pixels_from_memory;
64 constexpr s64 null_derivate = 1ULL << 32;
55 Config config{ 65 Config config{
56 .operation = regs.operation, 66 .operation = regs.operation,
57 .filter = args.sample_mode.filter, 67 .filter = args.sample_mode.filter,
68 .must_accelerate = args.du_dx != null_derivate || args.dv_dy != null_derivate ||
69 args.sample_mode.filter == Filter::Bilinear,
58 .dst_x0 = args.dst_x0, 70 .dst_x0 = args.dst_x0,
59 .dst_y0 = args.dst_y0, 71 .dst_y0 = args.dst_y0,
60 .dst_x1 = args.dst_x0 + args.dst_width, 72 .dst_x1 = args.dst_x0 + args.dst_width,
@@ -78,8 +90,9 @@ void Fermi2D::Blit() {
78 config.src_x1 -= config.src_x0; 90 config.src_x1 -= config.src_x0;
79 config.src_x0 = 0; 91 config.src_x0 = 0;
80 } 92 }
93
81 if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { 94 if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
82 UNIMPLEMENTED(); 95 sw_blitter->Blit(src, regs.dst, config);
83 } 96 }
84} 97}
85 98
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 1229aa35b..24b518cb5 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,6 +5,7 @@
5 5
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <memory>
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
@@ -21,6 +22,10 @@ class RasterizerInterface;
21 22
22namespace Tegra::Engines { 23namespace Tegra::Engines {
23 24
25namespace Blitter {
26class SoftwareBlitEngine;
27}
28
24/** 29/**
25 * This Engine is known as G80_2D. Documentation can be found in: 30 * This Engine is known as G80_2D. Documentation can be found in:
26 * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml 31 * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
@@ -32,7 +37,7 @@ namespace Tegra::Engines {
32 37
33class Fermi2D final : public EngineInterface { 38class Fermi2D final : public EngineInterface {
34public: 39public:
35 explicit Fermi2D(); 40 explicit Fermi2D(MemoryManager& memory_manager_);
36 ~Fermi2D() override; 41 ~Fermi2D() override;
37 42
38 /// Binds a rasterizer to this engine. 43 /// Binds a rasterizer to this engine.
@@ -286,6 +291,7 @@ public:
286 struct Config { 291 struct Config {
287 Operation operation; 292 Operation operation;
288 Filter filter; 293 Filter filter;
294 bool must_accelerate;
289 s32 dst_x0; 295 s32 dst_x0;
290 s32 dst_y0; 296 s32 dst_y0;
291 s32 dst_x1; 297 s32 dst_x1;
@@ -298,6 +304,7 @@ public:
298 304
299private: 305private:
300 VideoCore::RasterizerInterface* rasterizer = nullptr; 306 VideoCore::RasterizerInterface* rasterizer = nullptr;
307 std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
301 308
302 /// Performs the copy from the source surface to the destination surface as configured in the 309 /// Performs the copy from the source surface to the destination surface as configured in the
303 /// registers. 310 /// registers.
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp
new file mode 100644
index 000000000..caf51cbe3
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/blitter.cpp
@@ -0,0 +1,213 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include <vector>
5
6#include "video_core/engines/sw_blitter/blitter.h"
7#include "video_core/engines/sw_blitter/converter.h"
8#include "video_core/memory_manager.h"
9#include "video_core/surface.h"
10#include "video_core/textures/decoders.h"
11
12namespace Tegra {
13class MemoryManager;
14}
15
16using VideoCore::Surface::BytesPerBlock;
17using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
18
19namespace Tegra::Engines::Blitter {
20
21using namespace Texture;
22
23namespace {
24
25void NeighrestNeighbor(std::span<u8> input, std::span<u8> output, u32 src_width, u32 src_height,
26 u32 dst_width, u32 dst_height, size_t bpp) {
27 const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
28 const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
29 size_t src_y = 0;
30 for (u32 y = 0; y < dst_height; y++) {
31 size_t src_x = 0;
32 for (u32 x = 0; x < dst_width; x++) {
33 const size_t read_from = ((src_y * src_width + src_x) >> 32) * bpp;
34 const size_t write_to = (y * dst_width + x) * bpp;
35
36 std::memcpy(&output[write_to], &input[read_from], bpp);
37 src_x += dx_du;
38 }
39 src_y += dy_dv;
40 }
41}
42
43void NeighrestNeighborFast(std::span<f32> input, std::span<f32> output, u32 src_width,
44 u32 src_height, u32 dst_width, u32 dst_height) {
45 const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
46 const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
47 size_t src_y = 0;
48 for (u32 y = 0; y < dst_height; y++) {
49 size_t src_x = 0;
50 for (u32 x = 0; x < dst_width; x++) {
51 const size_t read_from = ((src_y * src_width + src_x) >> 32) * 4;
52 const size_t write_to = (y * dst_width + x) * 4;
53
54 std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * 4);
55 src_x += dx_du;
56 }
57 src_y += dy_dv;
58 }
59}
60
61/*
62void Bilinear(std::span<f32> input, std::span<f32> output, size_t src_width,
63 size_t src_height, size_t dst_width, size_t dst_height) {
64 const auto inv_lerp = [](u32 coord, u32 end) { return
65static_cast<f32>(std::min(std::max(static_cast<s32>(coord), 0), end - 1)) / (end); };
66
67
68 for (u32 y = 0; y < dst_height; y++) {
69 const f32 ty_0 = inv_lerp(y, dst_extent_y);
70 const f32 ty_1 = inv_lerp(y + 1, dst_extent_y);
71 for (u32 x = 0; x < dst_width; x++) {
72 const f32 tx_0 = inv_lerp(x, dst_extent_x);
73 const f32 tx_1 = inv_lerp(x + 1, dst_extent_x);
74 const std::array<f32, 4> get_pixel = [&](f32 tx, f32 ty, u32 width, u32 height) {
75 std::array<f32, 4> result{};
76
77 return (std::llround(width * tx) + std::llround(height * ty) * width) * 4;
78 };
79 std::array<f32, 4> result{};
80
81 const size_t read_from = get_pixel(src_width, src_height);
82 const size_t write_to = get_pixel(tx_0, ty_0, dst_width, dst_height);
83
84 std::memcpy(&output[write_to], &input[read_from], bpp);
85 }
86 }
87}
88*/
89
90} // namespace
91
// Private state of the software blit engine. The buffers are members so that they live as long
// as the engine and are resized, rather than recreated, by each Blit call.
92struct SoftwareBlitEngine::BlitEngineImpl {
    // Raw bytes of a whole surface as read from / written to guest memory.
93 std::vector<u8> tmp_buffer;
    // Tightly packed bytes of the source rectangle.
94 std::vector<u8> src_buffer;
    // Tightly packed bytes of the destination rectangle.
95 std::vector<u8> dst_buffer;
    // Source rectangle after conversion, sized at four f32 values per pixel.
96 std::vector<f32> intermediate_src;
    // Destination rectangle before conversion back, sized at four f32 values per pixel.
97 std::vector<f32> intermediate_dst;
    // Supplies the per-format converters used when source and destination formats differ.
98 ConverterFactory converter_factory;
99};
100
101SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_)
102 : memory_manager{memory_manager_} {
103 impl = std::make_unique<BlitEngineImpl>();
104}
105
// Defaulted here rather than in the header: the header only forward-declares BlitEngineImpl,
// and destroying the std::unique_ptr member requires the complete type defined above.
106SoftwareBlitEngine::~SoftwareBlitEngine() = default;
107
108bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
109 Fermi2D::Config& config) {
110 UNIMPLEMENTED_IF(config.filter == Fermi2D::Filter::Bilinear);
111
112 const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) {
113 if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) {
114 return CalculateSize(true, bytes_per_pixel, surface.width, surface.height,
115 surface.depth, surface.block_height, surface.block_depth);
116 }
117 return static_cast<size_t>(surface.pitch * surface.height);
118 };
119 const auto process_pitch_linear = [](bool unpack, std::span<u8> input, std::span<u8> output,
120 u32 extent_x, u32 extent_y, u32 pitch, u32 x0, u32 y0,
121 size_t bpp) {
122 const size_t base_offset = x0 * bpp;
123 const size_t copy_size = extent_x * bpp;
124 for (u32 y = y0; y < extent_y; y++) {
125 const size_t first_offset = y * pitch + base_offset;
126 const size_t second_offset = y * extent_x * bpp;
127 u8* write_to = unpack ? &output[first_offset] : &output[second_offset];
128 const u8* read_from = unpack ? &input[second_offset] : &input[first_offset];
129 std::memcpy(write_to, read_from, copy_size);
130 }
131 };
132
133 const u32 src_extent_x = config.src_x1 - config.src_x0;
134 const u32 src_extent_y = config.src_y1 - config.src_y0;
135
136 const u32 dst_extent_x = config.dst_x1 - config.dst_x0;
137 const u32 dst_extent_y = config.dst_y1 - config.dst_y0;
138 const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
139 const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
140 const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
141 impl->tmp_buffer.resize(src_size);
142 memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);
143
144 const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel;
145
146 const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel;
147
148 impl->src_buffer.resize(src_copy_size);
149
150 const bool no_passthrough =
151 src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y;
152
153 const auto convertion_phase_same_format = [&]() {
154 NeighrestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y,
155 dst_extent_x, dst_extent_y, dst_bytes_per_pixel);
156 };
157
158 const auto convertion_phase_ir = [&]() {
159 auto* input_converter = impl->converter_factory.GetFormatConverter(src.format);
160 impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * 4);
161 impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * 4);
162 input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src);
163
164 NeighrestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x,
165 src_extent_y, dst_extent_x, dst_extent_y);
166
167 auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format);
168 output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer);
169 };
170
171 // Do actuall Blit
172
173 impl->dst_buffer.resize(dst_copy_size);
174 if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
175 UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width,
176 src.height, src.depth, config.src_x0, config.src_y0, src_extent_x,
177 src_extent_y, src.block_height, src.block_depth,
178 src_extent_x * src_bytes_per_pixel);
179 } else {
180 process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
181 src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
182 }
183
184 // Conversion Phase
185 if (no_passthrough) {
186 if (src.format != dst.format) {
187 convertion_phase_ir();
188 } else {
189 convertion_phase_same_format();
190 }
191 } else {
192 impl->dst_buffer.swap(impl->src_buffer);
193 }
194
195 const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
196 impl->tmp_buffer.resize(dst_size);
197 memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
198
199 if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
200 SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width,
201 dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x,
202 dst_extent_y, dst.block_height, dst.block_depth,
203 dst_extent_x * dst_bytes_per_pixel);
204 } else {
205 process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y,
206 dst.pitch, config.dst_x0, config.dst_y0,
207 static_cast<size_t>(dst_bytes_per_pixel));
208 }
209 memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
210 return true;
211}
212
213} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/engines/sw_blitter/blitter.h b/src/video_core/engines/sw_blitter/blitter.h
new file mode 100644
index 000000000..3edf40c3e
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/blitter.h
@@ -0,0 +1,27 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include "video_core/engines/fermi_2d.h"
7
8namespace Tegra {
9class MemoryManager;
10}
11
12namespace Tegra::Engines::Blitter {
13
14class SoftwareBlitEngine {
15public:
16 SoftwareBlitEngine(MemoryManager& memory_manager_);
17 ~SoftwareBlitEngine();
18
19 bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config);
20
21private:
22 MemoryManager& memory_manager;
23 struct BlitEngineImpl;
24 std::unique_ptr<BlitEngineImpl> impl;
25};
26
27} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp
new file mode 100644
index 000000000..2e376f430
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/converter.cpp
@@ -0,0 +1,1097 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include <array>
5#include <bit>
6#include <cmath>
7#include <span>
8#include <unordered_map>
9
10#include "common/assert.h"
11#include "video_core/engines/sw_blitter/converter.h"
12#include "video_core/surface.h"
13#include "video_core/textures/decoders.h"
14
15#ifdef _MSC_VER
16#define FORCE_INLINE __forceinline
17#else
18#define FORCE_INLINE inline __attribute__((always_inline))
19#endif
20
21namespace Tegra::Engines::Blitter {
22
// Names the colour channel a format component is associated with. R, G, B and A take the values
// 0 to 3; None takes the next value (4) and is used by the format traits below for a component
// that has no channel, such as the X16 slot of R16G16B16X16_FLOATTraits.
23enum class Swizzle : size_t {
24 R = 0,
25 G = 1,
26 B = 2,
27 A = 3,
28 None,
29};
30
// Numeric interpretation of one component of a pixel format, referenced by the component_types
// arrays of the format traits below.
// NOTE(review): the explicit values 1..8 presumably mirror an existing hardware or texture
// enumeration — confirm before renumbering.
31enum class ComponentType : u32 {
32 SNORM = 1,
33 UNORM = 2,
34 SINT = 3,
35 UINT = 4,
36 SNORM_FORCE_FP16 = 5,
37 UNORM_FORCE_FP16 = 6,
38 FLOAT = 7,
39 SRGB = 8,
40};
41
42namespace {
43
// 256-entry table running from 0.0 to 1.0. The entries are consistent with the sRGB decoding
// (sRGB -> linear) transfer function evaluated at i / 255 for index i, e.g. entry 1 is
// (1 / 255) / 12.92.
44constexpr std::array<f32, 256> SRGB_TO_RGB_LUT = {
45 0.000000e+00f, 3.035270e-04f, 6.070540e-04f, 9.105810e-04f, 1.214108e-03f, 1.517635e-03f,
46 1.821162e-03f, 2.124689e-03f, 2.428216e-03f, 2.731743e-03f, 3.035270e-03f, 3.346536e-03f,
47 3.676507e-03f, 4.024717e-03f, 4.391442e-03f, 4.776953e-03f, 5.181517e-03f, 5.605392e-03f,
48 6.048833e-03f, 6.512091e-03f, 6.995410e-03f, 7.499032e-03f, 8.023193e-03f, 8.568126e-03f,
49 9.134059e-03f, 9.721218e-03f, 1.032982e-02f, 1.096009e-02f, 1.161224e-02f, 1.228649e-02f,
50 1.298303e-02f, 1.370208e-02f, 1.444384e-02f, 1.520851e-02f, 1.599629e-02f, 1.680738e-02f,
51 1.764195e-02f, 1.850022e-02f, 1.938236e-02f, 2.028856e-02f, 2.121901e-02f, 2.217389e-02f,
52 2.315337e-02f, 2.415763e-02f, 2.518686e-02f, 2.624122e-02f, 2.732089e-02f, 2.842604e-02f,
53 2.955684e-02f, 3.071344e-02f, 3.189603e-02f, 3.310477e-02f, 3.433981e-02f, 3.560131e-02f,
54 3.688945e-02f, 3.820437e-02f, 3.954624e-02f, 4.091520e-02f, 4.231141e-02f, 4.373503e-02f,
55 4.518620e-02f, 4.666509e-02f, 4.817183e-02f, 4.970657e-02f, 5.126946e-02f, 5.286065e-02f,
56 5.448028e-02f, 5.612849e-02f, 5.780543e-02f, 5.951124e-02f, 6.124605e-02f, 6.301001e-02f,
57 6.480327e-02f, 6.662594e-02f, 6.847817e-02f, 7.036009e-02f, 7.227185e-02f, 7.421357e-02f,
58 7.618538e-02f, 7.818742e-02f, 8.021982e-02f, 8.228271e-02f, 8.437621e-02f, 8.650046e-02f,
59 8.865558e-02f, 9.084171e-02f, 9.305897e-02f, 9.530747e-02f, 9.758735e-02f, 9.989873e-02f,
60 1.022417e-01f, 1.046165e-01f, 1.070231e-01f, 1.094617e-01f, 1.119324e-01f, 1.144354e-01f,
61 1.169707e-01f, 1.195384e-01f, 1.221388e-01f, 1.247718e-01f, 1.274377e-01f, 1.301365e-01f,
62 1.328683e-01f, 1.356333e-01f, 1.384316e-01f, 1.412633e-01f, 1.441285e-01f, 1.470273e-01f,
63 1.499598e-01f, 1.529261e-01f, 1.559265e-01f, 1.589608e-01f, 1.620294e-01f, 1.651322e-01f,
64 1.682694e-01f, 1.714411e-01f, 1.746474e-01f, 1.778884e-01f, 1.811642e-01f, 1.844750e-01f,
65 1.878208e-01f, 1.912017e-01f, 1.946178e-01f, 1.980693e-01f, 2.015563e-01f, 2.050787e-01f,
66 2.086369e-01f, 2.122308e-01f, 2.158605e-01f, 2.195262e-01f, 2.232280e-01f, 2.269659e-01f,
67 2.307401e-01f, 2.345506e-01f, 2.383976e-01f, 2.422811e-01f, 2.462013e-01f, 2.501583e-01f,
68 2.541521e-01f, 2.581829e-01f, 2.622507e-01f, 2.663556e-01f, 2.704978e-01f, 2.746773e-01f,
69 2.788943e-01f, 2.831487e-01f, 2.874408e-01f, 2.917706e-01f, 2.961383e-01f, 3.005438e-01f,
70 3.049873e-01f, 3.094689e-01f, 3.139887e-01f, 3.185468e-01f, 3.231432e-01f, 3.277781e-01f,
71 3.324515e-01f, 3.371636e-01f, 3.419144e-01f, 3.467041e-01f, 3.515326e-01f, 3.564001e-01f,
72 3.613068e-01f, 3.662526e-01f, 3.712377e-01f, 3.762621e-01f, 3.813260e-01f, 3.864294e-01f,
73 3.915725e-01f, 3.967552e-01f, 4.019778e-01f, 4.072402e-01f, 4.125426e-01f, 4.178851e-01f,
74 4.232677e-01f, 4.286905e-01f, 4.341536e-01f, 4.396572e-01f, 4.452012e-01f, 4.507858e-01f,
75 4.564110e-01f, 4.620770e-01f, 4.677838e-01f, 4.735315e-01f, 4.793202e-01f, 4.851499e-01f,
76 4.910209e-01f, 4.969330e-01f, 5.028865e-01f, 5.088813e-01f, 5.149177e-01f, 5.209956e-01f,
77 5.271151e-01f, 5.332764e-01f, 5.394795e-01f, 5.457245e-01f, 5.520114e-01f, 5.583404e-01f,
78 5.647115e-01f, 5.711249e-01f, 5.775805e-01f, 5.840784e-01f, 5.906188e-01f, 5.972018e-01f,
79 6.038274e-01f, 6.104956e-01f, 6.172066e-01f, 6.239604e-01f, 6.307572e-01f, 6.375968e-01f,
80 6.444797e-01f, 6.514056e-01f, 6.583748e-01f, 6.653873e-01f, 6.724432e-01f, 6.795425e-01f,
81 6.866853e-01f, 6.938717e-01f, 7.011019e-01f, 7.083758e-01f, 7.156935e-01f, 7.230551e-01f,
82 7.304608e-01f, 7.379104e-01f, 7.454042e-01f, 7.529422e-01f, 7.605245e-01f, 7.681512e-01f,
83 7.758222e-01f, 7.835378e-01f, 7.912979e-01f, 7.991027e-01f, 8.069522e-01f, 8.148466e-01f,
84 8.227857e-01f, 8.307699e-01f, 8.387990e-01f, 8.468732e-01f, 8.549926e-01f, 8.631572e-01f,
85 8.713671e-01f, 8.796224e-01f, 8.879231e-01f, 8.962694e-01f, 9.046612e-01f, 9.130986e-01f,
86 9.215819e-01f, 9.301109e-01f, 9.386857e-01f, 9.473065e-01f, 9.559733e-01f, 9.646863e-01f,
87 9.734453e-01f, 9.822506e-01f, 9.911021e-01f, 1.000000e+00f};
88
// 256-entry table running from 0.0 to 1.0. The entries are consistent with the sRGB encoding
// (linear -> sRGB) transfer function evaluated at i / 255 for index i, e.g. entry 1 is
// 1.055 * (1 / 255)^(1 / 2.4) - 0.055.
89constexpr std::array<f32, 256> RGB_TO_SRGB_LUT = {
90 0.000000e+00f, 4.984009e-02f, 8.494473e-02f, 1.107021e-01f, 1.318038e-01f, 1.500052e-01f,
91 1.661857e-01f, 1.808585e-01f, 1.943532e-01f, 2.068957e-01f, 2.186491e-01f, 2.297351e-01f,
92 2.402475e-01f, 2.502604e-01f, 2.598334e-01f, 2.690152e-01f, 2.778465e-01f, 2.863614e-01f,
93 2.945889e-01f, 3.025538e-01f, 3.102778e-01f, 3.177796e-01f, 3.250757e-01f, 3.321809e-01f,
94 3.391081e-01f, 3.458689e-01f, 3.524737e-01f, 3.589320e-01f, 3.652521e-01f, 3.714419e-01f,
95 3.775084e-01f, 3.834581e-01f, 3.892968e-01f, 3.950301e-01f, 4.006628e-01f, 4.061998e-01f,
96 4.116451e-01f, 4.170030e-01f, 4.222770e-01f, 4.274707e-01f, 4.325873e-01f, 4.376298e-01f,
97 4.426010e-01f, 4.475037e-01f, 4.523403e-01f, 4.571131e-01f, 4.618246e-01f, 4.664766e-01f,
98 4.710712e-01f, 4.756104e-01f, 4.800958e-01f, 4.845292e-01f, 4.889122e-01f, 4.932462e-01f,
99 4.975329e-01f, 5.017734e-01f, 5.059693e-01f, 5.101216e-01f, 5.142317e-01f, 5.183006e-01f,
100 5.223295e-01f, 5.263194e-01f, 5.302714e-01f, 5.341862e-01f, 5.380651e-01f, 5.419087e-01f,
101 5.457181e-01f, 5.494938e-01f, 5.532369e-01f, 5.569480e-01f, 5.606278e-01f, 5.642771e-01f,
102 5.678965e-01f, 5.714868e-01f, 5.750484e-01f, 5.785821e-01f, 5.820884e-01f, 5.855680e-01f,
103 5.890211e-01f, 5.924487e-01f, 5.958509e-01f, 5.992285e-01f, 6.025819e-01f, 6.059114e-01f,
104 6.092176e-01f, 6.125010e-01f, 6.157619e-01f, 6.190008e-01f, 6.222180e-01f, 6.254140e-01f,
105 6.285890e-01f, 6.317436e-01f, 6.348780e-01f, 6.379926e-01f, 6.410878e-01f, 6.441637e-01f,
106 6.472208e-01f, 6.502595e-01f, 6.532799e-01f, 6.562824e-01f, 6.592672e-01f, 6.622347e-01f,
107 6.651851e-01f, 6.681187e-01f, 6.710356e-01f, 6.739363e-01f, 6.768209e-01f, 6.796897e-01f,
108 6.825429e-01f, 6.853807e-01f, 6.882034e-01f, 6.910111e-01f, 6.938041e-01f, 6.965826e-01f,
109 6.993468e-01f, 7.020969e-01f, 7.048331e-01f, 7.075556e-01f, 7.102645e-01f, 7.129600e-01f,
110 7.156424e-01f, 7.183118e-01f, 7.209683e-01f, 7.236121e-01f, 7.262435e-01f, 7.288625e-01f,
111 7.314693e-01f, 7.340640e-01f, 7.366470e-01f, 7.392181e-01f, 7.417776e-01f, 7.443256e-01f,
112 7.468624e-01f, 7.493880e-01f, 7.519025e-01f, 7.544061e-01f, 7.568989e-01f, 7.593810e-01f,
113 7.618526e-01f, 7.643137e-01f, 7.667645e-01f, 7.692052e-01f, 7.716358e-01f, 7.740564e-01f,
114 7.764671e-01f, 7.788681e-01f, 7.812595e-01f, 7.836413e-01f, 7.860138e-01f, 7.883768e-01f,
115 7.907307e-01f, 7.930754e-01f, 7.954110e-01f, 7.977377e-01f, 8.000556e-01f, 8.023647e-01f,
116 8.046651e-01f, 8.069569e-01f, 8.092403e-01f, 8.115152e-01f, 8.137818e-01f, 8.160402e-01f,
117 8.182903e-01f, 8.205324e-01f, 8.227665e-01f, 8.249926e-01f, 8.272109e-01f, 8.294214e-01f,
118 8.316242e-01f, 8.338194e-01f, 8.360070e-01f, 8.381871e-01f, 8.403597e-01f, 8.425251e-01f,
119 8.446831e-01f, 8.468339e-01f, 8.489776e-01f, 8.511142e-01f, 8.532437e-01f, 8.553662e-01f,
120 8.574819e-01f, 8.595907e-01f, 8.616927e-01f, 8.637881e-01f, 8.658767e-01f, 8.679587e-01f,
121 8.700342e-01f, 8.721032e-01f, 8.741657e-01f, 8.762218e-01f, 8.782716e-01f, 8.803151e-01f,
122 8.823524e-01f, 8.843835e-01f, 8.864085e-01f, 8.884274e-01f, 8.904402e-01f, 8.924471e-01f,
123 8.944480e-01f, 8.964431e-01f, 8.984324e-01f, 9.004158e-01f, 9.023935e-01f, 9.043654e-01f,
124 9.063318e-01f, 9.082925e-01f, 9.102476e-01f, 9.121972e-01f, 9.141413e-01f, 9.160800e-01f,
125 9.180133e-01f, 9.199412e-01f, 9.218637e-01f, 9.237810e-01f, 9.256931e-01f, 9.276000e-01f,
126 9.295017e-01f, 9.313982e-01f, 9.332896e-01f, 9.351761e-01f, 9.370575e-01f, 9.389339e-01f,
127 9.408054e-01f, 9.426719e-01f, 9.445336e-01f, 9.463905e-01f, 9.482424e-01f, 9.500897e-01f,
128 9.519322e-01f, 9.537700e-01f, 9.556032e-01f, 9.574316e-01f, 9.592555e-01f, 9.610748e-01f,
129 9.628896e-01f, 9.646998e-01f, 9.665055e-01f, 9.683068e-01f, 9.701037e-01f, 9.718961e-01f,
130 9.736842e-01f, 9.754679e-01f, 9.772474e-01f, 9.790225e-01f, 9.807934e-01f, 9.825601e-01f,
131 9.843225e-01f, 9.860808e-01f, 9.878350e-01f, 9.895850e-01f, 9.913309e-01f, 9.930727e-01f,
132 9.948106e-01f, 9.965444e-01f, 9.982741e-01f, 1.000000e+00f};
133
134} // namespace
135
// Compile-time layout description: four 32-bit FLOAT components.
// NOTE(review): the swizzle here is A, G, B, R, whereas the neighbouring R32G32B32A32_* and
// R16G16B16A16_* traits list A, B, G, R. The struct name also spells "R32B32G32", so this may be
// deliberate — confirm the G/B order against the format definition.
136struct R32B32G32A32_FLOATTraits {
137 static constexpr size_t num_components = 4;
138 static constexpr std::array<ComponentType, num_components> component_types = {
139 ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
140 static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
141 static constexpr std::array<Swizzle, num_components> component_swizzle = {
142 Swizzle::A, Swizzle::G, Swizzle::B, Swizzle::R};
143};
144
// Compile-time layout description: four 32-bit SINT components, with the channels listed in
// the order A, B, G, R. The slot ordering convention is set by the code consuming these traits.
145struct R32G32B32A32_SINTTraits {
146 static constexpr size_t num_components = 4;
147 static constexpr std::array<ComponentType, num_components> component_types = {
148 ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
149 static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
150 static constexpr std::array<Swizzle, num_components> component_swizzle = {
151 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
152};
153
// Compile-time layout description: four 32-bit UINT components, with the channels listed in
// the order A, B, G, R. The slot ordering convention is set by the code consuming these traits.
154struct R32G32B32A32_UINTTraits {
155 static constexpr size_t num_components = 4;
156 static constexpr std::array<ComponentType, num_components> component_types = {
157 ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
158 static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
159 static constexpr std::array<Swizzle, num_components> component_swizzle = {
160 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
161};
162
// Compile-time layout description: four 16-bit UNORM components, with the channels listed in
// the order A, B, G, R. The slot ordering convention is set by the code consuming these traits.
163struct R16G16B16A16_UNORMTraits {
164 static constexpr size_t num_components = 4;
165 static constexpr std::array<ComponentType, num_components> component_types = {
166 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
167 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
168 static constexpr std::array<Swizzle, num_components> component_swizzle = {
169 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
170};
171
// Compile-time layout description: four 16-bit SNORM components, with the channels listed in
// the order A, B, G, R. The slot ordering convention is set by the code consuming these traits.
172struct R16G16B16A16_SNORMTraits {
173 static constexpr size_t num_components = 4;
174 static constexpr std::array<ComponentType, num_components> component_types = {
175 ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM};
176 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
177 static constexpr std::array<Swizzle, num_components> component_swizzle = {
178 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
179};
180
// Compile-time layout description: four 16-bit SINT components, with the channels listed in
// the order A, B, G, R. The slot ordering convention is set by the code consuming these traits.
181struct R16G16B16A16_SINTTraits {
182 static constexpr size_t num_components = 4;
183 static constexpr std::array<ComponentType, num_components> component_types = {
184 ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
185 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
186 static constexpr std::array<Swizzle, num_components> component_swizzle = {
187 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
188};
189
// Compile-time layout description: four 16-bit UINT components, with the channels listed in
// the order A, B, G, R. The slot ordering convention is set by the code consuming these traits.
190struct R16G16B16A16_UINTTraits {
191 static constexpr size_t num_components = 4;
192 static constexpr std::array<ComponentType, num_components> component_types = {
193 ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
194 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
195 static constexpr std::array<Swizzle, num_components> component_swizzle = {
196 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
197};
198
// Compile-time layout description: four 16-bit FLOAT components, with the channels listed in
// the order A, B, G, R. The slot ordering convention is set by the code consuming these traits.
199struct R16G16B16A16_FLOATTraits {
200 static constexpr size_t num_components = 4;
201 static constexpr std::array<ComponentType, num_components> component_types = {
202 ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
203 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
204 static constexpr std::array<Swizzle, num_components> component_swizzle = {
205 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
206};
207
// Compile-time layout description: two 32-bit FLOAT components, with the channels listed in
// the order G, R. The slot ordering convention is set by the code consuming these traits.
208struct R32G32_FLOATTraits {
209 static constexpr size_t num_components = 2;
210 static constexpr std::array<ComponentType, num_components> component_types = {
211 ComponentType::FLOAT, ComponentType::FLOAT};
212 static constexpr std::array<size_t, num_components> component_sizes = {32, 32};
213 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
214 Swizzle::R};
215};
216
// Compile-time layout description: two 32-bit SINT components, with the channels listed in
// the order G, R. The slot ordering convention is set by the code consuming these traits.
217struct R32G32_SINTTraits {
218 static constexpr size_t num_components = 2;
219 static constexpr std::array<ComponentType, num_components> component_types = {
220 ComponentType::SINT, ComponentType::SINT};
221 static constexpr std::array<size_t, num_components> component_sizes = {32, 32};
222 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
223 Swizzle::R};
224};
225
226struct R32G32_UINTTraits {
227 static constexpr size_t num_components = 2;
228 static constexpr std::array<ComponentType, num_components> component_types = {
229 ComponentType::UINT, ComponentType::UINT};
230 static constexpr std::array<size_t, num_components> component_sizes = {32, 32};
231 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
232 Swizzle::R};
233};
234
235struct R16G16B16X16_FLOATTraits {
236 static constexpr size_t num_components = 4;
237 static constexpr std::array<ComponentType, num_components> component_types = {
238 ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
239 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
240 static constexpr std::array<Swizzle, num_components> component_swizzle = {
241 Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R};
242};
243
244struct A8R8G8B8_UNORMTraits {
245 static constexpr size_t num_components = 4;
246 static constexpr std::array<ComponentType, num_components> component_types = {
247 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
248 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
249 static constexpr std::array<Swizzle, num_components> component_swizzle = {
250 Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A};
251};
252
253struct A8R8G8B8_SRGBTraits {
254 static constexpr size_t num_components = 4;
255 static constexpr std::array<ComponentType, num_components> component_types = {
256 ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
257 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
258 static constexpr std::array<Swizzle, num_components> component_swizzle = {
259 Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A};
260};
261
262struct A2B10G10R10_UNORMTraits {
263 static constexpr size_t num_components = 4;
264 static constexpr std::array<ComponentType, num_components> component_types = {
265 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
266 static constexpr std::array<size_t, num_components> component_sizes = {10, 10, 10, 2};
267 static constexpr std::array<Swizzle, num_components> component_swizzle = {
268 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
269};
270
271struct A2B10G10R10_UINTTraits {
272 static constexpr size_t num_components = 4;
273 static constexpr std::array<ComponentType, num_components> component_types = {
274 ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
275 static constexpr std::array<size_t, num_components> component_sizes = {10, 10, 10, 2};
276 static constexpr std::array<Swizzle, num_components> component_swizzle = {
277 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
278};
279
280struct A8B8G8R8_UNORMTraits {
281 static constexpr size_t num_components = 4;
282 static constexpr std::array<ComponentType, num_components> component_types = {
283 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
284 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
285 static constexpr std::array<Swizzle, num_components> component_swizzle = {
286 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
287};
288
289struct A8B8G8R8_SRGBTraits {
290 static constexpr size_t num_components = 4;
291 static constexpr std::array<ComponentType, num_components> component_types = {
292 ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
293 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
294 static constexpr std::array<Swizzle, num_components> component_swizzle = {
295 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
296};
297
298struct A8B8G8R8_SNORMTraits {
299 static constexpr size_t num_components = 4;
300 static constexpr std::array<ComponentType, num_components> component_types = {
301 ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM};
302 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
303 static constexpr std::array<Swizzle, num_components> component_swizzle = {
304 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
305};
306
307struct A8B8G8R8_SINTTraits {
308 static constexpr size_t num_components = 4;
309 static constexpr std::array<ComponentType, num_components> component_types = {
310 ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
311 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
312 static constexpr std::array<Swizzle, num_components> component_swizzle = {
313 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
314};
315
316struct A8B8G8R8_UINTTraits {
317 static constexpr size_t num_components = 4;
318 static constexpr std::array<ComponentType, num_components> component_types = {
319 ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
320 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
321 static constexpr std::array<Swizzle, num_components> component_swizzle = {
322 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
323};
324
325struct R16G16_UNORMTraits {
326 static constexpr size_t num_components = 2;
327 static constexpr std::array<ComponentType, num_components> component_types = {
328 ComponentType::UNORM, ComponentType::UNORM};
329 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
330 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
331 Swizzle::R};
332};
333
334struct R16G16_SNORMTraits {
335 static constexpr size_t num_components = 2;
336 static constexpr std::array<ComponentType, num_components> component_types = {
337 ComponentType::SNORM, ComponentType::SNORM};
338 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
339 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
340 Swizzle::R};
341};
342
343struct R16G16_SINTTraits {
344 static constexpr size_t num_components = 2;
345 static constexpr std::array<ComponentType, num_components> component_types = {
346 ComponentType::SINT, ComponentType::SINT};
347 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
348 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
349 Swizzle::R};
350};
351
352struct R16G16_UINTTraits {
353 static constexpr size_t num_components = 2;
354 static constexpr std::array<ComponentType, num_components> component_types = {
355 ComponentType::UINT, ComponentType::UINT};
356 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
357 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
358 Swizzle::R};
359};
360
361struct R16G16_FLOATTraits {
362 static constexpr size_t num_components = 2;
363 static constexpr std::array<ComponentType, num_components> component_types = {
364 ComponentType::FLOAT, ComponentType::FLOAT};
365 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
366 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
367 Swizzle::R};
368};
369
370struct B10G11R11_FLOATTraits {
371 static constexpr size_t num_components = 3;
372 static constexpr std::array<ComponentType, num_components> component_types = {
373 ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
374 static constexpr std::array<size_t, num_components> component_sizes = {11, 11, 10};
375 static constexpr std::array<Swizzle, num_components> component_swizzle = {
376 Swizzle::R, Swizzle::G, Swizzle::B};
377};
378
379struct R32_SINTTraits {
380 static constexpr size_t num_components = 1;
381 static constexpr std::array<ComponentType, num_components> component_types = {
382 ComponentType::SINT};
383 static constexpr std::array<size_t, num_components> component_sizes = {32};
384 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
385};
386
387struct R32_UINTTraits {
388 static constexpr size_t num_components = 1;
389 static constexpr std::array<ComponentType, num_components> component_types = {
390 ComponentType::UINT};
391 static constexpr std::array<size_t, num_components> component_sizes = {32};
392 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
393};
394
395struct R32_FLOATTraits {
396 static constexpr size_t num_components = 1;
397 static constexpr std::array<ComponentType, num_components> component_types = {
398 ComponentType::FLOAT};
399 static constexpr std::array<size_t, num_components> component_sizes = {32};
400 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
401};
402
403struct R5G6B5_UNORMTraits {
404 static constexpr size_t num_components = 3;
405 static constexpr std::array<ComponentType, num_components> component_types = {
406 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
407 static constexpr std::array<size_t, num_components> component_sizes = {5, 6, 5};
408 static constexpr std::array<Swizzle, num_components> component_swizzle = {
409 Swizzle::B, Swizzle::G, Swizzle::R};
410};
411
412struct A1R5G5B5_UNORMTraits {
413 static constexpr size_t num_components = 4;
414 static constexpr std::array<ComponentType, num_components> component_types = {
415 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
416 static constexpr std::array<size_t, num_components> component_sizes = {5, 5, 5, 1};
417 static constexpr std::array<Swizzle, num_components> component_swizzle = {
418 Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A};
419};
420
421struct R8G8_UNORMTraits {
422 static constexpr size_t num_components = 2;
423 static constexpr std::array<ComponentType, num_components> component_types = {
424 ComponentType::UNORM, ComponentType::UNORM};
425 static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
426 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
427 Swizzle::R};
428};
429
430struct R8G8_SNORMTraits {
431 static constexpr size_t num_components = 2;
432 static constexpr std::array<ComponentType, num_components> component_types = {
433 ComponentType::SNORM, ComponentType::SNORM};
434 static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
435 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
436 Swizzle::R};
437};
438
439struct R8G8_SINTTraits {
440 static constexpr size_t num_components = 2;
441 static constexpr std::array<ComponentType, num_components> component_types = {
442 ComponentType::SINT, ComponentType::SINT};
443 static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
444 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
445 Swizzle::R};
446};
447
448struct R8G8_UINTTraits {
449 static constexpr size_t num_components = 2;
450 static constexpr std::array<ComponentType, num_components> component_types = {
451 ComponentType::UINT, ComponentType::UINT};
452 static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
453 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::G,
454 Swizzle::R};
455};
456
457struct R16_UNORMTraits {
458 static constexpr size_t num_components = 1;
459 static constexpr std::array<ComponentType, num_components> component_types = {
460 ComponentType::UNORM};
461 static constexpr std::array<size_t, num_components> component_sizes = {16};
462 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
463};
464
465struct R16_SNORMTraits {
466 static constexpr size_t num_components = 1;
467 static constexpr std::array<ComponentType, num_components> component_types = {
468 ComponentType::SNORM};
469 static constexpr std::array<size_t, num_components> component_sizes = {16};
470 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
471};
472
473struct R16_SINTTraits {
474 static constexpr size_t num_components = 1;
475 static constexpr std::array<ComponentType, num_components> component_types = {
476 ComponentType::SINT};
477 static constexpr std::array<size_t, num_components> component_sizes = {16};
478 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
479};
480
481struct R16_UINTTraits {
482 static constexpr size_t num_components = 1;
483 static constexpr std::array<ComponentType, num_components> component_types = {
484 ComponentType::UINT};
485 static constexpr std::array<size_t, num_components> component_sizes = {16};
486 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
487};
488
489struct R16_FLOATTraits {
490 static constexpr size_t num_components = 1;
491 static constexpr std::array<ComponentType, num_components> component_types = {
492 ComponentType::FLOAT};
493 static constexpr std::array<size_t, num_components> component_sizes = {16};
494 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
495};
496
497struct R8_UNORMTraits {
498 static constexpr size_t num_components = 1;
499 static constexpr std::array<ComponentType, num_components> component_types = {
500 ComponentType::UNORM};
501 static constexpr std::array<size_t, num_components> component_sizes = {8};
502 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
503};
504
505struct R8_SNORMTraits {
506 static constexpr size_t num_components = 1;
507 static constexpr std::array<ComponentType, num_components> component_types = {
508 ComponentType::SNORM};
509 static constexpr std::array<size_t, num_components> component_sizes = {8};
510 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
511};
512
513struct R8_SINTTraits {
514 static constexpr size_t num_components = 1;
515 static constexpr std::array<ComponentType, num_components> component_types = {
516 ComponentType::SINT};
517 static constexpr std::array<size_t, num_components> component_sizes = {8};
518 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
519};
520
521struct R8_UINTTraits {
522 static constexpr size_t num_components = 1;
523 static constexpr std::array<ComponentType, num_components> component_types = {
524 ComponentType::UINT};
525 static constexpr std::array<size_t, num_components> component_sizes = {8};
526 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
527};
528
529template <class ConverterTraits>
530class ConverterImpl : public Converter {
531private:
532 static constexpr size_t num_components = ConverterTraits::num_components;
533 static constexpr std::array<ComponentType, num_components> component_types =
534 ConverterTraits::component_types;
535 static constexpr std::array<size_t, num_components> component_sizes =
536 ConverterTraits::component_sizes;
537 static constexpr std::array<Swizzle, num_components> component_swizzle =
538 ConverterTraits::component_swizzle;
539
540 static constexpr size_t CalculateByteSize() {
541 size_t size = 0;
542 for (const size_t component_size : component_sizes) {
543 size += component_size;
544 }
545 const size_t power = (sizeof(size_t) * 8) - std::countl_zero(size) - 1ULL;
546 const size_t base_size = 1ULL << power;
547 const size_t mask = base_size - 1ULL;
548 return ((size & mask) != 0 ? base_size << 1ULL : base_size) / 8;
549 }
550
551 static constexpr size_t total_bytes_per_pixel = CalculateByteSize();
552 static constexpr size_t total_words_per_pixel =
553 (total_bytes_per_pixel + sizeof(u32) - 1U) / sizeof(u32);
554 static constexpr size_t components_per_ir_rep = 4;
555
556 template <bool get_offsets>
557 static constexpr std::array<size_t, num_components> GetBoundWordsOffsets() {
558 std::array<size_t, num_components> result;
559 result.fill(0);
560 constexpr size_t total_bits_per_word = sizeof(u32) * 8;
561 size_t accumulated_size = 0;
562 size_t count = 0;
563 for (size_t i = 0; i < num_components; i++) {
564 if constexpr (get_offsets) {
565 result[i] = accumulated_size;
566 } else {
567 result[i] = count;
568 }
569 accumulated_size += component_sizes[i];
570 if (accumulated_size > total_bits_per_word) {
571 if constexpr (get_offsets) {
572 result[i] = 0;
573 } else {
574 result[i]++;
575 }
576 count++;
577 accumulated_size = component_sizes[i];
578 }
579 }
580 return result;
581 }
582
583 static constexpr std::array<size_t, num_components> bound_words = GetBoundWordsOffsets<false>();
584 static constexpr std::array<size_t, num_components> bound_offsets =
585 GetBoundWordsOffsets<true>();
586
587 static constexpr std::array<u32, num_components> GetComponentsMask() {
588 std::array<u32, num_components> result;
589 for (size_t i = 0; i < num_components; i++) {
590 result[i] = (((u32)~0) >> (8 * sizeof(u32) - component_sizes[i])) << bound_offsets[i];
591 }
592 return result;
593 }
594
595 static constexpr std::array<u32, num_components> component_mask = GetComponentsMask();
596
597 // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function
598 // calls, it may fail to detect the benefit of inlining.
599 template <size_t which_component>
600 FORCE_INLINE void ConvertToComponent(u32 which_word, f32& out_component) {
601 const u32 value = (which_word >> bound_offsets[which_component]) &
602 static_cast<u32>((1ULL << component_sizes[which_component]) - 1ULL);
603 const auto sign_extend = [](u32 base_value, size_t bits) {
604 const size_t shift_amount = sizeof(u32) * 8 - bits;
605 s32 shifted_value = static_cast<s32>(base_value << shift_amount);
606 return shifted_value >> shift_amount;
607 };
608 const auto force_to_fp16 = [](f32 base_value) {
609 u32 tmp = std::bit_cast<u32>(base_value);
610 constexpr size_t fp32_mantissa_bits = 23;
611 constexpr size_t fp16_mantissa_bits = 10;
612 constexpr size_t mantissa_mask =
613 ~((1ULL << (fp32_mantissa_bits - fp16_mantissa_bits)) - 1ULL);
614 tmp = tmp & mantissa_mask;
615 // TODO: force the exponent within the range of half float. Not needed in UNORM / SNORM
616 return std::bit_cast<f32>(tmp);
617 };
618 const auto from_fp_n = [&sign_extend](u32 base_value, size_t bits, size_t mantissa) {
619 constexpr size_t fp32_mantissa_bits = 23;
620 size_t shift_towards = fp32_mantissa_bits - mantissa;
621 const u32 new_value =
622 static_cast<u32>(sign_extend(base_value, bits) << shift_towards) & (~(1U << 31));
623 return std::bit_cast<f32>(new_value);
624 };
625 const auto calculate_snorm = [&]() {
626 return static_cast<f32>(
627 static_cast<f64>(sign_extend(value, component_sizes[which_component])) /
628 ((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL));
629 };
630 const auto calculate_unorm = [&]() {
631 return static_cast<f32>(static_cast<f32>(value) /
632 ((1ULL << (component_sizes[which_component])) - 1ULL));
633 };
634 if constexpr (component_types[which_component] == ComponentType::SNORM) {
635 out_component = calculate_snorm();
636 } else if constexpr (component_types[which_component] == ComponentType::UNORM) {
637 out_component = calculate_unorm();
638 } else if constexpr (component_types[which_component] == ComponentType::SINT) {
639 out_component = static_cast<f32>(
640 static_cast<s32>(sign_extend(value, component_sizes[which_component])));
641 } else if constexpr (component_types[which_component] == ComponentType::UINT) {
642 out_component = static_cast<f32>(
643 static_cast<s32>(sign_extend(value, component_sizes[which_component])));
644 } else if constexpr (component_types[which_component] == ComponentType::SNORM_FORCE_FP16) {
645 out_component = calculate_snorm();
646 out_component = force_to_fp16(out_component);
647 } else if constexpr (component_types[which_component] == ComponentType::UNORM_FORCE_FP16) {
648 out_component = calculate_unorm();
649 out_component = force_to_fp16(out_component);
650 } else if constexpr (component_types[which_component] == ComponentType::FLOAT) {
651 if constexpr (component_sizes[which_component] == 32) {
652 out_component = std::bit_cast<f32>(value);
653 } else if constexpr (component_sizes[which_component] == 16) {
654 static constexpr u32 sign_mask = 0x8000;
655 static constexpr u32 mantissa_mask = 0x8000;
656 out_component = std::bit_cast<f32>(((value & sign_mask) << 16) |
657 (((value & 0x7c00) + 0x1C000) << 13) |
658 ((value & mantissa_mask) << 13));
659 } else {
660 out_component = from_fp_n(value, component_sizes[which_component],
661 component_sizes[which_component] - 5);
662 }
663 } else if constexpr (component_types[which_component] == ComponentType::SRGB) {
664 if constexpr (component_swizzle[which_component] == Swizzle::A) {
665 out_component = calculate_unorm();
666 } else if constexpr (component_sizes[which_component] == 8) {
667 out_component = SRGB_TO_RGB_LUT[value];
668 } else {
669 out_component = calculate_unorm();
670 UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented",
671 component_sizes[which_component]);
672 }
673 }
674 }
675
676 // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function
677 // calls, it may fail to detect the benefit of inlining.
678 template <size_t which_component>
679 FORCE_INLINE void ConvertFromComponent(u32& which_word, f32 in_component) {
680 const auto insert_to_word = [&]<typename T>(T new_word) {
681 which_word |= (static_cast<u32>(new_word) << bound_offsets[which_component]) &
682 component_mask[which_component];
683 };
684 const auto to_fp_n = [](f32 base_value, size_t bits, size_t mantissa) {
685 constexpr size_t fp32_mantissa_bits = 23;
686 u32 tmp_value = std::bit_cast<u32>(std::max(base_value, 0.0f));
687 size_t shift_towards = fp32_mantissa_bits - mantissa;
688 return tmp_value >> shift_towards;
689 };
690 const auto calculate_unorm = [&]() {
691 return static_cast<u32>(static_cast<f32>(in_component) *
692 ((1ULL << (component_sizes[which_component])) - 1ULL));
693 };
694 if constexpr (component_types[which_component] == ComponentType::SNORM ||
695 component_types[which_component] == ComponentType::SNORM_FORCE_FP16) {
696 s32 tmp_word =
697 static_cast<s32>(static_cast<f64>(in_component) *
698 ((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL));
699 insert_to_word(tmp_word);
700
701 } else if constexpr (component_types[which_component] == ComponentType::UNORM ||
702 component_types[which_component] == ComponentType::UNORM_FORCE_FP16) {
703 u32 tmp_word = calculate_unorm();
704 insert_to_word(tmp_word);
705 } else if constexpr (component_types[which_component] == ComponentType::SINT) {
706 s32 tmp_word = static_cast<s32>(in_component);
707 insert_to_word(tmp_word);
708 } else if constexpr (component_types[which_component] == ComponentType::UINT) {
709 u32 tmp_word = static_cast<u32>(in_component);
710 insert_to_word(tmp_word);
711 } else if constexpr (component_types[which_component] == ComponentType::FLOAT) {
712 if constexpr (component_sizes[which_component] == 32) {
713 u32 tmp_word = std::bit_cast<u32>(in_component);
714 insert_to_word(tmp_word);
715 } else if constexpr (component_sizes[which_component] == 16) {
716 static constexpr u32 sign_mask = 0x8000;
717 static constexpr u32 mantissa_mask = 0x8000;
718 const u32 tmp_word = std::bit_cast<u32>(in_component);
719 const u32 half = ((tmp_word >> 16) & sign_mask) |
720 ((((tmp_word & 0x7f800000) - 0x38000000) >> 13) & 0x7c00) |
721 ((tmp_word >> 13) & 0x03ff);
722 insert_to_word(half);
723 } else {
724 insert_to_word(to_fp_n(in_component, component_sizes[which_component],
725 component_sizes[which_component] - 5));
726 }
727 } else if constexpr (component_types[which_component] == ComponentType::SRGB) {
728 if constexpr (component_swizzle[which_component] != Swizzle::A) {
729 if constexpr (component_sizes[which_component] == 8) {
730 const u32 index = calculate_unorm();
731 in_component = RGB_TO_SRGB_LUT[index];
732 } else {
733 UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented",
734 component_sizes[which_component]);
735 }
736 }
737 const u32 tmp_word = calculate_unorm();
738 insert_to_word(tmp_word);
739 }
740 }
741
742public:
743 void ConvertTo(std::span<u8> input, std::span<f32> output) override {
744 const size_t num_pixels = output.size() / components_per_ir_rep;
745 for (size_t pixel = 0; pixel < num_pixels; pixel++) {
746 std::array<u32, total_words_per_pixel> words{};
747
748 std::memcpy(words.data(), &input[pixel * total_bytes_per_pixel], total_bytes_per_pixel);
749 std::span<f32> new_components(&output[pixel * components_per_ir_rep],
750 components_per_ir_rep);
751 if constexpr (component_swizzle[0] != Swizzle::None) {
752 ConvertToComponent<0>(words[bound_words[0]],
753 new_components[static_cast<size_t>(component_swizzle[0])]);
754 } else {
755 new_components[0] = 0.0f;
756 }
757 if constexpr (num_components >= 2) {
758 if constexpr (component_swizzle[1] != Swizzle::None) {
759 ConvertToComponent<1>(
760 words[bound_words[1]],
761 new_components[static_cast<size_t>(component_swizzle[1])]);
762 } else {
763 new_components[1] = 0.0f;
764 }
765 } else {
766 new_components[1] = 0.0f;
767 }
768 if constexpr (num_components >= 3) {
769 if constexpr (component_swizzle[2] != Swizzle::None) {
770 ConvertToComponent<2>(
771 words[bound_words[2]],
772 new_components[static_cast<size_t>(component_swizzle[2])]);
773 } else {
774 new_components[2] = 0.0f;
775 }
776 } else {
777 new_components[2] = 0.0f;
778 }
779 if constexpr (num_components >= 4) {
780 if constexpr (component_swizzle[3] != Swizzle::None) {
781 ConvertToComponent<3>(
782 words[bound_words[3]],
783 new_components[static_cast<size_t>(component_swizzle[3])]);
784 } else {
785 new_components[3] = 0.0f;
786 }
787 } else {
788 new_components[3] = 0.0f;
789 }
790 }
791 }
792
793 void ConvertFrom(std::span<f32> input, std::span<u8> output) override {
794 const size_t num_pixels = output.size() / total_bytes_per_pixel;
795 for (size_t pixel = 0; pixel < num_pixels; pixel++) {
796 std::span<f32> old_components(&input[pixel * components_per_ir_rep],
797 components_per_ir_rep);
798 std::array<u32, total_words_per_pixel> words{};
799 if constexpr (component_swizzle[0] != Swizzle::None) {
800 ConvertFromComponent<0>(words[bound_words[0]],
801 old_components[static_cast<size_t>(component_swizzle[0])]);
802 }
803 if constexpr (num_components >= 2) {
804 if constexpr (component_swizzle[1] != Swizzle::None) {
805 ConvertFromComponent<1>(
806 words[bound_words[1]],
807 old_components[static_cast<size_t>(component_swizzle[1])]);
808 }
809 }
810 if constexpr (num_components >= 3) {
811 if constexpr (component_swizzle[2] != Swizzle::None) {
812 ConvertFromComponent<2>(
813 words[bound_words[2]],
814 old_components[static_cast<size_t>(component_swizzle[2])]);
815 }
816 }
817 if constexpr (num_components >= 4) {
818 if constexpr (component_swizzle[3] != Swizzle::None) {
819 ConvertFromComponent<3>(
820 words[bound_words[3]],
821 old_components[static_cast<size_t>(component_swizzle[3])]);
822 }
823 }
824 std::memcpy(&output[pixel * total_bytes_per_pixel], words.data(),
825 total_bytes_per_pixel);
826 }
827 }
828
829 ConverterImpl() = default;
830 ~ConverterImpl() = default;
831};
832
833struct ConverterFactory::ConverterFactoryImpl {
834 std::unordered_map<RenderTargetFormat, std::unique_ptr<Converter>> converters_cache;
835};
836
837ConverterFactory::ConverterFactory() {
838 impl = std::make_unique<ConverterFactoryImpl>();
839}
840
841ConverterFactory::~ConverterFactory() = default;
842
843Converter* ConverterFactory::GetFormatConverter(RenderTargetFormat format) {
844 auto it = impl->converters_cache.find(format);
845 if (it == impl->converters_cache.end()) [[unlikely]] {
846 return BuildConverter(format);
847 }
848 return it->second.get();
849}
850
851class NullConverter : public Converter {
852public:
853 void ConvertTo([[maybe_unused]] std::span<u8> input, std::span<f32> output) override {
854 std::fill(output.begin(), output.end(), 0.0f);
855 }
856 void ConvertFrom([[maybe_unused]] std::span<f32> input, std::span<u8> output) override {
857 const u8 fill_value = 0U;
858 std::fill(output.begin(), output.end(), fill_value);
859 }
860};
861
862Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) {
863 switch (format) {
864 case RenderTargetFormat::R32B32G32A32_FLOAT:
865 return impl->converters_cache
866 .emplace(format, std::make_unique<ConverterImpl<R32B32G32A32_FLOATTraits>>())
867 .first->second.get();
868 break;
869 case RenderTargetFormat::R32G32B32A32_SINT:
870 return impl->converters_cache
871 .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_SINTTraits>>())
872 .first->second.get();
873 break;
874 case RenderTargetFormat::R32G32B32A32_UINT:
875 return impl->converters_cache
876 .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_UINTTraits>>())
877 .first->second.get();
878 break;
879 case RenderTargetFormat::R16G16B16A16_UNORM:
880 return impl->converters_cache
881 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UNORMTraits>>())
882 .first->second.get();
883 break;
884 case RenderTargetFormat::R16G16B16A16_SNORM:
885 return impl->converters_cache
886 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SNORMTraits>>())
887 .first->second.get();
888 break;
889 case RenderTargetFormat::R16G16B16A16_SINT:
890 return impl->converters_cache
891 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SINTTraits>>())
892 .first->second.get();
893 break;
894 case RenderTargetFormat::R16G16B16A16_UINT:
895 return impl->converters_cache
896 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UINTTraits>>())
897 .first->second.get();
898 break;
899 case RenderTargetFormat::R16G16B16A16_FLOAT:
900 return impl->converters_cache
901 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_FLOATTraits>>())
902 .first->second.get();
903 break;
904 case RenderTargetFormat::R32G32_FLOAT:
905 return impl->converters_cache
906 .emplace(format, std::make_unique<ConverterImpl<R32G32_FLOATTraits>>())
907 .first->second.get();
908 break;
909 case RenderTargetFormat::R32G32_SINT:
910 return impl->converters_cache
911 .emplace(format, std::make_unique<ConverterImpl<R32G32_SINTTraits>>())
912 .first->second.get();
913 break;
914 case RenderTargetFormat::R32G32_UINT:
915 return impl->converters_cache
916 .emplace(format, std::make_unique<ConverterImpl<R32G32_UINTTraits>>())
917 .first->second.get();
918 break;
919 case RenderTargetFormat::R16G16B16X16_FLOAT:
920 return impl->converters_cache
921 .emplace(format, std::make_unique<ConverterImpl<R16G16B16X16_FLOATTraits>>())
922 .first->second.get();
923 break;
924 case RenderTargetFormat::A8R8G8B8_UNORM:
925 return impl->converters_cache
926 .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_UNORMTraits>>())
927 .first->second.get();
928 break;
929 case RenderTargetFormat::A8R8G8B8_SRGB:
930 return impl->converters_cache
931 .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_SRGBTraits>>())
932 .first->second.get();
933 break;
934 case RenderTargetFormat::A2B10G10R10_UNORM:
935 return impl->converters_cache
936 .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UNORMTraits>>())
937 .first->second.get();
938 break;
939 case RenderTargetFormat::A2B10G10R10_UINT:
940 return impl->converters_cache
941 .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UINTTraits>>())
942 .first->second.get();
943 break;
944 case RenderTargetFormat::A8B8G8R8_UNORM:
945 return impl->converters_cache
946 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UNORMTraits>>())
947 .first->second.get();
948 break;
949 case RenderTargetFormat::A8B8G8R8_SRGB:
950 return impl->converters_cache
951 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SRGBTraits>>())
952 .first->second.get();
953 break;
954 case RenderTargetFormat::A8B8G8R8_SNORM:
955 return impl->converters_cache
956 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SNORMTraits>>())
957 .first->second.get();
958 break;
959 case RenderTargetFormat::A8B8G8R8_SINT:
960 return impl->converters_cache
961 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SINTTraits>>())
962 .first->second.get();
963 break;
964 case RenderTargetFormat::A8B8G8R8_UINT:
965 return impl->converters_cache
966 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UINTTraits>>())
967 .first->second.get();
968 break;
969 case RenderTargetFormat::R16G16_UNORM:
970 return impl->converters_cache
971 .emplace(format, std::make_unique<ConverterImpl<R16G16_UNORMTraits>>())
972 .first->second.get();
973 break;
974 case RenderTargetFormat::R16G16_SNORM:
975 return impl->converters_cache
976 .emplace(format, std::make_unique<ConverterImpl<R16G16_SNORMTraits>>())
977 .first->second.get();
978 break;
979 case RenderTargetFormat::R16G16_SINT:
980 return impl->converters_cache
981 .emplace(format, std::make_unique<ConverterImpl<R16G16_SINTTraits>>())
982 .first->second.get();
983 break;
984 case RenderTargetFormat::R16G16_UINT:
985 return impl->converters_cache
986 .emplace(format, std::make_unique<ConverterImpl<R16G16_UINTTraits>>())
987 .first->second.get();
988 break;
989 case RenderTargetFormat::R16G16_FLOAT:
990 return impl->converters_cache
991 .emplace(format, std::make_unique<ConverterImpl<R16G16_FLOATTraits>>())
992 .first->second.get();
993 break;
994 case RenderTargetFormat::B10G11R11_FLOAT:
995 return impl->converters_cache
996 .emplace(format, std::make_unique<ConverterImpl<B10G11R11_FLOATTraits>>())
997 .first->second.get();
998 break;
999 case RenderTargetFormat::R32_SINT:
1000 return impl->converters_cache
1001 .emplace(format, std::make_unique<ConverterImpl<R32_SINTTraits>>())
1002 .first->second.get();
1003 break;
1004 case RenderTargetFormat::R32_UINT:
1005 return impl->converters_cache
1006 .emplace(format, std::make_unique<ConverterImpl<R32_UINTTraits>>())
1007 .first->second.get();
1008 break;
1009 case RenderTargetFormat::R32_FLOAT:
1010 return impl->converters_cache
1011 .emplace(format, std::make_unique<ConverterImpl<R32_FLOATTraits>>())
1012 .first->second.get();
1013 break;
1014 case RenderTargetFormat::R5G6B5_UNORM:
1015 return impl->converters_cache
1016 .emplace(format, std::make_unique<ConverterImpl<R5G6B5_UNORMTraits>>())
1017 .first->second.get();
1018 break;
1019 case RenderTargetFormat::A1R5G5B5_UNORM:
1020 return impl->converters_cache
1021 .emplace(format, std::make_unique<ConverterImpl<A1R5G5B5_UNORMTraits>>())
1022 .first->second.get();
1023 break;
1024 case RenderTargetFormat::R8G8_UNORM:
1025 return impl->converters_cache
1026 .emplace(format, std::make_unique<ConverterImpl<R8G8_UNORMTraits>>())
1027 .first->second.get();
1028 break;
1029 case RenderTargetFormat::R8G8_SNORM:
1030 return impl->converters_cache
1031 .emplace(format, std::make_unique<ConverterImpl<R8G8_SNORMTraits>>())
1032 .first->second.get();
1033 break;
1034 case RenderTargetFormat::R8G8_SINT:
1035 return impl->converters_cache
1036 .emplace(format, std::make_unique<ConverterImpl<R8G8_SINTTraits>>())
1037 .first->second.get();
1038 break;
1039 case RenderTargetFormat::R8G8_UINT:
1040 return impl->converters_cache
1041 .emplace(format, std::make_unique<ConverterImpl<R8G8_UINTTraits>>())
1042 .first->second.get();
1043 break;
1044 case RenderTargetFormat::R16_UNORM:
1045 return impl->converters_cache
1046 .emplace(format, std::make_unique<ConverterImpl<R16_UNORMTraits>>())
1047 .first->second.get();
1048 break;
1049 case RenderTargetFormat::R16_SNORM:
1050 return impl->converters_cache
1051 .emplace(format, std::make_unique<ConverterImpl<R16_SNORMTraits>>())
1052 .first->second.get();
1053 break;
1054 case RenderTargetFormat::R16_SINT:
1055 return impl->converters_cache
1056 .emplace(format, std::make_unique<ConverterImpl<R16_SINTTraits>>())
1057 .first->second.get();
1058 break;
1059 case RenderTargetFormat::R16_UINT:
1060 return impl->converters_cache
1061 .emplace(format, std::make_unique<ConverterImpl<R16_UINTTraits>>())
1062 .first->second.get();
1063 break;
1064 case RenderTargetFormat::R16_FLOAT:
1065 return impl->converters_cache
1066 .emplace(format, std::make_unique<ConverterImpl<R16_FLOATTraits>>())
1067 .first->second.get();
1068 break;
1069 case RenderTargetFormat::R8_UNORM:
1070 return impl->converters_cache
1071 .emplace(format, std::make_unique<ConverterImpl<R8_UNORMTraits>>())
1072 .first->second.get();
1073 break;
1074 case RenderTargetFormat::R8_SNORM:
1075 return impl->converters_cache
1076 .emplace(format, std::make_unique<ConverterImpl<R8_SNORMTraits>>())
1077 .first->second.get();
1078 break;
1079 case RenderTargetFormat::R8_SINT:
1080 return impl->converters_cache
1081 .emplace(format, std::make_unique<ConverterImpl<R8_SINTTraits>>())
1082 .first->second.get();
1083 break;
1084 case RenderTargetFormat::R8_UINT:
1085 return impl->converters_cache
1086 .emplace(format, std::make_unique<ConverterImpl<R8_UINTTraits>>())
1087 .first->second.get();
1088 break;
1089 default: {
1090 UNIMPLEMENTED_MSG("This format {} converter is not implemented", format);
1091 return impl->converters_cache.emplace(format, std::make_unique<NullConverter>())
1092 .first->second.get();
1093 }
1094 }
1095}
1096
1097} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/engines/sw_blitter/converter.h b/src/video_core/engines/sw_blitter/converter.h
new file mode 100644
index 000000000..03337e906
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/converter.h
@@ -0,0 +1,35 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include <memory>
5#include <span>
6
7#include "common/common_types.h"
8
9#pragma once
10
11#include "video_core/gpu.h"
12
13namespace Tegra::Engines::Blitter {
14
15class Converter {
16public:
17 virtual void ConvertTo(std::span<u8> input, std::span<f32> output) = 0;
18 virtual void ConvertFrom(std::span<f32> input, std::span<u8> output) = 0;
19};
20
/// Hands out Converter objects selected by render target format.
/// The returned pointers are raw, non-owning `Converter*` values.
/// NOTE(review): the converters appear to be owned by a cache inside the hidden
/// implementation object, so they would live as long as the factory - confirm.
21class ConverterFactory {
22public:
/// Constructor and destructor are only declared here; ConverterFactoryImpl is an
/// incomplete type in this header, so both have to be defined where it is complete.
23 ConverterFactory();
24 ~ConverterFactory();
25
/// Returns the converter to use for `format`.
/// NOTE(review): presumably looks the format up in a cache and falls back to
/// BuildConverter on a miss - the definition is not visible here, confirm.
26 Converter* GetFormatConverter(RenderTargetFormat format);
27
28private:
/// Creates the converter for `format`.
/// NOTE(review): the visible implementation tail emplaces a format-specific converter
/// into the impl's cache and returns its raw pointer; formats without a dedicated
/// case are reported as unimplemented and get a NullConverter - confirm against the
/// full definition.
29 Converter* BuildConverter(RenderTargetFormat format);
30
/// Forward-declared implementation type (pimpl); its members are not visible here.
31 struct ConverterFactoryImpl;
/// Owning pointer to the hidden implementation state.
32 std::unique_ptr<ConverterFactoryImpl> impl;
33};
34
35} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 354c6e429..f71a316b6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -466,8 +466,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
466 const Tegra::Engines::Fermi2D::Config& copy_config) { 466 const Tegra::Engines::Fermi2D::Config& copy_config) {
467 MICROPROFILE_SCOPE(OpenGL_Blits); 467 MICROPROFILE_SCOPE(OpenGL_Blits);
468 std::scoped_lock lock{texture_cache.mutex}; 468 std::scoped_lock lock{texture_cache.mutex};
469 texture_cache.BlitImage(dst, src, copy_config); 469 return texture_cache.BlitImage(dst, src, copy_config);
470 return true;
471} 470}
472 471
473Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { 472Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 12b13cc59..d8ad8815c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -542,8 +542,7 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
542 const Tegra::Engines::Fermi2D::Surface& dst, 542 const Tegra::Engines::Fermi2D::Surface& dst,
543 const Tegra::Engines::Fermi2D::Config& copy_config) { 543 const Tegra::Engines::Fermi2D::Config& copy_config) {
544 std::scoped_lock lock{texture_cache.mutex}; 544 std::scoped_lock lock{texture_cache.mutex};
545 texture_cache.BlitImage(dst, src, copy_config); 545 return texture_cache.BlitImage(dst, src, copy_config);
546 return true;
547} 546}
548 547
549Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { 548Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8ef75fe73..8e68a2e53 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -506,10 +506,14 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
506} 506}
507 507
508template <class P> 508template <class P>
509void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 509bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
510 const Tegra::Engines::Fermi2D::Surface& src, 510 const Tegra::Engines::Fermi2D::Surface& src,
511 const Tegra::Engines::Fermi2D::Config& copy) { 511 const Tegra::Engines::Fermi2D::Config& copy) {
512 const BlitImages images = GetBlitImages(dst, src, copy); 512 const auto result = GetBlitImages(dst, src, copy);
513 if (!result) {
514 return false;
515 }
516 const BlitImages images = *result;
513 const ImageId dst_id = images.dst_id; 517 const ImageId dst_id = images.dst_id;
514 const ImageId src_id = images.src_id; 518 const ImageId src_id = images.src_id;
515 519
@@ -596,6 +600,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
596 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, 600 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
597 copy.operation); 601 copy.operation);
598 } 602 }
603 return true;
599} 604}
600 605
601template <class P> 606template <class P>
@@ -1133,7 +1138,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1133} 1138}
1134 1139
1135template <class P> 1140template <class P>
1136typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( 1141std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImages(
1137 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, 1142 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
1138 const Tegra::Engines::Fermi2D::Config& copy) { 1143 const Tegra::Engines::Fermi2D::Config& copy) {
1139 1144
@@ -1154,6 +1159,20 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
1154 has_deleted_images = false; 1159 has_deleted_images = false;
1155 src_id = FindImage(src_info, src_addr, try_options); 1160 src_id = FindImage(src_info, src_addr, try_options);
1156 dst_id = FindImage(dst_info, dst_addr, try_options); 1161 dst_id = FindImage(dst_info, dst_addr, try_options);
1162 if (!copy.must_accelerate) {
1163 do {
1164 if (!src_id && !dst_id) {
1165 return std::nullopt;
1166 }
1167 if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) {
1168 break;
1169 }
1170 if (dst_id && True(slot_images[dst_id].flags & ImageFlagBits::GpuModified)) {
1171 break;
1172 }
1173 return std::nullopt;
1174 } while (false);
1175 }
1157 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; 1176 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
1158 if (src_image && src_image->info.num_samples > 1) { 1177 if (src_image && src_image->info.num_samples > 1) {
1159 RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; 1178 RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews};
@@ -1194,12 +1213,12 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
1194 dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); 1213 dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});
1195 } while (has_deleted_images); 1214 } while (has_deleted_images);
1196 } 1215 }
1197 return BlitImages{ 1216 return {BlitImages{
1198 .dst_id = dst_id, 1217 .dst_id = dst_id,
1199 .src_id = src_id, 1218 .src_id = src_id,
1200 .dst_format = dst_info.format, 1219 .dst_format = dst_info.format,
1201 .src_format = src_info.format, 1220 .src_format = src_info.format,
1202 }; 1221 }};
1203} 1222}
1204 1223
1205template <class P> 1224template <class P>
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 2fa8445eb..9db7195bf 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -174,7 +174,7 @@ public:
174 void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); 174 void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
175 175
176 /// Blit an image with the given parameters 176 /// Blit an image with the given parameters
177 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 177 bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
178 const Tegra::Engines::Fermi2D::Surface& src, 178 const Tegra::Engines::Fermi2D::Surface& src,
179 const Tegra::Engines::Fermi2D::Config& copy); 179 const Tegra::Engines::Fermi2D::Config& copy);
180 180
@@ -285,9 +285,9 @@ private:
285 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); 285 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
286 286
287 /// Return a blit image pair from the given guest blit parameters 287 /// Return a blit image pair from the given guest blit parameters
288 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, 288 [[nodiscard]] std::optional<BlitImages> GetBlitImages(
289 const Tegra::Engines::Fermi2D::Surface& src, 289 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
290 const Tegra::Engines::Fermi2D::Config& copy); 290 const Tegra::Engines::Fermi2D::Config& copy);
291 291
292 /// Find or create a sampler from a guest descriptor sampler 292 /// Find or create a sampler from a guest descriptor sampler
293 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); 293 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);