summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/hle/kernel/hle_ipc.h62
-rw-r--r--src/core/hle/service/audio/audin_u.cpp10
-rw-r--r--src/core/hle/service/audio/audout_u.cpp10
-rw-r--r--src/core/hle/service/audio/audren_u.cpp6
-rw-r--r--src/core/hle/service/audio/hwopus.cpp2
-rw-r--r--src/core/hle/service/bcat/bcat_module.cpp4
-rw-r--r--src/core/hle/service/es/es.cpp27
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp4
-rw-r--r--src/core/hle/service/ldn/ldn.cpp4
-rw-r--r--src/core/hle/service/nfc/nfc_user.cpp2
-rw-r--r--src/core/hle/service/nfp/nfp_user.cpp6
-rw-r--r--src/core/hle/service/ns/iplatform_service_manager.cpp11
-rw-r--r--src/core/hle/service/set/set.cpp2
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h4
-rw-r--r--src/video_core/control/channel_state.cpp2
-rw-r--r--src/video_core/engines/engine_upload.cpp8
-rw-r--r--src/video_core/engines/fermi_2d.cpp26
-rw-r--r--src/video_core/engines/fermi_2d.h9
-rw-r--r--src/video_core/engines/maxwell_3d.cpp8
-rw-r--r--src/video_core/engines/maxwell_dma.cpp67
-rw-r--r--src/video_core/engines/maxwell_dma.h3
-rw-r--r--src/video_core/engines/puller.cpp4
-rw-r--r--src/video_core/engines/sw_blitter/blitter.cpp238
-rw-r--r--src/video_core/engines/sw_blitter/blitter.h27
-rw-r--r--src/video_core/engines/sw_blitter/converter.cpp1234
-rw-r--r--src/video_core/engines/sw_blitter/converter.h36
-rw-r--r--src/video_core/engines/sw_blitter/generate_converters.py136
-rw-r--r--src/video_core/gpu.h18
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp3
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h1
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp3
-rw-r--r--src/video_core/surface.cpp12
-rw-r--r--src/video_core/surface.h4
-rw-r--r--src/video_core/texture_cache/formatter.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h29
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h8
38 files changed, 1919 insertions, 118 deletions
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index d87be72d6..e252b5f4b 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -199,7 +199,7 @@ public:
199 ~HLERequestContext(); 199 ~HLERequestContext();
200 200
201 /// Returns a pointer to the IPC command buffer for this request. 201 /// Returns a pointer to the IPC command buffer for this request.
202 u32* CommandBuffer() { 202 [[nodiscard]] u32* CommandBuffer() {
203 return cmd_buf.data(); 203 return cmd_buf.data();
204 } 204 }
205 205
@@ -207,7 +207,7 @@ public:
207 * Returns the session through which this request was made. This can be used as a map key to 207 * Returns the session through which this request was made. This can be used as a map key to
208 * access per-client data on services. 208 * access per-client data on services.
209 */ 209 */
210 Kernel::KServerSession* Session() { 210 [[nodiscard]] Kernel::KServerSession* Session() {
211 return server_session; 211 return server_session;
212 } 212 }
213 213
@@ -217,61 +217,61 @@ public:
217 /// Writes data from this context back to the requesting process/thread. 217 /// Writes data from this context back to the requesting process/thread.
218 Result WriteToOutgoingCommandBuffer(KThread& requesting_thread); 218 Result WriteToOutgoingCommandBuffer(KThread& requesting_thread);
219 219
220 u32_le GetHipcCommand() const { 220 [[nodiscard]] u32_le GetHipcCommand() const {
221 return command; 221 return command;
222 } 222 }
223 223
224 u32_le GetTipcCommand() const { 224 [[nodiscard]] u32_le GetTipcCommand() const {
225 return static_cast<u32_le>(command_header->type.Value()) - 225 return static_cast<u32_le>(command_header->type.Value()) -
226 static_cast<u32_le>(IPC::CommandType::TIPC_CommandRegion); 226 static_cast<u32_le>(IPC::CommandType::TIPC_CommandRegion);
227 } 227 }
228 228
229 u32_le GetCommand() const { 229 [[nodiscard]] u32_le GetCommand() const {
230 return command_header->IsTipc() ? GetTipcCommand() : GetHipcCommand(); 230 return command_header->IsTipc() ? GetTipcCommand() : GetHipcCommand();
231 } 231 }
232 232
233 bool IsTipc() const { 233 [[nodiscard]] bool IsTipc() const {
234 return command_header->IsTipc(); 234 return command_header->IsTipc();
235 } 235 }
236 236
237 IPC::CommandType GetCommandType() const { 237 [[nodiscard]] IPC::CommandType GetCommandType() const {
238 return command_header->type; 238 return command_header->type;
239 } 239 }
240 240
241 u64 GetPID() const { 241 [[nodiscard]] u64 GetPID() const {
242 return pid; 242 return pid;
243 } 243 }
244 244
245 u32 GetDataPayloadOffset() const { 245 [[nodiscard]] u32 GetDataPayloadOffset() const {
246 return data_payload_offset; 246 return data_payload_offset;
247 } 247 }
248 248
249 const std::vector<IPC::BufferDescriptorX>& BufferDescriptorX() const { 249 [[nodiscard]] const std::vector<IPC::BufferDescriptorX>& BufferDescriptorX() const {
250 return buffer_x_desciptors; 250 return buffer_x_desciptors;
251 } 251 }
252 252
253 const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorA() const { 253 [[nodiscard]] const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorA() const {
254 return buffer_a_desciptors; 254 return buffer_a_desciptors;
255 } 255 }
256 256
257 const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorB() const { 257 [[nodiscard]] const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorB() const {
258 return buffer_b_desciptors; 258 return buffer_b_desciptors;
259 } 259 }
260 260
261 const std::vector<IPC::BufferDescriptorC>& BufferDescriptorC() const { 261 [[nodiscard]] const std::vector<IPC::BufferDescriptorC>& BufferDescriptorC() const {
262 return buffer_c_desciptors; 262 return buffer_c_desciptors;
263 } 263 }
264 264
265 const IPC::DomainMessageHeader& GetDomainMessageHeader() const { 265 [[nodiscard]] const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
266 return domain_message_header.value(); 266 return domain_message_header.value();
267 } 267 }
268 268
269 bool HasDomainMessageHeader() const { 269 [[nodiscard]] bool HasDomainMessageHeader() const {
270 return domain_message_header.has_value(); 270 return domain_message_header.has_value();
271 } 271 }
272 272
273 /// Helper function to read a buffer using the appropriate buffer descriptor 273 /// Helper function to read a buffer using the appropriate buffer descriptor
274 std::vector<u8> ReadBuffer(std::size_t buffer_index = 0) const; 274 [[nodiscard]] std::vector<u8> ReadBuffer(std::size_t buffer_index = 0) const;
275 275
276 /// Helper function to write a buffer using the appropriate buffer descriptor 276 /// Helper function to write a buffer using the appropriate buffer descriptor
277 std::size_t WriteBuffer(const void* buffer, std::size_t size, 277 std::size_t WriteBuffer(const void* buffer, std::size_t size,
@@ -308,22 +308,34 @@ public:
308 } 308 }
309 309
310 /// Helper function to get the size of the input buffer 310 /// Helper function to get the size of the input buffer
311 std::size_t GetReadBufferSize(std::size_t buffer_index = 0) const; 311 [[nodiscard]] std::size_t GetReadBufferSize(std::size_t buffer_index = 0) const;
312 312
313 /// Helper function to get the size of the output buffer 313 /// Helper function to get the size of the output buffer
314 std::size_t GetWriteBufferSize(std::size_t buffer_index = 0) const; 314 [[nodiscard]] std::size_t GetWriteBufferSize(std::size_t buffer_index = 0) const;
315
316 /// Helper function to derive the number of elements able to be contained in the read buffer
317 template <typename T>
318 [[nodiscard]] std::size_t GetReadBufferNumElements(std::size_t buffer_index = 0) const {
319 return GetReadBufferSize(buffer_index) / sizeof(T);
320 }
321
322 /// Helper function to derive the number of elements able to be contained in the write buffer
323 template <typename T>
324 [[nodiscard]] std::size_t GetWriteBufferNumElements(std::size_t buffer_index = 0) const {
325 return GetWriteBufferSize(buffer_index) / sizeof(T);
326 }
315 327
316 /// Helper function to test whether the input buffer at buffer_index can be read 328 /// Helper function to test whether the input buffer at buffer_index can be read
317 bool CanReadBuffer(std::size_t buffer_index = 0) const; 329 [[nodiscard]] bool CanReadBuffer(std::size_t buffer_index = 0) const;
318 330
319 /// Helper function to test whether the output buffer at buffer_index can be written 331 /// Helper function to test whether the output buffer at buffer_index can be written
320 bool CanWriteBuffer(std::size_t buffer_index = 0) const; 332 [[nodiscard]] bool CanWriteBuffer(std::size_t buffer_index = 0) const;
321 333
322 Handle GetCopyHandle(std::size_t index) const { 334 [[nodiscard]] Handle GetCopyHandle(std::size_t index) const {
323 return incoming_copy_handles.at(index); 335 return incoming_copy_handles.at(index);
324 } 336 }
325 337
326 Handle GetMoveHandle(std::size_t index) const { 338 [[nodiscard]] Handle GetMoveHandle(std::size_t index) const {
327 return incoming_move_handles.at(index); 339 return incoming_move_handles.at(index);
328 } 340 }
329 341
@@ -348,13 +360,13 @@ public:
348 manager = manager_; 360 manager = manager_;
349 } 361 }
350 362
351 std::string Description() const; 363 [[nodiscard]] std::string Description() const;
352 364
353 KThread& GetThread() { 365 [[nodiscard]] KThread& GetThread() {
354 return *thread; 366 return *thread;
355 } 367 }
356 368
357 std::shared_ptr<SessionRequestManager> GetManager() const { 369 [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const {
358 return manager.lock(); 370 return manager.lock();
359 } 371 }
360 372
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index 608925dfc..053e8f9dd 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -122,10 +122,10 @@ private:
122 } 122 }
123 123
124 void GetReleasedAudioInBuffer(Kernel::HLERequestContext& ctx) { 124 void GetReleasedAudioInBuffer(Kernel::HLERequestContext& ctx) {
125 auto write_buffer_size = ctx.GetWriteBufferSize() / sizeof(u64); 125 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
126 std::vector<u64> released_buffers(write_buffer_size, 0); 126 std::vector<u64> released_buffers(write_buffer_size);
127 127
128 auto count = impl->GetReleasedBuffers(released_buffers); 128 const auto count = impl->GetReleasedBuffers(released_buffers);
129 129
130 [[maybe_unused]] std::string tags{}; 130 [[maybe_unused]] std::string tags{};
131 for (u32 i = 0; i < count; i++) { 131 for (u32 i = 0; i < count; i++) {
@@ -228,7 +228,7 @@ void AudInU::ListAudioIns(Kernel::HLERequestContext& ctx) {
228 LOG_DEBUG(Service_Audio, "called"); 228 LOG_DEBUG(Service_Audio, "called");
229 229
230 const auto write_count = 230 const auto write_count =
231 static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName)); 231 static_cast<u32>(ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>());
232 std::vector<AudioDevice::AudioDeviceName> device_names{}; 232 std::vector<AudioDevice::AudioDeviceName> device_names{};
233 233
234 u32 out_count{0}; 234 u32 out_count{0};
@@ -248,7 +248,7 @@ void AudInU::ListAudioInsAutoFiltered(Kernel::HLERequestContext& ctx) {
248 LOG_DEBUG(Service_Audio, "called"); 248 LOG_DEBUG(Service_Audio, "called");
249 249
250 const auto write_count = 250 const auto write_count =
251 static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName)); 251 static_cast<u32>(ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>());
252 std::vector<AudioDevice::AudioDeviceName> device_names{}; 252 std::vector<AudioDevice::AudioDeviceName> device_names{};
253 253
254 u32 out_count{0}; 254 u32 out_count{0};
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 122290c6a..29751f075 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -129,16 +129,16 @@ private:
129 } 129 }
130 130
131 void GetReleasedAudioOutBuffers(Kernel::HLERequestContext& ctx) { 131 void GetReleasedAudioOutBuffers(Kernel::HLERequestContext& ctx) {
132 auto write_buffer_size = ctx.GetWriteBufferSize() / sizeof(u64); 132 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
133 std::vector<u64> released_buffers(write_buffer_size, 0); 133 std::vector<u64> released_buffers(write_buffer_size);
134 134
135 auto count = impl->GetReleasedBuffers(released_buffers); 135 const auto count = impl->GetReleasedBuffers(released_buffers);
136 136
137 [[maybe_unused]] std::string tags{}; 137 [[maybe_unused]] std::string tags{};
138 for (u32 i = 0; i < count; i++) { 138 for (u32 i = 0; i < count; i++) {
139 tags += fmt::format("{:08X}, ", released_buffers[i]); 139 tags += fmt::format("{:08X}, ", released_buffers[i]);
140 } 140 }
141 [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()}; 141 [[maybe_unused]] const auto sessionid{impl->GetSystem().GetSessionId()};
142 LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count, 142 LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,
143 tags); 143 tags);
144 144
@@ -244,7 +244,7 @@ void AudOutU::ListAudioOuts(Kernel::HLERequestContext& ctx) {
244 std::scoped_lock l{impl->mutex}; 244 std::scoped_lock l{impl->mutex};
245 245
246 const auto write_count = 246 const auto write_count =
247 static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName)); 247 static_cast<u32>(ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>());
248 std::vector<AudioDevice::AudioDeviceName> device_names{}; 248 std::vector<AudioDevice::AudioDeviceName> device_names{};
249 if (write_count > 0) { 249 if (write_count > 0) {
250 device_names.emplace_back("DeviceOut"); 250 device_names.emplace_back("DeviceOut");
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 13423dca6..034ee273f 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -274,7 +274,7 @@ public:
274 274
275private: 275private:
276 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { 276 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) {
277 const size_t in_count = ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName); 277 const size_t in_count = ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>();
278 278
279 std::vector<AudioDevice::AudioDeviceName> out_names{}; 279 std::vector<AudioDevice::AudioDeviceName> out_names{};
280 280
@@ -335,7 +335,7 @@ private:
335 } 335 }
336 336
337 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { 337 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) {
338 const auto write_size = ctx.GetWriteBufferSize() / sizeof(char); 338 const auto write_size = ctx.GetWriteBufferSize();
339 std::string out_name{"AudioTvOutput"}; 339 std::string out_name{"AudioTvOutput"};
340 340
341 LOG_DEBUG(Service_Audio, "(STUBBED) called. Name={}", out_name); 341 LOG_DEBUG(Service_Audio, "(STUBBED) called. Name={}", out_name);
@@ -387,7 +387,7 @@ private:
387 } 387 }
388 388
389 void ListAudioOutputDeviceName(Kernel::HLERequestContext& ctx) { 389 void ListAudioOutputDeviceName(Kernel::HLERequestContext& ctx) {
390 const size_t in_count = ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName); 390 const size_t in_count = ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>();
391 391
392 std::vector<AudioDevice::AudioDeviceName> out_names{}; 392 std::vector<AudioDevice::AudioDeviceName> out_names{};
393 393
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 8bafc3a98..825fb8bcc 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -68,7 +68,7 @@ private:
68 ExtraBehavior extra_behavior) { 68 ExtraBehavior extra_behavior) {
69 u32 consumed = 0; 69 u32 consumed = 0;
70 u32 sample_count = 0; 70 u32 sample_count = 0;
71 std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); 71 std::vector<opus_int16> samples(ctx.GetWriteBufferNumElements<opus_int16>());
72 72
73 if (extra_behavior == ExtraBehavior::ResetContext) { 73 if (extra_behavior == ExtraBehavior::ResetContext) {
74 ResetDecoderContext(); 74 ResetDecoderContext();
diff --git a/src/core/hle/service/bcat/bcat_module.cpp b/src/core/hle/service/bcat/bcat_module.cpp
index bc08ac487..cbe690a5d 100644
--- a/src/core/hle/service/bcat/bcat_module.cpp
+++ b/src/core/hle/service/bcat/bcat_module.cpp
@@ -443,7 +443,7 @@ private:
443 } 443 }
444 444
445 void Read(Kernel::HLERequestContext& ctx) { 445 void Read(Kernel::HLERequestContext& ctx) {
446 auto write_size = ctx.GetWriteBufferSize() / sizeof(DeliveryCacheDirectoryEntry); 446 auto write_size = ctx.GetWriteBufferNumElements<DeliveryCacheDirectoryEntry>();
447 447
448 LOG_DEBUG(Service_BCAT, "called, write_size={:016X}", write_size); 448 LOG_DEBUG(Service_BCAT, "called, write_size={:016X}", write_size);
449 449
@@ -533,7 +533,7 @@ private:
533 } 533 }
534 534
535 void EnumerateDeliveryCacheDirectory(Kernel::HLERequestContext& ctx) { 535 void EnumerateDeliveryCacheDirectory(Kernel::HLERequestContext& ctx) {
536 auto size = ctx.GetWriteBufferSize() / sizeof(DirectoryName); 536 auto size = ctx.GetWriteBufferNumElements<DirectoryName>();
537 537
538 LOG_DEBUG(Service_BCAT, "called, size={:016X}", size); 538 LOG_DEBUG(Service_BCAT, "called, size={:016X}", size);
539 539
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp
index ff9b0427c..d183e5829 100644
--- a/src/core/hle/service/es/es.cpp
+++ b/src/core/hle/service/es/es.cpp
@@ -192,12 +192,10 @@ private:
192 } 192 }
193 193
194 void ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) { 194 void ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) {
195 u32 out_entries; 195 size_t out_entries = 0;
196 if (keys.GetCommonTickets().empty()) 196 if (!keys.GetCommonTickets().empty()) {
197 out_entries = 0; 197 out_entries = ctx.GetWriteBufferNumElements<u128>();
198 else 198 }
199 out_entries = static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(u128));
200
201 LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries); 199 LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries);
202 200
203 keys.PopulateTickets(); 201 keys.PopulateTickets();
@@ -206,20 +204,19 @@ private:
206 std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), 204 std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids),
207 [](const auto& pair) { return pair.first; }); 205 [](const auto& pair) { return pair.first; });
208 206
209 out_entries = static_cast<u32>(std::min<std::size_t>(ids.size(), out_entries)); 207 out_entries = std::min(ids.size(), out_entries);
210 ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); 208 ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128));
211 209
212 IPC::ResponseBuilder rb{ctx, 3}; 210 IPC::ResponseBuilder rb{ctx, 3};
213 rb.Push(ResultSuccess); 211 rb.Push(ResultSuccess);
214 rb.Push<u32>(out_entries); 212 rb.Push<u32>(static_cast<u32>(out_entries));
215 } 213 }
216 214
217 void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) { 215 void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) {
218 u32 out_entries; 216 size_t out_entries = 0;
219 if (keys.GetPersonalizedTickets().empty()) 217 if (!keys.GetPersonalizedTickets().empty()) {
220 out_entries = 0; 218 out_entries = ctx.GetWriteBufferNumElements<u128>();
221 else 219 }
222 out_entries = static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(u128));
223 220
224 LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries); 221 LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries);
225 222
@@ -229,12 +226,12 @@ private:
229 std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), 226 std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids),
230 [](const auto& pair) { return pair.first; }); 227 [](const auto& pair) { return pair.first; });
231 228
232 out_entries = static_cast<u32>(std::min<std::size_t>(ids.size(), out_entries)); 229 out_entries = std::min(ids.size(), out_entries);
233 ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); 230 ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128));
234 231
235 IPC::ResponseBuilder rb{ctx, 3}; 232 IPC::ResponseBuilder rb{ctx, 3};
236 rb.Push(ResultSuccess); 233 rb.Push(ResultSuccess);
237 rb.Push<u32>(out_entries); 234 rb.Push<u32>(static_cast<u32>(out_entries));
238 } 235 }
239 236
240 void GetCommonTicketSize(Kernel::HLERequestContext& ctx) { 237 void GetCommonTicketSize(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index c08274ef9..fbb16a7da 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -277,7 +277,7 @@ private:
277 LOG_DEBUG(Service_FS, "called."); 277 LOG_DEBUG(Service_FS, "called.");
278 278
279 // Calculate how many entries we can fit in the output buffer 279 // Calculate how many entries we can fit in the output buffer
280 const u64 count_entries = ctx.GetWriteBufferSize() / sizeof(FileSys::Entry); 280 const u64 count_entries = ctx.GetWriteBufferNumElements<FileSys::Entry>();
281 281
282 // Cap at total number of entries. 282 // Cap at total number of entries.
283 const u64 actual_entries = std::min(count_entries, entries.size() - next_entry_index); 283 const u64 actual_entries = std::min(count_entries, entries.size() - next_entry_index);
@@ -543,7 +543,7 @@ public:
543 LOG_DEBUG(Service_FS, "called"); 543 LOG_DEBUG(Service_FS, "called");
544 544
545 // Calculate how many entries we can fit in the output buffer 545 // Calculate how many entries we can fit in the output buffer
546 const u64 count_entries = ctx.GetWriteBufferSize() / sizeof(SaveDataInfo); 546 const u64 count_entries = ctx.GetWriteBufferNumElements<SaveDataInfo>();
547 547
548 // Cap at total number of entries. 548 // Cap at total number of entries.
549 const u64 actual_entries = std::min(count_entries, info.size() - next_entry_index); 549 const u64 actual_entries = std::min(count_entries, info.size() - next_entry_index);
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index 6df563136..c49c61cff 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -292,7 +292,7 @@ public:
292 292
293 void GetNetworkInfoLatestUpdate(Kernel::HLERequestContext& ctx) { 293 void GetNetworkInfoLatestUpdate(Kernel::HLERequestContext& ctx) {
294 const std::size_t network_buffer_size = ctx.GetWriteBufferSize(0); 294 const std::size_t network_buffer_size = ctx.GetWriteBufferSize(0);
295 const std::size_t node_buffer_count = ctx.GetWriteBufferSize(1) / sizeof(NodeLatestUpdate); 295 const std::size_t node_buffer_count = ctx.GetWriteBufferNumElements<NodeLatestUpdate>(1);
296 296
297 if (node_buffer_count == 0 || network_buffer_size != sizeof(NetworkInfo)) { 297 if (node_buffer_count == 0 || network_buffer_size != sizeof(NetworkInfo)) {
298 LOG_ERROR(Service_LDN, "Invalid buffer, size = {}, count = {}", network_buffer_size, 298 LOG_ERROR(Service_LDN, "Invalid buffer, size = {}, count = {}", network_buffer_size,
@@ -333,7 +333,7 @@ public:
333 const auto channel{rp.PopEnum<WifiChannel>()}; 333 const auto channel{rp.PopEnum<WifiChannel>()};
334 const auto scan_filter{rp.PopRaw<ScanFilter>()}; 334 const auto scan_filter{rp.PopRaw<ScanFilter>()};
335 335
336 const std::size_t network_info_size = ctx.GetWriteBufferSize() / sizeof(NetworkInfo); 336 const std::size_t network_info_size = ctx.GetWriteBufferNumElements<NetworkInfo>();
337 337
338 if (network_info_size == 0) { 338 if (network_info_size == 0) {
339 LOG_ERROR(Service_LDN, "Invalid buffer size {}", network_info_size); 339 LOG_ERROR(Service_LDN, "Invalid buffer size {}", network_info_size);
diff --git a/src/core/hle/service/nfc/nfc_user.cpp b/src/core/hle/service/nfc/nfc_user.cpp
index 0753333bf..ced2d560b 100644
--- a/src/core/hle/service/nfc/nfc_user.cpp
+++ b/src/core/hle/service/nfc/nfc_user.cpp
@@ -118,7 +118,7 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) {
118 } 118 }
119 119
120 std::vector<u64> nfp_devices; 120 std::vector<u64> nfp_devices;
121 const std::size_t max_allowed_devices = ctx.GetWriteBufferSize() / sizeof(u64); 121 const std::size_t max_allowed_devices = ctx.GetWriteBufferNumElements<u64>();
122 122
123 for (auto& device : devices) { 123 for (auto& device : devices) {
124 if (nfp_devices.size() >= max_allowed_devices) { 124 if (nfp_devices.size() >= max_allowed_devices) {
diff --git a/src/core/hle/service/nfp/nfp_user.cpp b/src/core/hle/service/nfp/nfp_user.cpp
index 2fe3c0ea0..49816b4c7 100644
--- a/src/core/hle/service/nfp/nfp_user.cpp
+++ b/src/core/hle/service/nfp/nfp_user.cpp
@@ -104,9 +104,9 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) {
104 } 104 }
105 105
106 std::vector<u64> nfp_devices; 106 std::vector<u64> nfp_devices;
107 const std::size_t max_allowed_devices = ctx.GetWriteBufferSize() / sizeof(u64); 107 const std::size_t max_allowed_devices = ctx.GetWriteBufferNumElements<u64>();
108 108
109 for (auto& device : devices) { 109 for (const auto& device : devices) {
110 if (nfp_devices.size() >= max_allowed_devices) { 110 if (nfp_devices.size() >= max_allowed_devices) {
111 continue; 111 continue;
112 } 112 }
@@ -115,7 +115,7 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) {
115 } 115 }
116 } 116 }
117 117
118 if (nfp_devices.size() == 0) { 118 if (nfp_devices.empty()) {
119 IPC::ResponseBuilder rb{ctx, 2}; 119 IPC::ResponseBuilder rb{ctx, 2};
120 rb.Push(DeviceNotFound); 120 rb.Push(DeviceNotFound);
121 return; 121 return;
diff --git a/src/core/hle/service/ns/iplatform_service_manager.cpp b/src/core/hle/service/ns/iplatform_service_manager.cpp
index fd047ff26..1fab2f0dd 100644
--- a/src/core/hle/service/ns/iplatform_service_manager.cpp
+++ b/src/core/hle/service/ns/iplatform_service_manager.cpp
@@ -279,13 +279,10 @@ void IPlatformServiceManager::GetSharedFontInOrderOfPriority(Kernel::HLERequestC
279 font_sizes.push_back(region.size); 279 font_sizes.push_back(region.size);
280 } 280 }
281 281
282 // Resize buffers if game requests smaller size output. 282 // Resize buffers if game requests smaller size output
283 font_codes.resize( 283 font_codes.resize(std::min(font_codes.size(), ctx.GetWriteBufferNumElements<u32>(0)));
284 std::min<std::size_t>(font_codes.size(), ctx.GetWriteBufferSize(0) / sizeof(u32))); 284 font_offsets.resize(std::min(font_offsets.size(), ctx.GetWriteBufferNumElements<u32>(1)));
285 font_offsets.resize( 285 font_sizes.resize(std::min(font_sizes.size(), ctx.GetWriteBufferNumElements<u32>(2)));
286 std::min<std::size_t>(font_offsets.size(), ctx.GetWriteBufferSize(1) / sizeof(u32)));
287 font_sizes.resize(
288 std::min<std::size_t>(font_sizes.size(), ctx.GetWriteBufferSize(2) / sizeof(u32)));
289 286
290 ctx.WriteBuffer(font_codes, 0); 287 ctx.WriteBuffer(font_codes, 0);
291 ctx.WriteBuffer(font_offsets, 1); 288 ctx.WriteBuffer(font_offsets, 1);
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index f761c2da4..4f1a8d6b7 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -83,7 +83,7 @@ void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_la
83} 83}
84 84
85void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_entries) { 85void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_entries) {
86 const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode); 86 const std::size_t requested_amount = ctx.GetWriteBufferNumElements<LanguageCode>();
87 const std::size_t max_amount = std::min(requested_amount, max_entries); 87 const std::size_t max_amount = std::min(requested_amount, max_entries);
88 const std::size_t copy_amount = std::min(available_language_codes.size(), max_amount); 88 const std::size_t copy_amount = std::min(available_language_codes.size(), max_amount);
89 const std::size_t copy_size = copy_amount * sizeof(LanguageCode); 89 const std::size_t copy_size = copy_amount * sizeof(LanguageCode);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d7f7d336c..b03a30992 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -28,6 +28,10 @@ add_library(video_core STATIC
28 dirty_flags.h 28 dirty_flags.h
29 dma_pusher.cpp 29 dma_pusher.cpp
30 dma_pusher.h 30 dma_pusher.h
31 engines/sw_blitter/blitter.cpp
32 engines/sw_blitter/blitter.h
33 engines/sw_blitter/converter.cpp
34 engines/sw_blitter/converter.h
31 engines/const_buffer_info.h 35 engines/const_buffer_info.h
32 engines/engine_interface.h 36 engines/engine_interface.h
33 engines/engine_upload.cpp 37 engines/engine_upload.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 599551013..5d3a8293b 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1742,12 +1742,12 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1742 SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); 1742 SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
1743 1743
1744 if constexpr (USE_MEMORY_MAPS) { 1744 if constexpr (USE_MEMORY_MAPS) {
1745 auto upload_staging = runtime.UploadStagingBuffer(copy_size);
1745 std::array copies{BufferCopy{ 1746 std::array copies{BufferCopy{
1746 .src_offset = 0, 1747 .src_offset = upload_staging.offset,
1747 .dst_offset = buffer.Offset(dest_address), 1748 .dst_offset = buffer.Offset(dest_address),
1748 .size = copy_size, 1749 .size = copy_size,
1749 }}; 1750 }};
1750 auto upload_staging = runtime.UploadStagingBuffer(copy_size);
1751 u8* const src_pointer = upload_staging.mapped_span.data(); 1751 u8* const src_pointer = upload_staging.mapped_span.data();
1752 std::memcpy(src_pointer, inlined_buffer.data(), copy_size); 1752 std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
1753 runtime.CopyBuffer(buffer, upload_staging.buffer, copies); 1753 runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index cdecc3a91..832025d75 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -20,7 +20,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) {
20 ASSERT(memory_manager); 20 ASSERT(memory_manager);
21 dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this); 21 dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
22 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager); 22 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
23 fermi_2d = std::make_unique<Engines::Fermi2D>(); 23 fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
24 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager); 24 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
25 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); 25 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
26 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); 26 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index a34819234..28aa85f32 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -51,11 +51,11 @@ void State::ProcessData(std::span<const u8> read_buffer) {
51 } else { 51 } else {
52 for (u32 line = 0; line < regs.line_count; ++line) { 52 for (u32 line = 0; line < regs.line_count; ++line) {
53 const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch; 53 const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
54 memory_manager.WriteBlockUnsafe( 54 std::span<const u8> buffer(read_buffer.data() +
55 dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in, 55 static_cast<size_t>(line) * regs.line_length_in,
56 regs.line_length_in); 56 regs.line_length_in);
57 rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer);
57 } 58 }
58 memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
59 } 59 }
60 } else { 60 } else {
61 u32 width = regs.dest.width; 61 u32 width = regs.dest.width;
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 453e0fb01..c6478ae85 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -3,17 +3,25 @@
3 3
4#include "common/assert.h" 4#include "common/assert.h"
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "common/microprofile.h"
6#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
7#include "video_core/memory_manager.h" 8#include "video_core/engines/sw_blitter/blitter.h"
8#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
9#include "video_core/surface.h" 10#include "video_core/surface.h"
11#include "video_core/textures/decoders.h"
12
13MICROPROFILE_DECLARE(GPU_BlitEngine);
14MICROPROFILE_DEFINE(GPU_BlitEngine, "GPU", "Blit Engine", MP_RGB(224, 224, 128));
10 15
11using VideoCore::Surface::BytesPerBlock; 16using VideoCore::Surface::BytesPerBlock;
12using VideoCore::Surface::PixelFormatFromRenderTargetFormat; 17using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
13 18
14namespace Tegra::Engines { 19namespace Tegra::Engines {
15 20
16Fermi2D::Fermi2D() { 21using namespace Texture;
22
23Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
24 sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
17 // Nvidia's OpenGL driver seems to assume these values 25 // Nvidia's OpenGL driver seems to assume these values
18 regs.src.depth = 1; 26 regs.src.depth = 1;
19 regs.dst.depth = 1; 27 regs.dst.depth = 1;
@@ -42,6 +50,7 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
42} 50}
43 51
44void Fermi2D::Blit() { 52void Fermi2D::Blit() {
53 MICROPROFILE_SCOPE(GPU_BlitEngine);
45 LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", 54 LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
46 regs.src.Address(), regs.dst.Address()); 55 regs.src.Address(), regs.dst.Address());
47 56
@@ -52,9 +61,16 @@ void Fermi2D::Blit() {
52 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); 61 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
53 62
54 const auto& args = regs.pixels_from_memory; 63 const auto& args = regs.pixels_from_memory;
64 constexpr s64 null_derivate = 1ULL << 32;
65 Surface src = regs.src;
66 const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
67 const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 &&
68 src.format != regs.dst.format;
55 Config config{ 69 Config config{
56 .operation = regs.operation, 70 .operation = regs.operation,
57 .filter = args.sample_mode.filter, 71 .filter = args.sample_mode.filter,
72 .must_accelerate =
73 args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu,
58 .dst_x0 = args.dst_x0, 74 .dst_x0 = args.dst_x0,
59 .dst_y0 = args.dst_y0, 75 .dst_y0 = args.dst_y0,
60 .dst_x1 = args.dst_x0 + args.dst_width, 76 .dst_x1 = args.dst_x0 + args.dst_width,
@@ -64,8 +80,7 @@ void Fermi2D::Blit() {
64 .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), 80 .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
65 .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), 81 .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
66 }; 82 };
67 Surface src = regs.src; 83
68 const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
69 const auto need_align_to_pitch = 84 const auto need_align_to_pitch =
70 src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && 85 src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
71 static_cast<s32>(src.width) == config.src_x1 && 86 static_cast<s32>(src.width) == config.src_x1 &&
@@ -78,8 +93,9 @@ void Fermi2D::Blit() {
78 config.src_x1 -= config.src_x0; 93 config.src_x1 -= config.src_x0;
79 config.src_x0 = 0; 94 config.src_x0 = 0;
80 } 95 }
96
81 if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { 97 if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
82 UNIMPLEMENTED(); 98 sw_blitter->Blit(src, regs.dst, config);
83 } 99 }
84} 100}
85 101
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 1229aa35b..24b518cb5 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,6 +5,7 @@
5 5
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <memory>
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_funcs.h" 10#include "common/common_funcs.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
@@ -21,6 +22,10 @@ class RasterizerInterface;
21 22
22namespace Tegra::Engines { 23namespace Tegra::Engines {
23 24
25namespace Blitter {
26class SoftwareBlitEngine;
27}
28
24/** 29/**
25 * This Engine is known as G80_2D. Documentation can be found in: 30 * This Engine is known as G80_2D. Documentation can be found in:
26 * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml 31 * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
@@ -32,7 +37,7 @@ namespace Tegra::Engines {
32 37
33class Fermi2D final : public EngineInterface { 38class Fermi2D final : public EngineInterface {
34public: 39public:
35 explicit Fermi2D(); 40 explicit Fermi2D(MemoryManager& memory_manager_);
36 ~Fermi2D() override; 41 ~Fermi2D() override;
37 42
38 /// Binds a rasterizer to this engine. 43 /// Binds a rasterizer to this engine.
@@ -286,6 +291,7 @@ public:
286 struct Config { 291 struct Config {
287 Operation operation; 292 Operation operation;
288 Filter filter; 293 Filter filter;
294 bool must_accelerate;
289 s32 dst_x0; 295 s32 dst_x0;
290 s32 dst_y0; 296 s32 dst_y0;
291 s32 dst_x1; 297 s32 dst_x1;
@@ -298,6 +304,7 @@ public:
298 304
299private: 305private:
300 VideoCore::RasterizerInterface* rasterizer = nullptr; 306 VideoCore::RasterizerInterface* rasterizer = nullptr;
307 std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
301 308
302 /// Performs the copy from the source surface to the destination surface as configured in the 309 /// Performs the copy from the source surface to the destination surface as configured in the
303 /// registers. 310 /// registers.
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5bb1427c1..6d43e23ea 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -249,9 +249,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
249 return; 249 return;
250 case MAXWELL3D_REG_INDEX(fragment_barrier): 250 case MAXWELL3D_REG_INDEX(fragment_barrier):
251 return rasterizer->FragmentBarrier(); 251 return rasterizer->FragmentBarrier();
252 case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache):
253 rasterizer->InvalidateGPUCache();
254 return rasterizer->WaitForIdle();
255 case MAXWELL3D_REG_INDEX(tiled_cache_barrier): 252 case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
256 return rasterizer->TiledCacheBarrier(); 253 return rasterizer->TiledCacheBarrier();
257 } 254 }
@@ -511,10 +508,7 @@ void Maxwell3D::ProcessCounterReset() {
511 508
512void Maxwell3D::ProcessSyncPoint() { 509void Maxwell3D::ProcessSyncPoint() {
513 const u32 sync_point = regs.sync_info.sync_point.Value(); 510 const u32 sync_point = regs.sync_info.sync_point.Value();
514 const u32 cache_flush = regs.sync_info.clean_l2.Value(); 511 [[maybe_unused]] const u32 cache_flush = regs.sync_info.clean_l2.Value();
515 if (cache_flush != 0) {
516 rasterizer->InvalidateGPUCache();
517 }
518 rasterizer->SignalSyncPoint(sync_point); 512 rasterizer->SignalSyncPoint(sync_point);
519} 513}
520 514
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 1bf6ca2dd..334429514 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -62,7 +62,8 @@ void MaxwellDMA::Launch() {
62 62
63 if (!is_src_pitch && !is_dst_pitch) { 63 if (!is_src_pitch && !is_dst_pitch) {
64 // If both the source and the destination are in block layout, assert. 64 // If both the source and the destination are in block layout, assert.
65 UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented"); 65 CopyBlockLinearToBlockLinear();
66 ReleaseSemaphore();
66 return; 67 return;
67 } 68 }
68 69
@@ -291,6 +292,70 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
291 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 292 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
292} 293}
293 294
295void MaxwellDMA::CopyBlockLinearToBlockLinear() {
296 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
297
298 const bool is_remapping = regs.launch_dma.remap_enable != 0;
299
300 // Deswizzle the input and copy it over.
301 const Parameters& src = regs.src_params;
302 const Parameters& dst = regs.dst_params;
303
304 const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
305 const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
306
307 const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
308
309 u32 src_width = src.width;
310 u32 dst_width = dst.width;
311 u32 x_elements = regs.line_length_in;
312 u32 src_x_offset = src.origin.x;
313 u32 dst_x_offset = dst.origin.x;
314 u32 bpp_shift = 0U;
315 if (!is_remapping) {
316 bpp_shift = Common::FoldRight(
317 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
318 src_width, dst_width, x_elements, src_x_offset, dst_x_offset,
319 static_cast<u32>(regs.offset_in), static_cast<u32>(regs.offset_out));
320 src_width >>= bpp_shift;
321 dst_width >>= bpp_shift;
322 x_elements >>= bpp_shift;
323 src_x_offset >>= bpp_shift;
324 dst_x_offset >>= bpp_shift;
325 }
326
327 const u32 bytes_per_pixel = base_bpp << bpp_shift;
328 const size_t src_size = CalculateSize(true, bytes_per_pixel, src_width, src.height, src.depth,
329 src.block_size.height, src.block_size.depth);
330 const size_t dst_size = CalculateSize(true, bytes_per_pixel, dst_width, dst.height, dst.depth,
331 dst.block_size.height, dst.block_size.depth);
332
333 const u32 pitch = x_elements * bytes_per_pixel;
334 const size_t mid_buffer_size = pitch * regs.line_count;
335
336 if (read_buffer.size() < src_size) {
337 read_buffer.resize(src_size);
338 }
339 if (write_buffer.size() < dst_size) {
340 write_buffer.resize(dst_size);
341 }
342
343 intermediate_buffer.resize(mid_buffer_size);
344
345 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
346 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
347
348 UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height,
349 src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count,
350 src.block_size.height, src.block_size.depth, pitch);
351
352 SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height,
353 dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
354 dst.block_size.height, dst.block_size.depth, pitch);
355
356 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
357}
358
294void MaxwellDMA::ReleaseSemaphore() { 359void MaxwellDMA::ReleaseSemaphore() {
295 const auto type = regs.launch_dma.semaphore_type; 360 const auto type = regs.launch_dma.semaphore_type;
296 const GPUVAddr address = regs.semaphore.address; 361 const GPUVAddr address = regs.semaphore.address;
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 953e34adc..d40d3d302 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -223,6 +223,8 @@ private:
223 223
224 void CopyPitchToBlockLinear(); 224 void CopyPitchToBlockLinear();
225 225
226 void CopyBlockLinearToBlockLinear();
227
226 void FastCopyBlockLinearToPitch(); 228 void FastCopyBlockLinearToPitch();
227 229
228 void ReleaseSemaphore(); 230 void ReleaseSemaphore();
@@ -234,6 +236,7 @@ private:
234 236
235 std::vector<u8> read_buffer; 237 std::vector<u8> read_buffer;
236 std::vector<u8> write_buffer; 238 std::vector<u8> write_buffer;
239 std::vector<u8> intermediate_buffer;
237 240
238 static constexpr std::size_t NUM_REGS = 0x800; 241 static constexpr std::size_t NUM_REGS = 0x800;
239 struct Regs { 242 struct Regs {
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 4d2278811..c308ba3fc 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -118,7 +118,7 @@ void Puller::ProcessSemaphoreRelease() {
118 std::function<void()> operation([this, sequence_address, payload] { 118 std::function<void()> operation([this, sequence_address, payload] {
119 memory_manager.Write<u32>(sequence_address, payload); 119 memory_manager.Write<u32>(sequence_address, payload);
120 }); 120 });
121 rasterizer->SyncOperation(std::move(operation)); 121 rasterizer->SignalFence(std::move(operation));
122} 122}
123 123
124void Puller::ProcessSemaphoreAcquire() { 124void Puller::ProcessSemaphoreAcquire() {
@@ -151,8 +151,8 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
151 case BufferMethods::SemaphoreAddressLow: 151 case BufferMethods::SemaphoreAddressLow:
152 case BufferMethods::SemaphoreSequencePayload: 152 case BufferMethods::SemaphoreSequencePayload:
153 case BufferMethods::SyncpointPayload: 153 case BufferMethods::SyncpointPayload:
154 break;
155 case BufferMethods::WrcacheFlush: 154 case BufferMethods::WrcacheFlush:
155 break;
156 case BufferMethods::RefCnt: 156 case BufferMethods::RefCnt:
157 rasterizer->SignalReference(); 157 rasterizer->SignalReference();
158 break; 158 break;
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp
new file mode 100644
index 000000000..2f1ea4626
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/blitter.cpp
@@ -0,0 +1,238 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include <algorithm>
5#include <cmath>
6#include <vector>
7
8#include "video_core/engines/sw_blitter/blitter.h"
9#include "video_core/engines/sw_blitter/converter.h"
10#include "video_core/memory_manager.h"
11#include "video_core/surface.h"
12#include "video_core/textures/decoders.h"
13
14namespace Tegra {
15class MemoryManager;
16}
17
18using VideoCore::Surface::BytesPerBlock;
19using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
20
21namespace Tegra::Engines::Blitter {
22
23using namespace Texture;
24
25namespace {
26
27constexpr size_t ir_components = 4;
28
29void NearestNeighbor(std::span<const u8> input, std::span<u8> output, u32 src_width, u32 src_height,
30 u32 dst_width, u32 dst_height, size_t bpp) {
31 const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
32 const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
33 size_t src_y = 0;
34 for (u32 y = 0; y < dst_height; y++) {
35 size_t src_x = 0;
36 for (u32 x = 0; x < dst_width; x++) {
37 const size_t read_from = ((src_y * src_width + src_x) >> 32) * bpp;
38 const size_t write_to = (y * dst_width + x) * bpp;
39
40 std::memcpy(&output[write_to], &input[read_from], bpp);
41 src_x += dx_du;
42 }
43 src_y += dy_dv;
44 }
45}
46
47void NearestNeighborFast(std::span<const f32> input, std::span<f32> output, u32 src_width,
48 u32 src_height, u32 dst_width, u32 dst_height) {
49 const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
50 const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
51 size_t src_y = 0;
52 for (u32 y = 0; y < dst_height; y++) {
53 size_t src_x = 0;
54 for (u32 x = 0; x < dst_width; x++) {
55 const size_t read_from = ((src_y * src_width + src_x) >> 32) * ir_components;
56 const size_t write_to = (y * dst_width + x) * ir_components;
57
58 std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * ir_components);
59 src_x += dx_du;
60 }
61 src_y += dy_dv;
62 }
63}
64
65void Bilinear(std::span<const f32> input, std::span<f32> output, size_t src_width,
66 size_t src_height, size_t dst_width, size_t dst_height) {
67 const auto bilinear_sample = [](std::span<const f32> x0_y0, std::span<const f32> x1_y0,
68 std::span<const f32> x0_y1, std::span<const f32> x1_y1,
69 f32 weight_x, f32 weight_y) {
70 std::array<f32, ir_components> result{};
71 for (size_t i = 0; i < ir_components; i++) {
72 const f32 a = std::lerp(x0_y0[i], x1_y0[i], weight_x);
73 const f32 b = std::lerp(x0_y1[i], x1_y1[i], weight_x);
74 result[i] = std::lerp(a, b, weight_y);
75 }
76 return result;
77 };
78 const f32 dx_du =
79 dst_width > 1 ? static_cast<f32>(src_width - 1) / static_cast<f32>(dst_width - 1) : 0.f;
80 const f32 dy_dv =
81 dst_height > 1 ? static_cast<f32>(src_height - 1) / static_cast<f32>(dst_height - 1) : 0.f;
82 for (u32 y = 0; y < dst_height; y++) {
83 for (u32 x = 0; x < dst_width; x++) {
84 const f32 x_low = std::floor(static_cast<f32>(x) * dx_du);
85 const f32 y_low = std::floor(static_cast<f32>(y) * dy_dv);
86 const f32 x_high = std::ceil(static_cast<f32>(x) * dx_du);
87 const f32 y_high = std::ceil(static_cast<f32>(y) * dy_dv);
88 const f32 weight_x = (static_cast<f32>(x) * dx_du) - x_low;
89 const f32 weight_y = (static_cast<f32>(y) * dy_dv) - y_low;
90
91 const auto read_src = [&](f32 in_x, f32 in_y) {
92 const size_t read_from =
93 ((static_cast<size_t>(in_x) * src_width + static_cast<size_t>(in_y)) >> 32) *
94 ir_components;
95 return std::span<const f32>(&input[read_from], ir_components);
96 };
97
98 auto x0_y0 = read_src(x_low, y_low);
99 auto x1_y0 = read_src(x_high, y_low);
100 auto x0_y1 = read_src(x_low, y_high);
101 auto x1_y1 = read_src(x_high, y_high);
102
103 const auto result = bilinear_sample(x0_y0, x1_y0, x0_y1, x1_y1, weight_x, weight_y);
104
105 const size_t write_to = (y * dst_width + x) * ir_components;
106
107 std::memcpy(&output[write_to], &result, sizeof(f32) * ir_components);
108 }
109 }
110}
111
112} // namespace
113
/// Private state of SoftwareBlitEngine. The buffers are members so their storage is reused
/// between calls to Blit instead of being reallocated each time.
114struct SoftwareBlitEngine::BlitEngineImpl {
    /// Raw surface bytes as read from guest memory (first the source, later the destination).
115 std::vector<u8> tmp_buffer;
    /// Tightly packed copy of the source rectangle.
116 std::vector<u8> src_buffer;
    /// Tightly packed destination rectangle, ready to be written into the destination surface.
117 std::vector<u8> dst_buffer;
    /// Source rectangle converted to f32 components (ir_components per pixel).
118 std::vector<f32> intermediate_src;
    /// Scaled rectangle in f32 components, before conversion to the destination format.
119 std::vector<f32> intermediate_dst;
    /// Supplies the per-format converters used when going through the f32 representation.
120 ConverterFactory converter_factory;
121};
122
123SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_)
124 : memory_manager{memory_manager_} {
125 impl = std::make_unique<BlitEngineImpl>();
126}
127
// Defaulted here rather than in the header: the header only forward-declares BlitEngineImpl,
// and the std::unique_ptr member needs the complete type to destroy it.
128SoftwareBlitEngine::~SoftwareBlitEngine() = default;
129
130bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
131 Fermi2D::Config& config) {
132 const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) {
133 if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) {
134 return CalculateSize(true, bytes_per_pixel, surface.width, surface.height,
135 surface.depth, surface.block_height, surface.block_depth);
136 }
137 return static_cast<size_t>(surface.pitch * surface.height);
138 };
139 const auto process_pitch_linear = [](bool unpack, std::span<const u8> input,
140 std::span<u8> output, u32 extent_x, u32 extent_y,
141 u32 pitch, u32 x0, u32 y0, size_t bpp) {
142 const size_t base_offset = x0 * bpp;
143 const size_t copy_size = extent_x * bpp;
144 for (u32 y = y0; y < extent_y; y++) {
145 const size_t first_offset = y * pitch + base_offset;
146 const size_t second_offset = y * extent_x * bpp;
147 u8* write_to = unpack ? &output[first_offset] : &output[second_offset];
148 const u8* read_from = unpack ? &input[second_offset] : &input[first_offset];
149 std::memcpy(write_to, read_from, copy_size);
150 }
151 };
152
153 const u32 src_extent_x = config.src_x1 - config.src_x0;
154 const u32 src_extent_y = config.src_y1 - config.src_y0;
155
156 const u32 dst_extent_x = config.dst_x1 - config.dst_x0;
157 const u32 dst_extent_y = config.dst_y1 - config.dst_y0;
158 const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
159 const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
160 const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
161 impl->tmp_buffer.resize(src_size);
162 memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);
163
164 const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel;
165
166 const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel;
167
168 impl->src_buffer.resize(src_copy_size);
169
170 const bool no_passthrough =
171 src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y;
172
173 const auto convertion_phase_same_format = [&]() {
174 NearestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y,
175 dst_extent_x, dst_extent_y, dst_bytes_per_pixel);
176 };
177
178 const auto convertion_phase_ir = [&]() {
179 auto* input_converter = impl->converter_factory.GetFormatConverter(src.format);
180 impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * ir_components);
181 impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * ir_components);
182 input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src);
183
184 if (config.filter != Fermi2D::Filter::Bilinear) {
185 NearestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x,
186 src_extent_y, dst_extent_x, dst_extent_y);
187 } else {
188 Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y,
189 dst_extent_x, dst_extent_y);
190 }
191
192 auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format);
193 output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer);
194 };
195
196 // Do actuall Blit
197
198 impl->dst_buffer.resize(dst_copy_size);
199 if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
200 UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width,
201 src.height, src.depth, config.src_x0, config.src_y0, src_extent_x,
202 src_extent_y, src.block_height, src.block_depth,
203 src_extent_x * src_bytes_per_pixel);
204 } else {
205 process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
206 src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
207 }
208
209 // Conversion Phase
210 if (no_passthrough) {
211 if (src.format != dst.format || config.filter == Fermi2D::Filter::Bilinear) {
212 convertion_phase_ir();
213 } else {
214 convertion_phase_same_format();
215 }
216 } else {
217 impl->dst_buffer.swap(impl->src_buffer);
218 }
219
220 const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
221 impl->tmp_buffer.resize(dst_size);
222 memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
223
224 if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
225 SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width,
226 dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x,
227 dst_extent_y, dst.block_height, dst.block_depth,
228 dst_extent_x * dst_bytes_per_pixel);
229 } else {
230 process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y,
231 dst.pitch, config.dst_x0, config.dst_y0,
232 static_cast<size_t>(dst_bytes_per_pixel));
233 }
234 memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
235 return true;
236}
237
238} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/engines/sw_blitter/blitter.h b/src/video_core/engines/sw_blitter/blitter.h
new file mode 100644
index 000000000..85b55c836
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/blitter.h
@@ -0,0 +1,27 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include "video_core/engines/fermi_2d.h"
7
8namespace Tegra {
9class MemoryManager;
10}
11
12namespace Tegra::Engines::Blitter {
13
/// CPU implementation of the Fermi2D blit, operating directly on guest memory through the
/// memory manager passed at construction.
14class SoftwareBlitEngine {
15public:
    /// Stores a reference to `memory_manager_`; it must outlive this object.
16 explicit SoftwareBlitEngine(MemoryManager& memory_manager_);
    // Declared here and defined in the source file, where BlitEngineImpl is a complete type.
17 ~SoftwareBlitEngine();
18
    /// Copies the source rectangle of `src` into the destination rectangle of `dst` as
    /// described by `copy_config`. The implementation returns true.
19 bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config);
20
21private:
    /// Used to read the source surface and to read/write the destination surface.
22 MemoryManager& memory_manager;
    /// Implementation state, defined in the source file.
23 struct BlitEngineImpl;
24 std::unique_ptr<BlitEngineImpl> impl;
25};
26
27} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp
new file mode 100644
index 000000000..cd46dfd4f
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/converter.cpp
@@ -0,0 +1,1234 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include <array>
5#include <bit>
6#include <cmath>
7#include <span>
8#include <unordered_map>
9
10#include "common/assert.h"
11#include "video_core/engines/sw_blitter/converter.h"
12#include "video_core/surface.h"
13#include "video_core/textures/decoders.h"
14
// FORCE_INLINE expands to the compiler-specific "always inline" spelling:
// the GNU attribute form everywhere except MSVC, which uses __forceinline.
#ifndef _MSC_VER
#define FORCE_INLINE inline __attribute__((always_inline))
#else
#define FORCE_INLINE __forceinline
#endif
20
21namespace Tegra::Engines::Blitter {
22
// Colour channel a format component is routed to. R..A have the values 0..3; None (4) is the
// slot given to the "X" components of the format traits in this file (e.g. X8R8G8B8).
enum class Swizzle : size_t {
    R = 0,
    G = 1,
    B = 2,
    A = 3,
    None = 4,
};
30
31enum class ComponentType : u32 {
32 SNORM = 1,
33 UNORM = 2,
34 SINT = 3,
35 UINT = 4,
36 SNORM_FORCE_FP16 = 5,
37 UNORM_FORCE_FP16 = 6,
38 FLOAT = 7,
39 SRGB = 8,
40};
41
42namespace {
43
44/*
45 * Note: The structs and the format searches in this file are generated by
46 * generate_converters.py. To add new render target formats, modify that script, run
47 * "python generate_converters.py" and copy-paste the code from its output into this file.
48 */
49
// 256-entry table rising monotonically from 0.0 to 1.0. The values match the sRGB-to-linear
// decoding curve sampled at i / 255 (e.g. entry 1 == (1 / 255) / 12.92).
// NOTE(review): presumably indexed with an 8-bit sRGB code value; the lookups are not in view.
50constexpr std::array<f32, 256> SRGB_TO_RGB_LUT = {
51 0.000000e+00f, 3.035270e-04f, 6.070540e-04f, 9.105810e-04f, 1.214108e-03f, 1.517635e-03f,
52 1.821162e-03f, 2.124689e-03f, 2.428216e-03f, 2.731743e-03f, 3.035270e-03f, 3.346536e-03f,
53 3.676507e-03f, 4.024717e-03f, 4.391442e-03f, 4.776953e-03f, 5.181517e-03f, 5.605392e-03f,
54 6.048833e-03f, 6.512091e-03f, 6.995410e-03f, 7.499032e-03f, 8.023193e-03f, 8.568126e-03f,
55 9.134059e-03f, 9.721218e-03f, 1.032982e-02f, 1.096009e-02f, 1.161224e-02f, 1.228649e-02f,
56 1.298303e-02f, 1.370208e-02f, 1.444384e-02f, 1.520851e-02f, 1.599629e-02f, 1.680738e-02f,
57 1.764195e-02f, 1.850022e-02f, 1.938236e-02f, 2.028856e-02f, 2.121901e-02f, 2.217389e-02f,
58 2.315337e-02f, 2.415763e-02f, 2.518686e-02f, 2.624122e-02f, 2.732089e-02f, 2.842604e-02f,
59 2.955684e-02f, 3.071344e-02f, 3.189603e-02f, 3.310477e-02f, 3.433981e-02f, 3.560131e-02f,
60 3.688945e-02f, 3.820437e-02f, 3.954624e-02f, 4.091520e-02f, 4.231141e-02f, 4.373503e-02f,
61 4.518620e-02f, 4.666509e-02f, 4.817183e-02f, 4.970657e-02f, 5.126946e-02f, 5.286065e-02f,
62 5.448028e-02f, 5.612849e-02f, 5.780543e-02f, 5.951124e-02f, 6.124605e-02f, 6.301001e-02f,
63 6.480327e-02f, 6.662594e-02f, 6.847817e-02f, 7.036009e-02f, 7.227185e-02f, 7.421357e-02f,
64 7.618538e-02f, 7.818742e-02f, 8.021982e-02f, 8.228271e-02f, 8.437621e-02f, 8.650046e-02f,
65 8.865558e-02f, 9.084171e-02f, 9.305897e-02f, 9.530747e-02f, 9.758735e-02f, 9.989873e-02f,
66 1.022417e-01f, 1.046165e-01f, 1.070231e-01f, 1.094617e-01f, 1.119324e-01f, 1.144354e-01f,
67 1.169707e-01f, 1.195384e-01f, 1.221388e-01f, 1.247718e-01f, 1.274377e-01f, 1.301365e-01f,
68 1.328683e-01f, 1.356333e-01f, 1.384316e-01f, 1.412633e-01f, 1.441285e-01f, 1.470273e-01f,
69 1.499598e-01f, 1.529261e-01f, 1.559265e-01f, 1.589608e-01f, 1.620294e-01f, 1.651322e-01f,
70 1.682694e-01f, 1.714411e-01f, 1.746474e-01f, 1.778884e-01f, 1.811642e-01f, 1.844750e-01f,
71 1.878208e-01f, 1.912017e-01f, 1.946178e-01f, 1.980693e-01f, 2.015563e-01f, 2.050787e-01f,
72 2.086369e-01f, 2.122308e-01f, 2.158605e-01f, 2.195262e-01f, 2.232280e-01f, 2.269659e-01f,
73 2.307401e-01f, 2.345506e-01f, 2.383976e-01f, 2.422811e-01f, 2.462013e-01f, 2.501583e-01f,
74 2.541521e-01f, 2.581829e-01f, 2.622507e-01f, 2.663556e-01f, 2.704978e-01f, 2.746773e-01f,
75 2.788943e-01f, 2.831487e-01f, 2.874408e-01f, 2.917706e-01f, 2.961383e-01f, 3.005438e-01f,
76 3.049873e-01f, 3.094689e-01f, 3.139887e-01f, 3.185468e-01f, 3.231432e-01f, 3.277781e-01f,
77 3.324515e-01f, 3.371636e-01f, 3.419144e-01f, 3.467041e-01f, 3.515326e-01f, 3.564001e-01f,
78 3.613068e-01f, 3.662526e-01f, 3.712377e-01f, 3.762621e-01f, 3.813260e-01f, 3.864294e-01f,
79 3.915725e-01f, 3.967552e-01f, 4.019778e-01f, 4.072402e-01f, 4.125426e-01f, 4.178851e-01f,
80 4.232677e-01f, 4.286905e-01f, 4.341536e-01f, 4.396572e-01f, 4.452012e-01f, 4.507858e-01f,
81 4.564110e-01f, 4.620770e-01f, 4.677838e-01f, 4.735315e-01f, 4.793202e-01f, 4.851499e-01f,
82 4.910209e-01f, 4.969330e-01f, 5.028865e-01f, 5.088813e-01f, 5.149177e-01f, 5.209956e-01f,
83 5.271151e-01f, 5.332764e-01f, 5.394795e-01f, 5.457245e-01f, 5.520114e-01f, 5.583404e-01f,
84 5.647115e-01f, 5.711249e-01f, 5.775805e-01f, 5.840784e-01f, 5.906188e-01f, 5.972018e-01f,
85 6.038274e-01f, 6.104956e-01f, 6.172066e-01f, 6.239604e-01f, 6.307572e-01f, 6.375968e-01f,
86 6.444797e-01f, 6.514056e-01f, 6.583748e-01f, 6.653873e-01f, 6.724432e-01f, 6.795425e-01f,
87 6.866853e-01f, 6.938717e-01f, 7.011019e-01f, 7.083758e-01f, 7.156935e-01f, 7.230551e-01f,
88 7.304608e-01f, 7.379104e-01f, 7.454042e-01f, 7.529422e-01f, 7.605245e-01f, 7.681512e-01f,
89 7.758222e-01f, 7.835378e-01f, 7.912979e-01f, 7.991027e-01f, 8.069522e-01f, 8.148466e-01f,
90 8.227857e-01f, 8.307699e-01f, 8.387990e-01f, 8.468732e-01f, 8.549926e-01f, 8.631572e-01f,
91 8.713671e-01f, 8.796224e-01f, 8.879231e-01f, 8.962694e-01f, 9.046612e-01f, 9.130986e-01f,
92 9.215819e-01f, 9.301109e-01f, 9.386857e-01f, 9.473065e-01f, 9.559733e-01f, 9.646863e-01f,
93 9.734453e-01f, 9.822506e-01f, 9.911021e-01f, 1.000000e+00f};
94
// 256-entry table rising monotonically from 0.0 to 1.0. The values match the linear-to-sRGB
// encoding curve sampled at i / 255 (e.g. entry 1 == 1.055 * (1 / 255)^(1 / 2.4) - 0.055).
// NOTE(review): presumably indexed with a linear value quantized to 8 bits; confirm at use site.
95constexpr std::array<f32, 256> RGB_TO_SRGB_LUT = {
96 0.000000e+00f, 4.984009e-02f, 8.494473e-02f, 1.107021e-01f, 1.318038e-01f, 1.500052e-01f,
97 1.661857e-01f, 1.808585e-01f, 1.943532e-01f, 2.068957e-01f, 2.186491e-01f, 2.297351e-01f,
98 2.402475e-01f, 2.502604e-01f, 2.598334e-01f, 2.690152e-01f, 2.778465e-01f, 2.863614e-01f,
99 2.945889e-01f, 3.025538e-01f, 3.102778e-01f, 3.177796e-01f, 3.250757e-01f, 3.321809e-01f,
100 3.391081e-01f, 3.458689e-01f, 3.524737e-01f, 3.589320e-01f, 3.652521e-01f, 3.714419e-01f,
101 3.775084e-01f, 3.834581e-01f, 3.892968e-01f, 3.950301e-01f, 4.006628e-01f, 4.061998e-01f,
102 4.116451e-01f, 4.170030e-01f, 4.222770e-01f, 4.274707e-01f, 4.325873e-01f, 4.376298e-01f,
103 4.426010e-01f, 4.475037e-01f, 4.523403e-01f, 4.571131e-01f, 4.618246e-01f, 4.664766e-01f,
104 4.710712e-01f, 4.756104e-01f, 4.800958e-01f, 4.845292e-01f, 4.889122e-01f, 4.932462e-01f,
105 4.975329e-01f, 5.017734e-01f, 5.059693e-01f, 5.101216e-01f, 5.142317e-01f, 5.183006e-01f,
106 5.223295e-01f, 5.263194e-01f, 5.302714e-01f, 5.341862e-01f, 5.380651e-01f, 5.419087e-01f,
107 5.457181e-01f, 5.494938e-01f, 5.532369e-01f, 5.569480e-01f, 5.606278e-01f, 5.642771e-01f,
108 5.678965e-01f, 5.714868e-01f, 5.750484e-01f, 5.785821e-01f, 5.820884e-01f, 5.855680e-01f,
109 5.890211e-01f, 5.924487e-01f, 5.958509e-01f, 5.992285e-01f, 6.025819e-01f, 6.059114e-01f,
110 6.092176e-01f, 6.125010e-01f, 6.157619e-01f, 6.190008e-01f, 6.222180e-01f, 6.254140e-01f,
111 6.285890e-01f, 6.317436e-01f, 6.348780e-01f, 6.379926e-01f, 6.410878e-01f, 6.441637e-01f,
112 6.472208e-01f, 6.502595e-01f, 6.532799e-01f, 6.562824e-01f, 6.592672e-01f, 6.622347e-01f,
113 6.651851e-01f, 6.681187e-01f, 6.710356e-01f, 6.739363e-01f, 6.768209e-01f, 6.796897e-01f,
114 6.825429e-01f, 6.853807e-01f, 6.882034e-01f, 6.910111e-01f, 6.938041e-01f, 6.965826e-01f,
115 6.993468e-01f, 7.020969e-01f, 7.048331e-01f, 7.075556e-01f, 7.102645e-01f, 7.129600e-01f,
116 7.156424e-01f, 7.183118e-01f, 7.209683e-01f, 7.236121e-01f, 7.262435e-01f, 7.288625e-01f,
117 7.314693e-01f, 7.340640e-01f, 7.366470e-01f, 7.392181e-01f, 7.417776e-01f, 7.443256e-01f,
118 7.468624e-01f, 7.493880e-01f, 7.519025e-01f, 7.544061e-01f, 7.568989e-01f, 7.593810e-01f,
119 7.618526e-01f, 7.643137e-01f, 7.667645e-01f, 7.692052e-01f, 7.716358e-01f, 7.740564e-01f,
120 7.764671e-01f, 7.788681e-01f, 7.812595e-01f, 7.836413e-01f, 7.860138e-01f, 7.883768e-01f,
121 7.907307e-01f, 7.930754e-01f, 7.954110e-01f, 7.977377e-01f, 8.000556e-01f, 8.023647e-01f,
122 8.046651e-01f, 8.069569e-01f, 8.092403e-01f, 8.115152e-01f, 8.137818e-01f, 8.160402e-01f,
123 8.182903e-01f, 8.205324e-01f, 8.227665e-01f, 8.249926e-01f, 8.272109e-01f, 8.294214e-01f,
124 8.316242e-01f, 8.338194e-01f, 8.360070e-01f, 8.381871e-01f, 8.403597e-01f, 8.425251e-01f,
125 8.446831e-01f, 8.468339e-01f, 8.489776e-01f, 8.511142e-01f, 8.532437e-01f, 8.553662e-01f,
126 8.574819e-01f, 8.595907e-01f, 8.616927e-01f, 8.637881e-01f, 8.658767e-01f, 8.679587e-01f,
127 8.700342e-01f, 8.721032e-01f, 8.741657e-01f, 8.762218e-01f, 8.782716e-01f, 8.803151e-01f,
128 8.823524e-01f, 8.843835e-01f, 8.864085e-01f, 8.884274e-01f, 8.904402e-01f, 8.924471e-01f,
129 8.944480e-01f, 8.964431e-01f, 8.984324e-01f, 9.004158e-01f, 9.023935e-01f, 9.043654e-01f,
130 9.063318e-01f, 9.082925e-01f, 9.102476e-01f, 9.121972e-01f, 9.141413e-01f, 9.160800e-01f,
131 9.180133e-01f, 9.199412e-01f, 9.218637e-01f, 9.237810e-01f, 9.256931e-01f, 9.276000e-01f,
132 9.295017e-01f, 9.313982e-01f, 9.332896e-01f, 9.351761e-01f, 9.370575e-01f, 9.389339e-01f,
133 9.408054e-01f, 9.426719e-01f, 9.445336e-01f, 9.463905e-01f, 9.482424e-01f, 9.500897e-01f,
134 9.519322e-01f, 9.537700e-01f, 9.556032e-01f, 9.574316e-01f, 9.592555e-01f, 9.610748e-01f,
135 9.628896e-01f, 9.646998e-01f, 9.665055e-01f, 9.683068e-01f, 9.701037e-01f, 9.718961e-01f,
136 9.736842e-01f, 9.754679e-01f, 9.772474e-01f, 9.790225e-01f, 9.807934e-01f, 9.825601e-01f,
137 9.843225e-01f, 9.860808e-01f, 9.878350e-01f, 9.895850e-01f, 9.913309e-01f, 9.930727e-01f,
138 9.948106e-01f, 9.965444e-01f, 9.982741e-01f, 1.000000e+00f};
139
140} // namespace
141
142struct R32G32B32A32_FLOATTraits {
143 static constexpr size_t num_components = 4;
144 static constexpr std::array<ComponentType, num_components> component_types = {
145 ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
146 static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
147 static constexpr std::array<Swizzle, num_components> component_swizzle = {
148 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
149};
150
151struct R32G32B32A32_SINTTraits {
152 static constexpr size_t num_components = 4;
153 static constexpr std::array<ComponentType, num_components> component_types = {
154 ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
155 static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
156 static constexpr std::array<Swizzle, num_components> component_swizzle = {
157 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
158};
159
160struct R32G32B32A32_UINTTraits {
161 static constexpr size_t num_components = 4;
162 static constexpr std::array<ComponentType, num_components> component_types = {
163 ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
164 static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
165 static constexpr std::array<Swizzle, num_components> component_swizzle = {
166 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
167};
168
169struct R32G32B32X32_FLOATTraits {
170 static constexpr size_t num_components = 4;
171 static constexpr std::array<ComponentType, num_components> component_types = {
172 ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
173 static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
174 static constexpr std::array<Swizzle, num_components> component_swizzle = {
175 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None};
176};
177
178struct R32G32B32X32_SINTTraits {
179 static constexpr size_t num_components = 4;
180 static constexpr std::array<ComponentType, num_components> component_types = {
181 ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
182 static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
183 static constexpr std::array<Swizzle, num_components> component_swizzle = {
184 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None};
185};
186
187struct R32G32B32X32_UINTTraits {
188 static constexpr size_t num_components = 4;
189 static constexpr std::array<ComponentType, num_components> component_types = {
190 ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
191 static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
192 static constexpr std::array<Swizzle, num_components> component_swizzle = {
193 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None};
194};
195
196struct R16G16B16A16_UNORMTraits {
197 static constexpr size_t num_components = 4;
198 static constexpr std::array<ComponentType, num_components> component_types = {
199 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
200 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
201 static constexpr std::array<Swizzle, num_components> component_swizzle = {
202 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
203};
204
205struct R16G16B16A16_SNORMTraits {
206 static constexpr size_t num_components = 4;
207 static constexpr std::array<ComponentType, num_components> component_types = {
208 ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM};
209 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
210 static constexpr std::array<Swizzle, num_components> component_swizzle = {
211 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
212};
213
214struct R16G16B16A16_SINTTraits {
215 static constexpr size_t num_components = 4;
216 static constexpr std::array<ComponentType, num_components> component_types = {
217 ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
218 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
219 static constexpr std::array<Swizzle, num_components> component_swizzle = {
220 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
221};
222
223struct R16G16B16A16_UINTTraits {
224 static constexpr size_t num_components = 4;
225 static constexpr std::array<ComponentType, num_components> component_types = {
226 ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
227 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
228 static constexpr std::array<Swizzle, num_components> component_swizzle = {
229 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
230};
231
232struct R16G16B16A16_FLOATTraits {
233 static constexpr size_t num_components = 4;
234 static constexpr std::array<ComponentType, num_components> component_types = {
235 ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
236 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
237 static constexpr std::array<Swizzle, num_components> component_swizzle = {
238 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
239};
240
241struct R32G32_FLOATTraits {
242 static constexpr size_t num_components = 2;
243 static constexpr std::array<ComponentType, num_components> component_types = {
244 ComponentType::FLOAT, ComponentType::FLOAT};
245 static constexpr std::array<size_t, num_components> component_sizes = {32, 32};
246 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
247 Swizzle::G};
248};
249
250struct R32G32_SINTTraits {
251 static constexpr size_t num_components = 2;
252 static constexpr std::array<ComponentType, num_components> component_types = {
253 ComponentType::SINT, ComponentType::SINT};
254 static constexpr std::array<size_t, num_components> component_sizes = {32, 32};
255 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
256 Swizzle::G};
257};
258
259struct R32G32_UINTTraits {
260 static constexpr size_t num_components = 2;
261 static constexpr std::array<ComponentType, num_components> component_types = {
262 ComponentType::UINT, ComponentType::UINT};
263 static constexpr std::array<size_t, num_components> component_sizes = {32, 32};
264 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
265 Swizzle::G};
266};
267
268struct R16G16B16X16_FLOATTraits {
269 static constexpr size_t num_components = 4;
270 static constexpr std::array<ComponentType, num_components> component_types = {
271 ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
272 static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
273 static constexpr std::array<Swizzle, num_components> component_swizzle = {
274 Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None};
275};
276
277struct A8R8G8B8_UNORMTraits {
278 static constexpr size_t num_components = 4;
279 static constexpr std::array<ComponentType, num_components> component_types = {
280 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
281 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
282 static constexpr std::array<Swizzle, num_components> component_swizzle = {
283 Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B};
284};
285
286struct A8R8G8B8_SRGBTraits {
287 static constexpr size_t num_components = 4;
288 static constexpr std::array<ComponentType, num_components> component_types = {
289 ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
290 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
291 static constexpr std::array<Swizzle, num_components> component_swizzle = {
292 Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B};
293};
294
295struct A2B10G10R10_UNORMTraits {
296 static constexpr size_t num_components = 4;
297 static constexpr std::array<ComponentType, num_components> component_types = {
298 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
299 static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10};
300 static constexpr std::array<Swizzle, num_components> component_swizzle = {
301 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
302};
303
304struct A2B10G10R10_UINTTraits {
305 static constexpr size_t num_components = 4;
306 static constexpr std::array<ComponentType, num_components> component_types = {
307 ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
308 static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10};
309 static constexpr std::array<Swizzle, num_components> component_swizzle = {
310 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
311};
312
313struct A2R10G10B10_UNORMTraits {
314 static constexpr size_t num_components = 4;
315 static constexpr std::array<ComponentType, num_components> component_types = {
316 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
317 static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10};
318 static constexpr std::array<Swizzle, num_components> component_swizzle = {
319 Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B};
320};
321
322struct A8B8G8R8_UNORMTraits {
323 static constexpr size_t num_components = 4;
324 static constexpr std::array<ComponentType, num_components> component_types = {
325 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
326 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
327 static constexpr std::array<Swizzle, num_components> component_swizzle = {
328 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
329};
330
331struct A8B8G8R8_SRGBTraits {
332 static constexpr size_t num_components = 4;
333 static constexpr std::array<ComponentType, num_components> component_types = {
334 ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
335 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
336 static constexpr std::array<Swizzle, num_components> component_swizzle = {
337 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
338};
339
340struct A8B8G8R8_SNORMTraits {
341 static constexpr size_t num_components = 4;
342 static constexpr std::array<ComponentType, num_components> component_types = {
343 ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM};
344 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
345 static constexpr std::array<Swizzle, num_components> component_swizzle = {
346 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
347};
348
349struct A8B8G8R8_SINTTraits {
350 static constexpr size_t num_components = 4;
351 static constexpr std::array<ComponentType, num_components> component_types = {
352 ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
353 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
354 static constexpr std::array<Swizzle, num_components> component_swizzle = {
355 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
356};
357
358struct A8B8G8R8_UINTTraits {
359 static constexpr size_t num_components = 4;
360 static constexpr std::array<ComponentType, num_components> component_types = {
361 ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
362 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
363 static constexpr std::array<Swizzle, num_components> component_swizzle = {
364 Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
365};
366
367struct R16G16_UNORMTraits {
368 static constexpr size_t num_components = 2;
369 static constexpr std::array<ComponentType, num_components> component_types = {
370 ComponentType::UNORM, ComponentType::UNORM};
371 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
372 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
373 Swizzle::G};
374};
375
376struct R16G16_SNORMTraits {
377 static constexpr size_t num_components = 2;
378 static constexpr std::array<ComponentType, num_components> component_types = {
379 ComponentType::SNORM, ComponentType::SNORM};
380 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
381 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
382 Swizzle::G};
383};
384
385struct R16G16_SINTTraits {
386 static constexpr size_t num_components = 2;
387 static constexpr std::array<ComponentType, num_components> component_types = {
388 ComponentType::SINT, ComponentType::SINT};
389 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
390 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
391 Swizzle::G};
392};
393
394struct R16G16_UINTTraits {
395 static constexpr size_t num_components = 2;
396 static constexpr std::array<ComponentType, num_components> component_types = {
397 ComponentType::UINT, ComponentType::UINT};
398 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
399 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
400 Swizzle::G};
401};
402
403struct R16G16_FLOATTraits {
404 static constexpr size_t num_components = 2;
405 static constexpr std::array<ComponentType, num_components> component_types = {
406 ComponentType::FLOAT, ComponentType::FLOAT};
407 static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
408 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
409 Swizzle::G};
410};
411
412struct B10G11R11_FLOATTraits {
413 static constexpr size_t num_components = 3;
414 static constexpr std::array<ComponentType, num_components> component_types = {
415 ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
416 static constexpr std::array<size_t, num_components> component_sizes = {10, 11, 11};
417 static constexpr std::array<Swizzle, num_components> component_swizzle = {
418 Swizzle::B, Swizzle::G, Swizzle::R};
419};
420
421struct R32_SINTTraits {
422 static constexpr size_t num_components = 1;
423 static constexpr std::array<ComponentType, num_components> component_types = {
424 ComponentType::SINT};
425 static constexpr std::array<size_t, num_components> component_sizes = {32};
426 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
427};
428
429struct R32_UINTTraits {
430 static constexpr size_t num_components = 1;
431 static constexpr std::array<ComponentType, num_components> component_types = {
432 ComponentType::UINT};
433 static constexpr std::array<size_t, num_components> component_sizes = {32};
434 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
435};
436
437struct R32_FLOATTraits {
438 static constexpr size_t num_components = 1;
439 static constexpr std::array<ComponentType, num_components> component_types = {
440 ComponentType::FLOAT};
441 static constexpr std::array<size_t, num_components> component_sizes = {32};
442 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
443};
444
445struct X8R8G8B8_UNORMTraits {
446 static constexpr size_t num_components = 4;
447 static constexpr std::array<ComponentType, num_components> component_types = {
448 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
449 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
450 static constexpr std::array<Swizzle, num_components> component_swizzle = {
451 Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B};
452};
453
454struct X8R8G8B8_SRGBTraits {
455 static constexpr size_t num_components = 4;
456 static constexpr std::array<ComponentType, num_components> component_types = {
457 ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
458 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
459 static constexpr std::array<Swizzle, num_components> component_swizzle = {
460 Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B};
461};
462
463struct R5G6B5_UNORMTraits {
464 static constexpr size_t num_components = 3;
465 static constexpr std::array<ComponentType, num_components> component_types = {
466 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
467 static constexpr std::array<size_t, num_components> component_sizes = {5, 6, 5};
468 static constexpr std::array<Swizzle, num_components> component_swizzle = {
469 Swizzle::R, Swizzle::G, Swizzle::B};
470};
471
472struct A1R5G5B5_UNORMTraits {
473 static constexpr size_t num_components = 4;
474 static constexpr std::array<ComponentType, num_components> component_types = {
475 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
476 static constexpr std::array<size_t, num_components> component_sizes = {1, 5, 5, 5};
477 static constexpr std::array<Swizzle, num_components> component_swizzle = {
478 Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B};
479};
480
481struct R8G8_UNORMTraits {
482 static constexpr size_t num_components = 2;
483 static constexpr std::array<ComponentType, num_components> component_types = {
484 ComponentType::UNORM, ComponentType::UNORM};
485 static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
486 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
487 Swizzle::G};
488};
489
490struct R8G8_SNORMTraits {
491 static constexpr size_t num_components = 2;
492 static constexpr std::array<ComponentType, num_components> component_types = {
493 ComponentType::SNORM, ComponentType::SNORM};
494 static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
495 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
496 Swizzle::G};
497};
498
499struct R8G8_SINTTraits {
500 static constexpr size_t num_components = 2;
501 static constexpr std::array<ComponentType, num_components> component_types = {
502 ComponentType::SINT, ComponentType::SINT};
503 static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
504 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
505 Swizzle::G};
506};
507
508struct R8G8_UINTTraits {
509 static constexpr size_t num_components = 2;
510 static constexpr std::array<ComponentType, num_components> component_types = {
511 ComponentType::UINT, ComponentType::UINT};
512 static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
513 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
514 Swizzle::G};
515};
516
517struct R16_UNORMTraits {
518 static constexpr size_t num_components = 1;
519 static constexpr std::array<ComponentType, num_components> component_types = {
520 ComponentType::UNORM};
521 static constexpr std::array<size_t, num_components> component_sizes = {16};
522 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
523};
524
525struct R16_SNORMTraits {
526 static constexpr size_t num_components = 1;
527 static constexpr std::array<ComponentType, num_components> component_types = {
528 ComponentType::SNORM};
529 static constexpr std::array<size_t, num_components> component_sizes = {16};
530 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
531};
532
533struct R16_SINTTraits {
534 static constexpr size_t num_components = 1;
535 static constexpr std::array<ComponentType, num_components> component_types = {
536 ComponentType::SINT};
537 static constexpr std::array<size_t, num_components> component_sizes = {16};
538 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
539};
540
541struct R16_UINTTraits {
542 static constexpr size_t num_components = 1;
543 static constexpr std::array<ComponentType, num_components> component_types = {
544 ComponentType::UINT};
545 static constexpr std::array<size_t, num_components> component_sizes = {16};
546 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
547};
548
549struct R16_FLOATTraits {
550 static constexpr size_t num_components = 1;
551 static constexpr std::array<ComponentType, num_components> component_types = {
552 ComponentType::FLOAT};
553 static constexpr std::array<size_t, num_components> component_sizes = {16};
554 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
555};
556
557struct R8_UNORMTraits {
558 static constexpr size_t num_components = 1;
559 static constexpr std::array<ComponentType, num_components> component_types = {
560 ComponentType::UNORM};
561 static constexpr std::array<size_t, num_components> component_sizes = {8};
562 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
563};
564
565struct R8_SNORMTraits {
566 static constexpr size_t num_components = 1;
567 static constexpr std::array<ComponentType, num_components> component_types = {
568 ComponentType::SNORM};
569 static constexpr std::array<size_t, num_components> component_sizes = {8};
570 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
571};
572
573struct R8_SINTTraits {
574 static constexpr size_t num_components = 1;
575 static constexpr std::array<ComponentType, num_components> component_types = {
576 ComponentType::SINT};
577 static constexpr std::array<size_t, num_components> component_sizes = {8};
578 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
579};
580
581struct R8_UINTTraits {
582 static constexpr size_t num_components = 1;
583 static constexpr std::array<ComponentType, num_components> component_types = {
584 ComponentType::UINT};
585 static constexpr std::array<size_t, num_components> component_sizes = {8};
586 static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
587};
588
589struct X1R5G5B5_UNORMTraits {
590 static constexpr size_t num_components = 4;
591 static constexpr std::array<ComponentType, num_components> component_types = {
592 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
593 static constexpr std::array<size_t, num_components> component_sizes = {1, 5, 5, 5};
594 static constexpr std::array<Swizzle, num_components> component_swizzle = {
595 Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B};
596};
597
598struct X8B8G8R8_UNORMTraits {
599 static constexpr size_t num_components = 4;
600 static constexpr std::array<ComponentType, num_components> component_types = {
601 ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
602 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
603 static constexpr std::array<Swizzle, num_components> component_swizzle = {
604 Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R};
605};
606
607struct X8B8G8R8_SRGBTraits {
608 static constexpr size_t num_components = 4;
609 static constexpr std::array<ComponentType, num_components> component_types = {
610 ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
611 static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
612 static constexpr std::array<Swizzle, num_components> component_swizzle = {
613 Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R};
614};
615
616template <class ConverterTraits>
617class ConverterImpl : public Converter {
618private:
619 static constexpr size_t num_components = ConverterTraits::num_components;
620 static constexpr std::array<ComponentType, num_components> component_types =
621 ConverterTraits::component_types;
622 static constexpr std::array<size_t, num_components> component_sizes =
623 ConverterTraits::component_sizes;
624 static constexpr std::array<Swizzle, num_components> component_swizzle =
625 ConverterTraits::component_swizzle;
626
627 static constexpr size_t CalculateByteSize() {
628 size_t size = 0;
629 for (const size_t component_size : component_sizes) {
630 size += component_size;
631 }
632 const size_t power = (sizeof(size_t) * 8) - std::countl_zero(size) - 1ULL;
633 const size_t base_size = 1ULL << power;
634 const size_t mask = base_size - 1ULL;
635 return ((size & mask) != 0 ? base_size << 1ULL : base_size) / 8;
636 }
637
638 static constexpr size_t total_bytes_per_pixel = CalculateByteSize();
639 static constexpr size_t total_words_per_pixel =
640 (total_bytes_per_pixel + sizeof(u32) - 1U) / sizeof(u32);
641 static constexpr size_t components_per_ir_rep = 4;
642
643 template <bool get_offsets>
644 static constexpr std::array<size_t, num_components> GetBoundWordsOffsets() {
645 std::array<size_t, num_components> result;
646 result.fill(0);
647 constexpr size_t total_bits_per_word = sizeof(u32) * 8;
648 size_t accumulated_size = 0;
649 size_t count = 0;
650 for (size_t i = 0; i < num_components; i++) {
651 if constexpr (get_offsets) {
652 result[i] = accumulated_size;
653 } else {
654 result[i] = count;
655 }
656 accumulated_size += component_sizes[i];
657 if (accumulated_size > total_bits_per_word) {
658 if constexpr (get_offsets) {
659 result[i] = 0;
660 } else {
661 result[i]++;
662 }
663 count++;
664 accumulated_size = component_sizes[i];
665 }
666 }
667 return result;
668 }
669
670 static constexpr std::array<size_t, num_components> bound_words = GetBoundWordsOffsets<false>();
671 static constexpr std::array<size_t, num_components> bound_offsets =
672 GetBoundWordsOffsets<true>();
673
674 static constexpr std::array<u32, num_components> GetComponentsMask() {
675 std::array<u32, num_components> result;
676 for (size_t i = 0; i < num_components; i++) {
677 result[i] = (((u32)~0) >> (8 * sizeof(u32) - component_sizes[i])) << bound_offsets[i];
678 }
679 return result;
680 }
681
682 static constexpr std::array<u32, num_components> component_mask = GetComponentsMask();
683
684 // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function
685 // calls, it may fail to detect the benefit of inlining.
686 template <size_t which_component>
687 FORCE_INLINE void ConvertToComponent(u32 which_word, f32& out_component) {
688 const u32 value = (which_word >> bound_offsets[which_component]) &
689 static_cast<u32>((1ULL << component_sizes[which_component]) - 1ULL);
690 const auto sign_extend = [](u32 base_value, size_t bits) {
691 const size_t shift_amount = sizeof(u32) * 8 - bits;
692 s32 shifted_value = static_cast<s32>(base_value << shift_amount);
693 return shifted_value >> shift_amount;
694 };
695 const auto force_to_fp16 = [](f32 base_value) {
696 u32 tmp = std::bit_cast<u32>(base_value);
697 constexpr size_t fp32_mantissa_bits = 23;
698 constexpr size_t fp16_mantissa_bits = 10;
699 constexpr size_t mantissa_mask =
700 ~((1ULL << (fp32_mantissa_bits - fp16_mantissa_bits)) - 1ULL);
701 tmp = tmp & static_cast<u32>(mantissa_mask);
702 // TODO: force the exponent within the range of half float. Not needed in UNORM / SNORM
703 return std::bit_cast<f32>(tmp);
704 };
705 const auto from_fp_n = [&sign_extend](u32 base_value, size_t bits, size_t mantissa) {
706 constexpr size_t fp32_mantissa_bits = 23;
707 size_t shift_towards = fp32_mantissa_bits - mantissa;
708 const u32 new_value =
709 static_cast<u32>(sign_extend(base_value, bits) << shift_towards) & (~(1U << 31));
710 return std::bit_cast<f32>(new_value);
711 };
712 const auto calculate_snorm = [&]() {
713 return static_cast<f32>(
714 static_cast<f32>(sign_extend(value, component_sizes[which_component])) /
715 static_cast<f32>((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL));
716 };
717 const auto calculate_unorm = [&]() {
718 return static_cast<f32>(
719 static_cast<f32>(value) /
720 static_cast<f32>((1ULL << (component_sizes[which_component])) - 1ULL));
721 };
722 if constexpr (component_types[which_component] == ComponentType::SNORM) {
723 out_component = calculate_snorm();
724 } else if constexpr (component_types[which_component] == ComponentType::UNORM) {
725 out_component = calculate_unorm();
726 } else if constexpr (component_types[which_component] == ComponentType::SINT) {
727 out_component = static_cast<f32>(
728 static_cast<s32>(sign_extend(value, component_sizes[which_component])));
729 } else if constexpr (component_types[which_component] == ComponentType::UINT) {
730 out_component = static_cast<f32>(
731 static_cast<s32>(sign_extend(value, component_sizes[which_component])));
732 } else if constexpr (component_types[which_component] == ComponentType::SNORM_FORCE_FP16) {
733 out_component = calculate_snorm();
734 out_component = force_to_fp16(out_component);
735 } else if constexpr (component_types[which_component] == ComponentType::UNORM_FORCE_FP16) {
736 out_component = calculate_unorm();
737 out_component = force_to_fp16(out_component);
738 } else if constexpr (component_types[which_component] == ComponentType::FLOAT) {
739 if constexpr (component_sizes[which_component] == 32) {
740 out_component = std::bit_cast<f32>(value);
741 } else if constexpr (component_sizes[which_component] == 16) {
742 static constexpr u32 sign_mask = 0x8000;
743 static constexpr u32 mantissa_mask = 0x8000;
744 out_component = std::bit_cast<f32>(((value & sign_mask) << 16) |
745 (((value & 0x7c00) + 0x1C000) << 13) |
746 ((value & mantissa_mask) << 13));
747 } else {
748 out_component = from_fp_n(value, component_sizes[which_component],
749 component_sizes[which_component] - 5);
750 }
751 } else if constexpr (component_types[which_component] == ComponentType::SRGB) {
752 if constexpr (component_swizzle[which_component] == Swizzle::A) {
753 out_component = calculate_unorm();
754 } else if constexpr (component_sizes[which_component] == 8) {
755 out_component = SRGB_TO_RGB_LUT[value];
756 } else {
757 out_component = calculate_unorm();
758 UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented",
759 component_sizes[which_component]);
760 }
761 }
762 }
763
764 // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function
765 // calls, it may fail to detect the benefit of inlining.
766 template <size_t which_component>
767 FORCE_INLINE void ConvertFromComponent(u32& which_word, f32 in_component) {
768 const auto insert_to_word = [&]<typename T>(T new_word) {
769 which_word |= (static_cast<u32>(new_word) << bound_offsets[which_component]) &
770 component_mask[which_component];
771 };
772 const auto to_fp_n = [](f32 base_value, size_t bits, size_t mantissa) {
773 constexpr size_t fp32_mantissa_bits = 23;
774 u32 tmp_value = std::bit_cast<u32>(std::max(base_value, 0.0f));
775 size_t shift_towards = fp32_mantissa_bits - mantissa;
776 return tmp_value >> shift_towards;
777 };
778 const auto calculate_unorm = [&]() {
779 return static_cast<u32>(
780 static_cast<f32>(in_component) *
781 static_cast<f32>((1ULL << (component_sizes[which_component])) - 1ULL));
782 };
783 if constexpr (component_types[which_component] == ComponentType::SNORM ||
784 component_types[which_component] == ComponentType::SNORM_FORCE_FP16) {
785 s32 tmp_word = static_cast<s32>(
786 static_cast<f32>(in_component) *
787 static_cast<f32>((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL));
788 insert_to_word(tmp_word);
789
790 } else if constexpr (component_types[which_component] == ComponentType::UNORM ||
791 component_types[which_component] == ComponentType::UNORM_FORCE_FP16) {
792 u32 tmp_word = calculate_unorm();
793 insert_to_word(tmp_word);
794 } else if constexpr (component_types[which_component] == ComponentType::SINT) {
795 s32 tmp_word = static_cast<s32>(in_component);
796 insert_to_word(tmp_word);
797 } else if constexpr (component_types[which_component] == ComponentType::UINT) {
798 u32 tmp_word = static_cast<u32>(in_component);
799 insert_to_word(tmp_word);
800 } else if constexpr (component_types[which_component] == ComponentType::FLOAT) {
801 if constexpr (component_sizes[which_component] == 32) {
802 u32 tmp_word = std::bit_cast<u32>(in_component);
803 insert_to_word(tmp_word);
804 } else if constexpr (component_sizes[which_component] == 16) {
805 static constexpr u32 sign_mask = 0x8000;
806 static constexpr u32 mantissa_mask = 0x03ff;
807 static constexpr u32 exponent_mask = 0x7c00;
808 const u32 tmp_word = std::bit_cast<u32>(in_component);
809 const u32 half = ((tmp_word >> 16) & sign_mask) |
810 ((((tmp_word & 0x7f800000) - 0x38000000) >> 13) & exponent_mask) |
811 ((tmp_word >> 13) & mantissa_mask);
812 insert_to_word(half);
813 } else {
814 insert_to_word(to_fp_n(in_component, component_sizes[which_component],
815 component_sizes[which_component] - 5));
816 }
817 } else if constexpr (component_types[which_component] == ComponentType::SRGB) {
818 if constexpr (component_swizzle[which_component] != Swizzle::A) {
819 if constexpr (component_sizes[which_component] == 8) {
820 const u32 index = calculate_unorm();
821 in_component = RGB_TO_SRGB_LUT[index];
822 } else {
823 UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented",
824 component_sizes[which_component]);
825 }
826 }
827 const u32 tmp_word = calculate_unorm();
828 insert_to_word(tmp_word);
829 }
830 }
831
832public:
833 void ConvertTo(std::span<const u8> input, std::span<f32> output) override {
834 const size_t num_pixels = output.size() / components_per_ir_rep;
835 for (size_t pixel = 0; pixel < num_pixels; pixel++) {
836 std::array<u32, total_words_per_pixel> words{};
837
838 std::memcpy(words.data(), &input[pixel * total_bytes_per_pixel], total_bytes_per_pixel);
839 std::span<f32> new_components(&output[pixel * components_per_ir_rep],
840 components_per_ir_rep);
841 if constexpr (component_swizzle[0] != Swizzle::None) {
842 ConvertToComponent<0>(words[bound_words[0]],
843 new_components[static_cast<size_t>(component_swizzle[0])]);
844 } else {
845 new_components[0] = 0.0f;
846 }
847 if constexpr (num_components >= 2) {
848 if constexpr (component_swizzle[1] != Swizzle::None) {
849 ConvertToComponent<1>(
850 words[bound_words[1]],
851 new_components[static_cast<size_t>(component_swizzle[1])]);
852 } else {
853 new_components[1] = 0.0f;
854 }
855 } else {
856 new_components[1] = 0.0f;
857 }
858 if constexpr (num_components >= 3) {
859 if constexpr (component_swizzle[2] != Swizzle::None) {
860 ConvertToComponent<2>(
861 words[bound_words[2]],
862 new_components[static_cast<size_t>(component_swizzle[2])]);
863 } else {
864 new_components[2] = 0.0f;
865 }
866 } else {
867 new_components[2] = 0.0f;
868 }
869 if constexpr (num_components >= 4) {
870 if constexpr (component_swizzle[3] != Swizzle::None) {
871 ConvertToComponent<3>(
872 words[bound_words[3]],
873 new_components[static_cast<size_t>(component_swizzle[3])]);
874 } else {
875 new_components[3] = 0.0f;
876 }
877 } else {
878 new_components[3] = 0.0f;
879 }
880 }
881 }
882
883 void ConvertFrom(std::span<const f32> input, std::span<u8> output) override {
884 const size_t num_pixels = output.size() / total_bytes_per_pixel;
885 for (size_t pixel = 0; pixel < num_pixels; pixel++) {
886 std::span<const f32> old_components(&input[pixel * components_per_ir_rep],
887 components_per_ir_rep);
888 std::array<u32, total_words_per_pixel> words{};
889 if constexpr (component_swizzle[0] != Swizzle::None) {
890 ConvertFromComponent<0>(words[bound_words[0]],
891 old_components[static_cast<size_t>(component_swizzle[0])]);
892 }
893 if constexpr (num_components >= 2) {
894 if constexpr (component_swizzle[1] != Swizzle::None) {
895 ConvertFromComponent<1>(
896 words[bound_words[1]],
897 old_components[static_cast<size_t>(component_swizzle[1])]);
898 }
899 }
900 if constexpr (num_components >= 3) {
901 if constexpr (component_swizzle[2] != Swizzle::None) {
902 ConvertFromComponent<2>(
903 words[bound_words[2]],
904 old_components[static_cast<size_t>(component_swizzle[2])]);
905 }
906 }
907 if constexpr (num_components >= 4) {
908 if constexpr (component_swizzle[3] != Swizzle::None) {
909 ConvertFromComponent<3>(
910 words[bound_words[3]],
911 old_components[static_cast<size_t>(component_swizzle[3])]);
912 }
913 }
914 std::memcpy(&output[pixel * total_bytes_per_pixel], words.data(),
915 total_bytes_per_pixel);
916 }
917 }
918
919 ConverterImpl() = default;
920 ~ConverterImpl() override = default;
921};
922
923struct ConverterFactory::ConverterFactoryImpl {
924 std::unordered_map<RenderTargetFormat, std::unique_ptr<Converter>> converters_cache;
925};
926
927ConverterFactory::ConverterFactory() {
928 impl = std::make_unique<ConverterFactoryImpl>();
929}
930
931ConverterFactory::~ConverterFactory() = default;
932
933Converter* ConverterFactory::GetFormatConverter(RenderTargetFormat format) {
934 auto it = impl->converters_cache.find(format);
935 if (it == impl->converters_cache.end()) [[unlikely]] {
936 return BuildConverter(format);
937 }
938 return it->second.get();
939}
940
941class NullConverter : public Converter {
942public:
943 void ConvertTo([[maybe_unused]] std::span<const u8> input, std::span<f32> output) override {
944 std::fill(output.begin(), output.end(), 0.0f);
945 }
946 void ConvertFrom([[maybe_unused]] std::span<const f32> input, std::span<u8> output) override {
947 const u8 fill_value = 0U;
948 std::fill(output.begin(), output.end(), fill_value);
949 }
950 NullConverter() = default;
951 ~NullConverter() = default;
952};
953
954Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) {
955 switch (format) {
956 case RenderTargetFormat::R32G32B32A32_FLOAT:
957 return impl->converters_cache
958 .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_FLOATTraits>>())
959 .first->second.get();
960 break;
961 case RenderTargetFormat::R32G32B32A32_SINT:
962 return impl->converters_cache
963 .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_SINTTraits>>())
964 .first->second.get();
965 break;
966 case RenderTargetFormat::R32G32B32A32_UINT:
967 return impl->converters_cache
968 .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_UINTTraits>>())
969 .first->second.get();
970 break;
971 case RenderTargetFormat::R32G32B32X32_FLOAT:
972 return impl->converters_cache
973 .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_FLOATTraits>>())
974 .first->second.get();
975 break;
976 case RenderTargetFormat::R32G32B32X32_SINT:
977 return impl->converters_cache
978 .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_SINTTraits>>())
979 .first->second.get();
980 break;
981 case RenderTargetFormat::R32G32B32X32_UINT:
982 return impl->converters_cache
983 .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_UINTTraits>>())
984 .first->second.get();
985 break;
986 case RenderTargetFormat::R16G16B16A16_UNORM:
987 return impl->converters_cache
988 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UNORMTraits>>())
989 .first->second.get();
990 break;
991 case RenderTargetFormat::R16G16B16A16_SNORM:
992 return impl->converters_cache
993 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SNORMTraits>>())
994 .first->second.get();
995 break;
996 case RenderTargetFormat::R16G16B16A16_SINT:
997 return impl->converters_cache
998 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SINTTraits>>())
999 .first->second.get();
1000 break;
1001 case RenderTargetFormat::R16G16B16A16_UINT:
1002 return impl->converters_cache
1003 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UINTTraits>>())
1004 .first->second.get();
1005 break;
1006 case RenderTargetFormat::R16G16B16A16_FLOAT:
1007 return impl->converters_cache
1008 .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_FLOATTraits>>())
1009 .first->second.get();
1010 break;
1011 case RenderTargetFormat::R32G32_FLOAT:
1012 return impl->converters_cache
1013 .emplace(format, std::make_unique<ConverterImpl<R32G32_FLOATTraits>>())
1014 .first->second.get();
1015 break;
1016 case RenderTargetFormat::R32G32_SINT:
1017 return impl->converters_cache
1018 .emplace(format, std::make_unique<ConverterImpl<R32G32_SINTTraits>>())
1019 .first->second.get();
1020 break;
1021 case RenderTargetFormat::R32G32_UINT:
1022 return impl->converters_cache
1023 .emplace(format, std::make_unique<ConverterImpl<R32G32_UINTTraits>>())
1024 .first->second.get();
1025 break;
1026 case RenderTargetFormat::R16G16B16X16_FLOAT:
1027 return impl->converters_cache
1028 .emplace(format, std::make_unique<ConverterImpl<R16G16B16X16_FLOATTraits>>())
1029 .first->second.get();
1030 break;
1031 case RenderTargetFormat::A8R8G8B8_UNORM:
1032 return impl->converters_cache
1033 .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_UNORMTraits>>())
1034 .first->second.get();
1035 break;
1036 case RenderTargetFormat::A8R8G8B8_SRGB:
1037 return impl->converters_cache
1038 .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_SRGBTraits>>())
1039 .first->second.get();
1040 break;
1041 case RenderTargetFormat::A2B10G10R10_UNORM:
1042 return impl->converters_cache
1043 .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UNORMTraits>>())
1044 .first->second.get();
1045 break;
1046 case RenderTargetFormat::A2B10G10R10_UINT:
1047 return impl->converters_cache
1048 .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UINTTraits>>())
1049 .first->second.get();
1050 break;
1051 case RenderTargetFormat::A2R10G10B10_UNORM:
1052 return impl->converters_cache
1053 .emplace(format, std::make_unique<ConverterImpl<A2R10G10B10_UNORMTraits>>())
1054 .first->second.get();
1055 break;
1056 case RenderTargetFormat::A8B8G8R8_UNORM:
1057 return impl->converters_cache
1058 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UNORMTraits>>())
1059 .first->second.get();
1060 break;
1061 case RenderTargetFormat::A8B8G8R8_SRGB:
1062 return impl->converters_cache
1063 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SRGBTraits>>())
1064 .first->second.get();
1065 break;
1066 case RenderTargetFormat::A8B8G8R8_SNORM:
1067 return impl->converters_cache
1068 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SNORMTraits>>())
1069 .first->second.get();
1070 break;
1071 case RenderTargetFormat::A8B8G8R8_SINT:
1072 return impl->converters_cache
1073 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SINTTraits>>())
1074 .first->second.get();
1075 break;
1076 case RenderTargetFormat::A8B8G8R8_UINT:
1077 return impl->converters_cache
1078 .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UINTTraits>>())
1079 .first->second.get();
1080 break;
1081 case RenderTargetFormat::R16G16_UNORM:
1082 return impl->converters_cache
1083 .emplace(format, std::make_unique<ConverterImpl<R16G16_UNORMTraits>>())
1084 .first->second.get();
1085 break;
1086 case RenderTargetFormat::R16G16_SNORM:
1087 return impl->converters_cache
1088 .emplace(format, std::make_unique<ConverterImpl<R16G16_SNORMTraits>>())
1089 .first->second.get();
1090 break;
1091 case RenderTargetFormat::R16G16_SINT:
1092 return impl->converters_cache
1093 .emplace(format, std::make_unique<ConverterImpl<R16G16_SINTTraits>>())
1094 .first->second.get();
1095 break;
1096 case RenderTargetFormat::R16G16_UINT:
1097 return impl->converters_cache
1098 .emplace(format, std::make_unique<ConverterImpl<R16G16_UINTTraits>>())
1099 .first->second.get();
1100 break;
1101 case RenderTargetFormat::R16G16_FLOAT:
1102 return impl->converters_cache
1103 .emplace(format, std::make_unique<ConverterImpl<R16G16_FLOATTraits>>())
1104 .first->second.get();
1105 break;
1106 case RenderTargetFormat::B10G11R11_FLOAT:
1107 return impl->converters_cache
1108 .emplace(format, std::make_unique<ConverterImpl<B10G11R11_FLOATTraits>>())
1109 .first->second.get();
1110 break;
1111 case RenderTargetFormat::R32_SINT:
1112 return impl->converters_cache
1113 .emplace(format, std::make_unique<ConverterImpl<R32_SINTTraits>>())
1114 .first->second.get();
1115 break;
1116 case RenderTargetFormat::R32_UINT:
1117 return impl->converters_cache
1118 .emplace(format, std::make_unique<ConverterImpl<R32_UINTTraits>>())
1119 .first->second.get();
1120 break;
1121 case RenderTargetFormat::R32_FLOAT:
1122 return impl->converters_cache
1123 .emplace(format, std::make_unique<ConverterImpl<R32_FLOATTraits>>())
1124 .first->second.get();
1125 break;
1126 case RenderTargetFormat::X8R8G8B8_UNORM:
1127 return impl->converters_cache
1128 .emplace(format, std::make_unique<ConverterImpl<X8R8G8B8_UNORMTraits>>())
1129 .first->second.get();
1130 break;
1131 case RenderTargetFormat::X8R8G8B8_SRGB:
1132 return impl->converters_cache
1133 .emplace(format, std::make_unique<ConverterImpl<X8R8G8B8_SRGBTraits>>())
1134 .first->second.get();
1135 break;
1136 case RenderTargetFormat::R5G6B5_UNORM:
1137 return impl->converters_cache
1138 .emplace(format, std::make_unique<ConverterImpl<R5G6B5_UNORMTraits>>())
1139 .first->second.get();
1140 break;
1141 case RenderTargetFormat::A1R5G5B5_UNORM:
1142 return impl->converters_cache
1143 .emplace(format, std::make_unique<ConverterImpl<A1R5G5B5_UNORMTraits>>())
1144 .first->second.get();
1145 break;
1146 case RenderTargetFormat::R8G8_UNORM:
1147 return impl->converters_cache
1148 .emplace(format, std::make_unique<ConverterImpl<R8G8_UNORMTraits>>())
1149 .first->second.get();
1150 break;
1151 case RenderTargetFormat::R8G8_SNORM:
1152 return impl->converters_cache
1153 .emplace(format, std::make_unique<ConverterImpl<R8G8_SNORMTraits>>())
1154 .first->second.get();
1155 break;
1156 case RenderTargetFormat::R8G8_SINT:
1157 return impl->converters_cache
1158 .emplace(format, std::make_unique<ConverterImpl<R8G8_SINTTraits>>())
1159 .first->second.get();
1160 break;
1161 case RenderTargetFormat::R8G8_UINT:
1162 return impl->converters_cache
1163 .emplace(format, std::make_unique<ConverterImpl<R8G8_UINTTraits>>())
1164 .first->second.get();
1165 break;
1166 case RenderTargetFormat::R16_UNORM:
1167 return impl->converters_cache
1168 .emplace(format, std::make_unique<ConverterImpl<R16_UNORMTraits>>())
1169 .first->second.get();
1170 break;
1171 case RenderTargetFormat::R16_SNORM:
1172 return impl->converters_cache
1173 .emplace(format, std::make_unique<ConverterImpl<R16_SNORMTraits>>())
1174 .first->second.get();
1175 break;
1176 case RenderTargetFormat::R16_SINT:
1177 return impl->converters_cache
1178 .emplace(format, std::make_unique<ConverterImpl<R16_SINTTraits>>())
1179 .first->second.get();
1180 break;
1181 case RenderTargetFormat::R16_UINT:
1182 return impl->converters_cache
1183 .emplace(format, std::make_unique<ConverterImpl<R16_UINTTraits>>())
1184 .first->second.get();
1185 break;
1186 case RenderTargetFormat::R16_FLOAT:
1187 return impl->converters_cache
1188 .emplace(format, std::make_unique<ConverterImpl<R16_FLOATTraits>>())
1189 .first->second.get();
1190 break;
1191 case RenderTargetFormat::R8_UNORM:
1192 return impl->converters_cache
1193 .emplace(format, std::make_unique<ConverterImpl<R8_UNORMTraits>>())
1194 .first->second.get();
1195 break;
1196 case RenderTargetFormat::R8_SNORM:
1197 return impl->converters_cache
1198 .emplace(format, std::make_unique<ConverterImpl<R8_SNORMTraits>>())
1199 .first->second.get();
1200 break;
1201 case RenderTargetFormat::R8_SINT:
1202 return impl->converters_cache
1203 .emplace(format, std::make_unique<ConverterImpl<R8_SINTTraits>>())
1204 .first->second.get();
1205 break;
1206 case RenderTargetFormat::R8_UINT:
1207 return impl->converters_cache
1208 .emplace(format, std::make_unique<ConverterImpl<R8_UINTTraits>>())
1209 .first->second.get();
1210 break;
1211 case RenderTargetFormat::X1R5G5B5_UNORM:
1212 return impl->converters_cache
1213 .emplace(format, std::make_unique<ConverterImpl<X1R5G5B5_UNORMTraits>>())
1214 .first->second.get();
1215 break;
1216 case RenderTargetFormat::X8B8G8R8_UNORM:
1217 return impl->converters_cache
1218 .emplace(format, std::make_unique<ConverterImpl<X8B8G8R8_UNORMTraits>>())
1219 .first->second.get();
1220 break;
1221 case RenderTargetFormat::X8B8G8R8_SRGB:
1222 return impl->converters_cache
1223 .emplace(format, std::make_unique<ConverterImpl<X8B8G8R8_SRGBTraits>>())
1224 .first->second.get();
1225 break;
1226 default: {
1227 UNIMPLEMENTED_MSG("This format {} converter is not implemented", format);
1228 return impl->converters_cache.emplace(format, std::make_unique<NullConverter>())
1229 .first->second.get();
1230 }
1231 }
1232}
1233
1234} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/engines/sw_blitter/converter.h b/src/video_core/engines/sw_blitter/converter.h
new file mode 100644
index 000000000..f9bdc516e
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/converter.h
@@ -0,0 +1,36 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <memory>
7#include <span>
8
9#include "common/common_types.h"
10
11#include "video_core/gpu.h"
12
13namespace Tegra::Engines::Blitter {
14
15class Converter {
16public:
17 virtual void ConvertTo(std::span<const u8> input, std::span<f32> output) = 0;
18 virtual void ConvertFrom(std::span<const f32> input, std::span<u8> output) = 0;
19 virtual ~Converter() = default;
20};
21
22class ConverterFactory {
23public:
24 ConverterFactory();
25 ~ConverterFactory();
26
27 Converter* GetFormatConverter(RenderTargetFormat format);
28
29private:
30 Converter* BuildConverter(RenderTargetFormat format);
31
32 struct ConverterFactoryImpl;
33 std::unique_ptr<ConverterFactoryImpl> impl;
34};
35
36} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/engines/sw_blitter/generate_converters.py b/src/video_core/engines/sw_blitter/generate_converters.py
new file mode 100644
index 000000000..f641564f7
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/generate_converters.py
@@ -0,0 +1,136 @@
1# SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2# SPDX-License-Identifier: GPL-3.0-or-later
3
4import re
5
class Format:
    """Description of one render-target format parsed from its name.

    A name has the shape ``<components>_<type>`` (for example ``A2B10G10R10_UNORM``), where
    ``<components>`` is a run of ``<letter><bit width>`` pairs.

    Attributes:
        name: the full format name as given.
        component_type: the part after the first underscore (e.g. ``UNORM``).
        num_components: number of ``<letter><bit width>`` pairs found.
        sizes: bit width of each component, in name order.
        swizzle: channel letter of each component, in name order.
    """

    def __init__(self, string_value):
        self.name = string_value
        tmp = string_value.split('_')
        self.component_type = tmp[1]
        # A component is exactly one channel letter followed by its bit width. `\w` would
        # also accept a digit or underscore as the "letter", so match letters explicitly.
        component_data = re.findall(r"[A-Za-z]\d+", tmp[0])
        self.num_components = len(component_data)
        self.sizes = [int(data[1:]) for data in component_data]
        self.swizzle = [data[0] for data in component_data]

    @staticmethod
    def _brace_list(items):
        """Joins `items` into a C++ braced initializer list: ``{ a, b, c }``."""
        return "{ " + ", ".join(items) + " }"

    def build_component_type_array(self):
        """Returns the initializer for `component_types`; every component shares one type."""
        return self._brace_list(
            ["ComponentType::" + self.component_type] * self.num_components)

    def build_component_sizes_array(self):
        """Returns the initializer for `component_sizes`."""
        return self._brace_list([str(size) for size in self.sizes])

    def build_component_swizzle_array(self):
        """Returns the initializer for `component_swizzle`; letter X maps to Swizzle::None."""
        return self._brace_list(
            ["Swizzle::" + ("None" if letter == "X" else letter) for letter in self.swizzle])

    def print_declaration(self):
        """Prints the C++ `<name>Traits` struct for this format."""
        print("struct " + self.name + "Traits {")
        print(" static constexpr size_t num_components = " + str(self.num_components) + ";")
        print(" static constexpr std::array<ComponentType, num_components> component_types = " + self.build_component_type_array() + ";")
        print(" static constexpr std::array<size_t, num_components> component_sizes = " + self.build_component_sizes_array() + ";")
        print(" static constexpr std::array<Swizzle, num_components> component_swizzle = " + self.build_component_swizzle_array() + ";")
        print("};\n")

    def print_case(self):
        """Prints the C++ `switch` case that builds and caches this format's converter."""
        print("case RenderTargetFormat::" + self.name + ":")
        print(" return impl->converters_cache")
        print(" .emplace(format, std::make_unique<ConverterImpl<" + self.name + "Traits>>())")
        print(" .first->second.get();")
        print(" break;")
71
# One render-target format name per line; each gets a traits struct and a switch case.
txt = """
R32G32B32A32_FLOAT
R32G32B32A32_SINT
R32G32B32A32_UINT
R32G32B32X32_FLOAT
R32G32B32X32_SINT
R32G32B32X32_UINT
R16G16B16A16_UNORM
R16G16B16A16_SNORM
R16G16B16A16_SINT
R16G16B16A16_UINT
R16G16B16A16_FLOAT
R32G32_FLOAT
R32G32_SINT
R32G32_UINT
R16G16B16X16_FLOAT
A8R8G8B8_UNORM
A8R8G8B8_SRGB
A2B10G10R10_UNORM
A2B10G10R10_UINT
A2R10G10B10_UNORM
A8B8G8R8_UNORM
A8B8G8R8_SRGB
A8B8G8R8_SNORM
A8B8G8R8_SINT
A8B8G8R8_UINT
R16G16_UNORM
R16G16_SNORM
R16G16_SINT
R16G16_UINT
R16G16_FLOAT
B10G11R11_FLOAT
R32_SINT
R32_UINT
R32_FLOAT
X8R8G8B8_UNORM
X8R8G8B8_SRGB
R5G6B5_UNORM
A1R5G5B5_UNORM
R8G8_UNORM
R8G8_SNORM
R8G8_SINT
R8G8_UINT
R16_UNORM
R16_SNORM
R16_SINT
R16_UINT
R16_FLOAT
R8_UNORM
R8_SNORM
R8_SINT
R8_UINT
X1R5G5B5_UNORM
X8B8G8R8_UNORM
X8B8G8R8_SRGB
"""

formats = [Format(format_name) for format_name in txt.split()]

# First emit every traits struct, then every switch case, so each group can be pasted as a unit.
for fmt in formats:
    fmt.print_declaration()

for fmt in formats:
    fmt.print_case()
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index d0709dc69..8a871593a 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -27,12 +27,12 @@ struct CommandList;
27// TODO: Implement the commented ones 27// TODO: Implement the commented ones
28enum class RenderTargetFormat : u32 { 28enum class RenderTargetFormat : u32 {
29 NONE = 0x0, 29 NONE = 0x0,
30 R32B32G32A32_FLOAT = 0xC0, 30 R32G32B32A32_FLOAT = 0xC0,
31 R32G32B32A32_SINT = 0xC1, 31 R32G32B32A32_SINT = 0xC1,
32 R32G32B32A32_UINT = 0xC2, 32 R32G32B32A32_UINT = 0xC2,
33 // R32G32B32X32_FLOAT = 0xC3, 33 R32G32B32X32_FLOAT = 0xC3,
34 // R32G32B32X32_SINT = 0xC4, 34 R32G32B32X32_SINT = 0xC4,
35 // R32G32B32X32_UINT = 0xC5, 35 R32G32B32X32_UINT = 0xC5,
36 R16G16B16A16_UNORM = 0xC6, 36 R16G16B16A16_UNORM = 0xC6,
37 R16G16B16A16_SNORM = 0xC7, 37 R16G16B16A16_SNORM = 0xC7,
38 R16G16B16A16_SINT = 0xC8, 38 R16G16B16A16_SINT = 0xC8,
@@ -56,13 +56,13 @@ enum class RenderTargetFormat : u32 {
56 R16G16_SINT = 0xDC, 56 R16G16_SINT = 0xDC,
57 R16G16_UINT = 0xDD, 57 R16G16_UINT = 0xDD,
58 R16G16_FLOAT = 0xDE, 58 R16G16_FLOAT = 0xDE,
59 // A2R10G10B10_UNORM = 0xDF, 59 A2R10G10B10_UNORM = 0xDF,
60 B10G11R11_FLOAT = 0xE0, 60 B10G11R11_FLOAT = 0xE0,
61 R32_SINT = 0xE3, 61 R32_SINT = 0xE3,
62 R32_UINT = 0xE4, 62 R32_UINT = 0xE4,
63 R32_FLOAT = 0xE5, 63 R32_FLOAT = 0xE5,
64 // X8R8G8B8_UNORM = 0xE6, 64 X8R8G8B8_UNORM = 0xE6,
65 // X8R8G8B8_SRGB = 0xE7, 65 X8R8G8B8_SRGB = 0xE7,
66 R5G6B5_UNORM = 0xE8, 66 R5G6B5_UNORM = 0xE8,
67 A1R5G5B5_UNORM = 0xE9, 67 A1R5G5B5_UNORM = 0xE9,
68 R8G8_UNORM = 0xEA, 68 R8G8_UNORM = 0xEA,
@@ -79,11 +79,11 @@ enum class RenderTargetFormat : u32 {
79 R8_SINT = 0xF5, 79 R8_SINT = 0xF5,
80 R8_UINT = 0xF6, 80 R8_UINT = 0xF6,
81 81
82 /* 82 // A8_UNORM = 0xF7,
83 A8_UNORM = 0xF7,
84 X1R5G5B5_UNORM = 0xF8, 83 X1R5G5B5_UNORM = 0xF8,
85 X8B8G8R8_UNORM = 0xF9, 84 X8B8G8R8_UNORM = 0xF9,
86 X8B8G8R8_SRGB = 0xFA, 85 X8B8G8R8_SRGB = 0xFA,
86 /*
87 Z1R5G5B5_UNORM = 0xFB, 87 Z1R5G5B5_UNORM = 0xFB,
88 O1R5G5B5_UNORM = 0xFC, 88 O1R5G5B5_UNORM = 0xFC,
89 Z8R8G8B8_UNORM = 0xFD, 89 Z8R8G8B8_UNORM = 0xFD,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 354c6e429..f71a316b6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -466,8 +466,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
466 const Tegra::Engines::Fermi2D::Config& copy_config) { 466 const Tegra::Engines::Fermi2D::Config& copy_config) {
467 MICROPROFILE_SCOPE(OpenGL_Blits); 467 MICROPROFILE_SCOPE(OpenGL_Blits);
468 std::scoped_lock lock{texture_cache.mutex}; 468 std::scoped_lock lock{texture_cache.mutex};
469 texture_cache.BlitImage(dst, src, copy_config); 469 return texture_cache.BlitImage(dst, src, copy_config);
470 return true;
471} 470}
472 471
473Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { 472Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index e14f9b2db..ef1190e1f 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -28,6 +28,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
28 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM 28 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
29 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM 29 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
30 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT 30 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
31 {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2R10G10B10_UNORM
31 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM 32 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
32 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // A5B5G5R1_UNORM 33 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // A5B5G5R1_UNORM
33 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM 34 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 5c156087b..1da53f203 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -125,6 +125,7 @@ struct FormatTuple {
125 {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM 125 {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
126 {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM 126 {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
127 {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT 127 {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
128 {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable | Storage}, // A2R10G10B10_UNORM
128 {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) 129 {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle)
129 {VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled) 130 {VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled)
130 {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM 131 {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 12b13cc59..d8ad8815c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -542,8 +542,7 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
542 const Tegra::Engines::Fermi2D::Surface& dst, 542 const Tegra::Engines::Fermi2D::Surface& dst,
543 const Tegra::Engines::Fermi2D::Config& copy_config) { 543 const Tegra::Engines::Fermi2D::Config& copy_config) {
544 std::scoped_lock lock{texture_cache.mutex}; 544 std::scoped_lock lock{texture_cache.mutex};
545 texture_cache.BlitImage(dst, src, copy_config); 545 return texture_cache.BlitImage(dst, src, copy_config);
546 return true;
547} 546}
548 547
549Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { 548Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() {
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 6bd133d10..b618e1a25 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -93,11 +93,14 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
93 93
94PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { 94PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
95 switch (format) { 95 switch (format) {
96 case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT: 96 case Tegra::RenderTargetFormat::R32G32B32A32_FLOAT:
97 case Tegra::RenderTargetFormat::R32G32B32X32_FLOAT:
97 return PixelFormat::R32G32B32A32_FLOAT; 98 return PixelFormat::R32G32B32A32_FLOAT;
98 case Tegra::RenderTargetFormat::R32G32B32A32_SINT: 99 case Tegra::RenderTargetFormat::R32G32B32A32_SINT:
100 case Tegra::RenderTargetFormat::R32G32B32X32_SINT:
99 return PixelFormat::R32G32B32A32_SINT; 101 return PixelFormat::R32G32B32A32_SINT;
100 case Tegra::RenderTargetFormat::R32G32B32A32_UINT: 102 case Tegra::RenderTargetFormat::R32G32B32A32_UINT:
103 case Tegra::RenderTargetFormat::R32G32B32X32_UINT:
101 return PixelFormat::R32G32B32A32_UINT; 104 return PixelFormat::R32G32B32A32_UINT;
102 case Tegra::RenderTargetFormat::R16G16B16A16_UNORM: 105 case Tegra::RenderTargetFormat::R16G16B16A16_UNORM:
103 return PixelFormat::R16G16B16A16_UNORM; 106 return PixelFormat::R16G16B16A16_UNORM;
@@ -118,16 +121,22 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
118 case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT: 121 case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT:
119 return PixelFormat::R16G16B16X16_FLOAT; 122 return PixelFormat::R16G16B16X16_FLOAT;
120 case Tegra::RenderTargetFormat::A8R8G8B8_UNORM: 123 case Tegra::RenderTargetFormat::A8R8G8B8_UNORM:
124 case Tegra::RenderTargetFormat::X8R8G8B8_UNORM:
121 return PixelFormat::B8G8R8A8_UNORM; 125 return PixelFormat::B8G8R8A8_UNORM;
122 case Tegra::RenderTargetFormat::A8R8G8B8_SRGB: 126 case Tegra::RenderTargetFormat::A8R8G8B8_SRGB:
127 case Tegra::RenderTargetFormat::X8R8G8B8_SRGB:
123 return PixelFormat::B8G8R8A8_SRGB; 128 return PixelFormat::B8G8R8A8_SRGB;
124 case Tegra::RenderTargetFormat::A2B10G10R10_UNORM: 129 case Tegra::RenderTargetFormat::A2B10G10R10_UNORM:
125 return PixelFormat::A2B10G10R10_UNORM; 130 return PixelFormat::A2B10G10R10_UNORM;
126 case Tegra::RenderTargetFormat::A2B10G10R10_UINT: 131 case Tegra::RenderTargetFormat::A2B10G10R10_UINT:
127 return PixelFormat::A2B10G10R10_UINT; 132 return PixelFormat::A2B10G10R10_UINT;
133 case Tegra::RenderTargetFormat::A2R10G10B10_UNORM:
134 return PixelFormat::A2R10G10B10_UNORM;
128 case Tegra::RenderTargetFormat::A8B8G8R8_UNORM: 135 case Tegra::RenderTargetFormat::A8B8G8R8_UNORM:
136 case Tegra::RenderTargetFormat::X8B8G8R8_UNORM:
129 return PixelFormat::A8B8G8R8_UNORM; 137 return PixelFormat::A8B8G8R8_UNORM;
130 case Tegra::RenderTargetFormat::A8B8G8R8_SRGB: 138 case Tegra::RenderTargetFormat::A8B8G8R8_SRGB:
139 case Tegra::RenderTargetFormat::X8B8G8R8_SRGB:
131 return PixelFormat::A8B8G8R8_SRGB; 140 return PixelFormat::A8B8G8R8_SRGB;
132 case Tegra::RenderTargetFormat::A8B8G8R8_SNORM: 141 case Tegra::RenderTargetFormat::A8B8G8R8_SNORM:
133 return PixelFormat::A8B8G8R8_SNORM; 142 return PixelFormat::A8B8G8R8_SNORM;
@@ -156,6 +165,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
156 case Tegra::RenderTargetFormat::R5G6B5_UNORM: 165 case Tegra::RenderTargetFormat::R5G6B5_UNORM:
157 return PixelFormat::R5G6B5_UNORM; 166 return PixelFormat::R5G6B5_UNORM;
158 case Tegra::RenderTargetFormat::A1R5G5B5_UNORM: 167 case Tegra::RenderTargetFormat::A1R5G5B5_UNORM:
168 case Tegra::RenderTargetFormat::X1R5G5B5_UNORM:
159 return PixelFormat::A1R5G5B5_UNORM; 169 return PixelFormat::A1R5G5B5_UNORM;
160 case Tegra::RenderTargetFormat::R8G8_UNORM: 170 case Tegra::RenderTargetFormat::R8G8_UNORM:
161 return PixelFormat::R8G8_UNORM; 171 return PixelFormat::R8G8_UNORM;
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 57ca7f597..44b79af20 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -23,6 +23,7 @@ enum class PixelFormat {
23 A1R5G5B5_UNORM, 23 A1R5G5B5_UNORM,
24 A2B10G10R10_UNORM, 24 A2B10G10R10_UNORM,
25 A2B10G10R10_UINT, 25 A2B10G10R10_UINT,
26 A2R10G10B10_UNORM,
26 A1B5G5R5_UNORM, 27 A1B5G5R5_UNORM,
27 A5B5G5R1_UNORM, 28 A5B5G5R1_UNORM,
28 R8_UNORM, 29 R8_UNORM,
@@ -159,6 +160,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
159 1, // A1R5G5B5_UNORM 160 1, // A1R5G5B5_UNORM
160 1, // A2B10G10R10_UNORM 161 1, // A2B10G10R10_UNORM
161 1, // A2B10G10R10_UINT 162 1, // A2B10G10R10_UINT
163 1, // A2R10G10B10_UNORM
162 1, // A1B5G5R5_UNORM 164 1, // A1B5G5R5_UNORM
163 1, // A5B5G5R1_UNORM 165 1, // A5B5G5R1_UNORM
164 1, // R8_UNORM 166 1, // R8_UNORM
@@ -264,6 +266,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
264 1, // A1R5G5B5_UNORM 266 1, // A1R5G5B5_UNORM
265 1, // A2B10G10R10_UNORM 267 1, // A2B10G10R10_UNORM
266 1, // A2B10G10R10_UINT 268 1, // A2B10G10R10_UINT
269 1, // A2R10G10B10_UNORM
267 1, // A1B5G5R5_UNORM 270 1, // A1B5G5R5_UNORM
268 1, // A5B5G5R1_UNORM 271 1, // A5B5G5R1_UNORM
269 1, // R8_UNORM 272 1, // R8_UNORM
@@ -369,6 +372,7 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
369 16, // A1R5G5B5_UNORM 372 16, // A1R5G5B5_UNORM
370 32, // A2B10G10R10_UNORM 373 32, // A2B10G10R10_UNORM
371 32, // A2B10G10R10_UINT 374 32, // A2B10G10R10_UINT
375 32, // A2R10G10B10_UNORM
372 16, // A1B5G5R5_UNORM 376 16, // A1B5G5R5_UNORM
373 16, // A5B5G5R1_UNORM 377 16, // A5B5G5R1_UNORM
374 8, // R8_UNORM 378 8, // R8_UNORM
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index acc854715..f1f0a057b 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -35,6 +35,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
35 return "A2B10G10R10_UNORM"; 35 return "A2B10G10R10_UNORM";
36 case PixelFormat::A2B10G10R10_UINT: 36 case PixelFormat::A2B10G10R10_UINT:
37 return "A2B10G10R10_UINT"; 37 return "A2B10G10R10_UINT";
38 case PixelFormat::A2R10G10B10_UNORM:
39 return "A2R10G10B10_UNORM";
38 case PixelFormat::A1B5G5R5_UNORM: 40 case PixelFormat::A1B5G5R5_UNORM:
39 return "A1B5G5R5_UNORM"; 41 return "A1B5G5R5_UNORM";
40 case PixelFormat::A5B5G5R1_UNORM: 42 case PixelFormat::A5B5G5R1_UNORM:
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8ef75fe73..8e68a2e53 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -506,10 +506,14 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
506} 506}
507 507
508template <class P> 508template <class P>
509void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 509bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
510 const Tegra::Engines::Fermi2D::Surface& src, 510 const Tegra::Engines::Fermi2D::Surface& src,
511 const Tegra::Engines::Fermi2D::Config& copy) { 511 const Tegra::Engines::Fermi2D::Config& copy) {
512 const BlitImages images = GetBlitImages(dst, src, copy); 512 const auto result = GetBlitImages(dst, src, copy);
513 if (!result) {
514 return false;
515 }
516 const BlitImages images = *result;
513 const ImageId dst_id = images.dst_id; 517 const ImageId dst_id = images.dst_id;
514 const ImageId src_id = images.src_id; 518 const ImageId src_id = images.src_id;
515 519
@@ -596,6 +600,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
596 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, 600 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
597 copy.operation); 601 copy.operation);
598 } 602 }
603 return true;
599} 604}
600 605
601template <class P> 606template <class P>
@@ -1133,7 +1138,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1133} 1138}
1134 1139
1135template <class P> 1140template <class P>
1136typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( 1141std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImages(
1137 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, 1142 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
1138 const Tegra::Engines::Fermi2D::Config& copy) { 1143 const Tegra::Engines::Fermi2D::Config& copy) {
1139 1144
@@ -1154,6 +1159,20 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
1154 has_deleted_images = false; 1159 has_deleted_images = false;
1155 src_id = FindImage(src_info, src_addr, try_options); 1160 src_id = FindImage(src_info, src_addr, try_options);
1156 dst_id = FindImage(dst_info, dst_addr, try_options); 1161 dst_id = FindImage(dst_info, dst_addr, try_options);
1162 if (!copy.must_accelerate) {
1163 do {
1164 if (!src_id && !dst_id) {
1165 return std::nullopt;
1166 }
1167 if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) {
1168 break;
1169 }
1170 if (dst_id && True(slot_images[dst_id].flags & ImageFlagBits::GpuModified)) {
1171 break;
1172 }
1173 return std::nullopt;
1174 } while (false);
1175 }
1157 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; 1176 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
1158 if (src_image && src_image->info.num_samples > 1) { 1177 if (src_image && src_image->info.num_samples > 1) {
1159 RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; 1178 RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews};
@@ -1194,12 +1213,12 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
1194 dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); 1213 dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});
1195 } while (has_deleted_images); 1214 } while (has_deleted_images);
1196 } 1215 }
1197 return BlitImages{ 1216 return {BlitImages{
1198 .dst_id = dst_id, 1217 .dst_id = dst_id,
1199 .src_id = src_id, 1218 .src_id = src_id,
1200 .dst_format = dst_info.format, 1219 .dst_format = dst_info.format,
1201 .src_format = src_info.format, 1220 .src_format = src_info.format,
1202 }; 1221 }};
1203} 1222}
1204 1223
1205template <class P> 1224template <class P>
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 2fa8445eb..9db7195bf 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -174,7 +174,7 @@ public:
174 void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); 174 void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
175 175
176 /// Blit an image with the given parameters 176 /// Blit an image with the given parameters
177 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 177 bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
178 const Tegra::Engines::Fermi2D::Surface& src, 178 const Tegra::Engines::Fermi2D::Surface& src,
179 const Tegra::Engines::Fermi2D::Config& copy); 179 const Tegra::Engines::Fermi2D::Config& copy);
180 180
@@ -285,9 +285,9 @@ private:
285 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); 285 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
286 286
287 /// Return a blit image pair from the given guest blit parameters 287 /// Return a blit image pair from the given guest blit parameters
288 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, 288 [[nodiscard]] std::optional<BlitImages> GetBlitImages(
289 const Tegra::Engines::Fermi2D::Surface& src, 289 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
290 const Tegra::Engines::Fermi2D::Config& copy); 290 const Tegra::Engines::Fermi2D::Config& copy);
291 291
292 /// Find or create a sampler from a guest descriptor sampler 292 /// Find or create a sampler from a guest descriptor sampler
293 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); 293 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);