diff options
Diffstat (limited to 'src')
40 files changed, 1936 insertions, 124 deletions
diff --git a/src/audio_core/renderer/command/command_buffer.cpp b/src/audio_core/renderer/command/command_buffer.cpp index 2ef879ee1..8c6fe97e7 100644 --- a/src/audio_core/renderer/command/command_buffer.cpp +++ b/src/audio_core/renderer/command/command_buffer.cpp | |||
| @@ -460,21 +460,23 @@ void CommandBuffer::GenerateDeviceSinkCommand(const s32 node_id, const s16 buffe | |||
| 460 | 460 | ||
| 461 | cmd.session_id = session_id; | 461 | cmd.session_id = session_id; |
| 462 | 462 | ||
| 463 | cmd.input_count = parameter.input_count; | ||
| 464 | s16 max_input{0}; | ||
| 465 | for (u32 i = 0; i < parameter.input_count; i++) { | ||
| 466 | cmd.inputs[i] = buffer_offset + parameter.inputs[i]; | ||
| 467 | max_input = std::max(max_input, cmd.inputs[i]); | ||
| 468 | } | ||
| 469 | |||
| 463 | if (state.upsampler_info != nullptr) { | 470 | if (state.upsampler_info != nullptr) { |
| 464 | const auto size_{state.upsampler_info->sample_count * parameter.input_count}; | 471 | const auto size_{state.upsampler_info->sample_count * parameter.input_count}; |
| 465 | const auto size_bytes{size_ * sizeof(s32)}; | 472 | const auto size_bytes{size_ * sizeof(s32)}; |
| 466 | const auto addr{memory_pool->Translate(state.upsampler_info->samples_pos, size_bytes)}; | 473 | const auto addr{memory_pool->Translate(state.upsampler_info->samples_pos, size_bytes)}; |
| 467 | cmd.sample_buffer = {reinterpret_cast<s32*>(addr), | 474 | cmd.sample_buffer = {reinterpret_cast<s32*>(addr), |
| 468 | parameter.input_count * state.upsampler_info->sample_count}; | 475 | (max_input + 1) * state.upsampler_info->sample_count}; |
| 469 | } else { | 476 | } else { |
| 470 | cmd.sample_buffer = samples_buffer; | 477 | cmd.sample_buffer = samples_buffer; |
| 471 | } | 478 | } |
| 472 | 479 | ||
| 473 | cmd.input_count = parameter.input_count; | ||
| 474 | for (u32 i = 0; i < parameter.input_count; i++) { | ||
| 475 | cmd.inputs[i] = buffer_offset + parameter.inputs[i]; | ||
| 476 | } | ||
| 477 | |||
| 478 | GenerateEnd<DeviceSinkCommand>(cmd); | 480 | GenerateEnd<DeviceSinkCommand>(cmd); |
| 479 | } | 481 | } |
| 480 | 482 | ||
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h index d87be72d6..e252b5f4b 100644 --- a/src/core/hle/kernel/hle_ipc.h +++ b/src/core/hle/kernel/hle_ipc.h | |||
| @@ -199,7 +199,7 @@ public: | |||
| 199 | ~HLERequestContext(); | 199 | ~HLERequestContext(); |
| 200 | 200 | ||
| 201 | /// Returns a pointer to the IPC command buffer for this request. | 201 | /// Returns a pointer to the IPC command buffer for this request. |
| 202 | u32* CommandBuffer() { | 202 | [[nodiscard]] u32* CommandBuffer() { |
| 203 | return cmd_buf.data(); | 203 | return cmd_buf.data(); |
| 204 | } | 204 | } |
| 205 | 205 | ||
| @@ -207,7 +207,7 @@ public: | |||
| 207 | * Returns the session through which this request was made. This can be used as a map key to | 207 | * Returns the session through which this request was made. This can be used as a map key to |
| 208 | * access per-client data on services. | 208 | * access per-client data on services. |
| 209 | */ | 209 | */ |
| 210 | Kernel::KServerSession* Session() { | 210 | [[nodiscard]] Kernel::KServerSession* Session() { |
| 211 | return server_session; | 211 | return server_session; |
| 212 | } | 212 | } |
| 213 | 213 | ||
| @@ -217,61 +217,61 @@ public: | |||
| 217 | /// Writes data from this context back to the requesting process/thread. | 217 | /// Writes data from this context back to the requesting process/thread. |
| 218 | Result WriteToOutgoingCommandBuffer(KThread& requesting_thread); | 218 | Result WriteToOutgoingCommandBuffer(KThread& requesting_thread); |
| 219 | 219 | ||
| 220 | u32_le GetHipcCommand() const { | 220 | [[nodiscard]] u32_le GetHipcCommand() const { |
| 221 | return command; | 221 | return command; |
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | u32_le GetTipcCommand() const { | 224 | [[nodiscard]] u32_le GetTipcCommand() const { |
| 225 | return static_cast<u32_le>(command_header->type.Value()) - | 225 | return static_cast<u32_le>(command_header->type.Value()) - |
| 226 | static_cast<u32_le>(IPC::CommandType::TIPC_CommandRegion); | 226 | static_cast<u32_le>(IPC::CommandType::TIPC_CommandRegion); |
| 227 | } | 227 | } |
| 228 | 228 | ||
| 229 | u32_le GetCommand() const { | 229 | [[nodiscard]] u32_le GetCommand() const { |
| 230 | return command_header->IsTipc() ? GetTipcCommand() : GetHipcCommand(); | 230 | return command_header->IsTipc() ? GetTipcCommand() : GetHipcCommand(); |
| 231 | } | 231 | } |
| 232 | 232 | ||
| 233 | bool IsTipc() const { | 233 | [[nodiscard]] bool IsTipc() const { |
| 234 | return command_header->IsTipc(); | 234 | return command_header->IsTipc(); |
| 235 | } | 235 | } |
| 236 | 236 | ||
| 237 | IPC::CommandType GetCommandType() const { | 237 | [[nodiscard]] IPC::CommandType GetCommandType() const { |
| 238 | return command_header->type; | 238 | return command_header->type; |
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | u64 GetPID() const { | 241 | [[nodiscard]] u64 GetPID() const { |
| 242 | return pid; | 242 | return pid; |
| 243 | } | 243 | } |
| 244 | 244 | ||
| 245 | u32 GetDataPayloadOffset() const { | 245 | [[nodiscard]] u32 GetDataPayloadOffset() const { |
| 246 | return data_payload_offset; | 246 | return data_payload_offset; |
| 247 | } | 247 | } |
| 248 | 248 | ||
| 249 | const std::vector<IPC::BufferDescriptorX>& BufferDescriptorX() const { | 249 | [[nodiscard]] const std::vector<IPC::BufferDescriptorX>& BufferDescriptorX() const { |
| 250 | return buffer_x_desciptors; | 250 | return buffer_x_desciptors; |
| 251 | } | 251 | } |
| 252 | 252 | ||
| 253 | const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorA() const { | 253 | [[nodiscard]] const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorA() const { |
| 254 | return buffer_a_desciptors; | 254 | return buffer_a_desciptors; |
| 255 | } | 255 | } |
| 256 | 256 | ||
| 257 | const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorB() const { | 257 | [[nodiscard]] const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorB() const { |
| 258 | return buffer_b_desciptors; | 258 | return buffer_b_desciptors; |
| 259 | } | 259 | } |
| 260 | 260 | ||
| 261 | const std::vector<IPC::BufferDescriptorC>& BufferDescriptorC() const { | 261 | [[nodiscard]] const std::vector<IPC::BufferDescriptorC>& BufferDescriptorC() const { |
| 262 | return buffer_c_desciptors; | 262 | return buffer_c_desciptors; |
| 263 | } | 263 | } |
| 264 | 264 | ||
| 265 | const IPC::DomainMessageHeader& GetDomainMessageHeader() const { | 265 | [[nodiscard]] const IPC::DomainMessageHeader& GetDomainMessageHeader() const { |
| 266 | return domain_message_header.value(); | 266 | return domain_message_header.value(); |
| 267 | } | 267 | } |
| 268 | 268 | ||
| 269 | bool HasDomainMessageHeader() const { | 269 | [[nodiscard]] bool HasDomainMessageHeader() const { |
| 270 | return domain_message_header.has_value(); | 270 | return domain_message_header.has_value(); |
| 271 | } | 271 | } |
| 272 | 272 | ||
| 273 | /// Helper function to read a buffer using the appropriate buffer descriptor | 273 | /// Helper function to read a buffer using the appropriate buffer descriptor |
| 274 | std::vector<u8> ReadBuffer(std::size_t buffer_index = 0) const; | 274 | [[nodiscard]] std::vector<u8> ReadBuffer(std::size_t buffer_index = 0) const; |
| 275 | 275 | ||
| 276 | /// Helper function to write a buffer using the appropriate buffer descriptor | 276 | /// Helper function to write a buffer using the appropriate buffer descriptor |
| 277 | std::size_t WriteBuffer(const void* buffer, std::size_t size, | 277 | std::size_t WriteBuffer(const void* buffer, std::size_t size, |
| @@ -308,22 +308,34 @@ public: | |||
| 308 | } | 308 | } |
| 309 | 309 | ||
| 310 | /// Helper function to get the size of the input buffer | 310 | /// Helper function to get the size of the input buffer |
| 311 | std::size_t GetReadBufferSize(std::size_t buffer_index = 0) const; | 311 | [[nodiscard]] std::size_t GetReadBufferSize(std::size_t buffer_index = 0) const; |
| 312 | 312 | ||
| 313 | /// Helper function to get the size of the output buffer | 313 | /// Helper function to get the size of the output buffer |
| 314 | std::size_t GetWriteBufferSize(std::size_t buffer_index = 0) const; | 314 | [[nodiscard]] std::size_t GetWriteBufferSize(std::size_t buffer_index = 0) const; |
| 315 | |||
| 316 | /// Helper function to derive the number of elements able to be contained in the read buffer | ||
| 317 | template <typename T> | ||
| 318 | [[nodiscard]] std::size_t GetReadBufferNumElements(std::size_t buffer_index = 0) const { | ||
| 319 | return GetReadBufferSize(buffer_index) / sizeof(T); | ||
| 320 | } | ||
| 321 | |||
| 322 | /// Helper function to derive the number of elements able to be contained in the write buffer | ||
| 323 | template <typename T> | ||
| 324 | [[nodiscard]] std::size_t GetWriteBufferNumElements(std::size_t buffer_index = 0) const { | ||
| 325 | return GetWriteBufferSize(buffer_index) / sizeof(T); | ||
| 326 | } | ||
| 315 | 327 | ||
| 316 | /// Helper function to test whether the input buffer at buffer_index can be read | 328 | /// Helper function to test whether the input buffer at buffer_index can be read |
| 317 | bool CanReadBuffer(std::size_t buffer_index = 0) const; | 329 | [[nodiscard]] bool CanReadBuffer(std::size_t buffer_index = 0) const; |
| 318 | 330 | ||
| 319 | /// Helper function to test whether the output buffer at buffer_index can be written | 331 | /// Helper function to test whether the output buffer at buffer_index can be written |
| 320 | bool CanWriteBuffer(std::size_t buffer_index = 0) const; | 332 | [[nodiscard]] bool CanWriteBuffer(std::size_t buffer_index = 0) const; |
| 321 | 333 | ||
| 322 | Handle GetCopyHandle(std::size_t index) const { | 334 | [[nodiscard]] Handle GetCopyHandle(std::size_t index) const { |
| 323 | return incoming_copy_handles.at(index); | 335 | return incoming_copy_handles.at(index); |
| 324 | } | 336 | } |
| 325 | 337 | ||
| 326 | Handle GetMoveHandle(std::size_t index) const { | 338 | [[nodiscard]] Handle GetMoveHandle(std::size_t index) const { |
| 327 | return incoming_move_handles.at(index); | 339 | return incoming_move_handles.at(index); |
| 328 | } | 340 | } |
| 329 | 341 | ||
| @@ -348,13 +360,13 @@ public: | |||
| 348 | manager = manager_; | 360 | manager = manager_; |
| 349 | } | 361 | } |
| 350 | 362 | ||
| 351 | std::string Description() const; | 363 | [[nodiscard]] std::string Description() const; |
| 352 | 364 | ||
| 353 | KThread& GetThread() { | 365 | [[nodiscard]] KThread& GetThread() { |
| 354 | return *thread; | 366 | return *thread; |
| 355 | } | 367 | } |
| 356 | 368 | ||
| 357 | std::shared_ptr<SessionRequestManager> GetManager() const { | 369 | [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const { |
| 358 | return manager.lock(); | 370 | return manager.lock(); |
| 359 | } | 371 | } |
| 360 | 372 | ||
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp index 608925dfc..053e8f9dd 100644 --- a/src/core/hle/service/audio/audin_u.cpp +++ b/src/core/hle/service/audio/audin_u.cpp | |||
| @@ -122,10 +122,10 @@ private: | |||
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | void GetReleasedAudioInBuffer(Kernel::HLERequestContext& ctx) { | 124 | void GetReleasedAudioInBuffer(Kernel::HLERequestContext& ctx) { |
| 125 | auto write_buffer_size = ctx.GetWriteBufferSize() / sizeof(u64); | 125 | const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); |
| 126 | std::vector<u64> released_buffers(write_buffer_size, 0); | 126 | std::vector<u64> released_buffers(write_buffer_size); |
| 127 | 127 | ||
| 128 | auto count = impl->GetReleasedBuffers(released_buffers); | 128 | const auto count = impl->GetReleasedBuffers(released_buffers); |
| 129 | 129 | ||
| 130 | [[maybe_unused]] std::string tags{}; | 130 | [[maybe_unused]] std::string tags{}; |
| 131 | for (u32 i = 0; i < count; i++) { | 131 | for (u32 i = 0; i < count; i++) { |
| @@ -228,7 +228,7 @@ void AudInU::ListAudioIns(Kernel::HLERequestContext& ctx) { | |||
| 228 | LOG_DEBUG(Service_Audio, "called"); | 228 | LOG_DEBUG(Service_Audio, "called"); |
| 229 | 229 | ||
| 230 | const auto write_count = | 230 | const auto write_count = |
| 231 | static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName)); | 231 | static_cast<u32>(ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>()); |
| 232 | std::vector<AudioDevice::AudioDeviceName> device_names{}; | 232 | std::vector<AudioDevice::AudioDeviceName> device_names{}; |
| 233 | 233 | ||
| 234 | u32 out_count{0}; | 234 | u32 out_count{0}; |
| @@ -248,7 +248,7 @@ void AudInU::ListAudioInsAutoFiltered(Kernel::HLERequestContext& ctx) { | |||
| 248 | LOG_DEBUG(Service_Audio, "called"); | 248 | LOG_DEBUG(Service_Audio, "called"); |
| 249 | 249 | ||
| 250 | const auto write_count = | 250 | const auto write_count = |
| 251 | static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName)); | 251 | static_cast<u32>(ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>()); |
| 252 | std::vector<AudioDevice::AudioDeviceName> device_names{}; | 252 | std::vector<AudioDevice::AudioDeviceName> device_names{}; |
| 253 | 253 | ||
| 254 | u32 out_count{0}; | 254 | u32 out_count{0}; |
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 122290c6a..29751f075 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp | |||
| @@ -129,16 +129,16 @@ private: | |||
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | void GetReleasedAudioOutBuffers(Kernel::HLERequestContext& ctx) { | 131 | void GetReleasedAudioOutBuffers(Kernel::HLERequestContext& ctx) { |
| 132 | auto write_buffer_size = ctx.GetWriteBufferSize() / sizeof(u64); | 132 | const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); |
| 133 | std::vector<u64> released_buffers(write_buffer_size, 0); | 133 | std::vector<u64> released_buffers(write_buffer_size); |
| 134 | 134 | ||
| 135 | auto count = impl->GetReleasedBuffers(released_buffers); | 135 | const auto count = impl->GetReleasedBuffers(released_buffers); |
| 136 | 136 | ||
| 137 | [[maybe_unused]] std::string tags{}; | 137 | [[maybe_unused]] std::string tags{}; |
| 138 | for (u32 i = 0; i < count; i++) { | 138 | for (u32 i = 0; i < count; i++) { |
| 139 | tags += fmt::format("{:08X}, ", released_buffers[i]); | 139 | tags += fmt::format("{:08X}, ", released_buffers[i]); |
| 140 | } | 140 | } |
| 141 | [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()}; | 141 | [[maybe_unused]] const auto sessionid{impl->GetSystem().GetSessionId()}; |
| 142 | LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count, | 142 | LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count, |
| 143 | tags); | 143 | tags); |
| 144 | 144 | ||
| @@ -244,7 +244,7 @@ void AudOutU::ListAudioOuts(Kernel::HLERequestContext& ctx) { | |||
| 244 | std::scoped_lock l{impl->mutex}; | 244 | std::scoped_lock l{impl->mutex}; |
| 245 | 245 | ||
| 246 | const auto write_count = | 246 | const auto write_count = |
| 247 | static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName)); | 247 | static_cast<u32>(ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>()); |
| 248 | std::vector<AudioDevice::AudioDeviceName> device_names{}; | 248 | std::vector<AudioDevice::AudioDeviceName> device_names{}; |
| 249 | if (write_count > 0) { | 249 | if (write_count > 0) { |
| 250 | device_names.emplace_back("DeviceOut"); | 250 | device_names.emplace_back("DeviceOut"); |
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 13423dca6..034ee273f 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp | |||
| @@ -274,7 +274,7 @@ public: | |||
| 274 | 274 | ||
| 275 | private: | 275 | private: |
| 276 | void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { | 276 | void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { |
| 277 | const size_t in_count = ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName); | 277 | const size_t in_count = ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>(); |
| 278 | 278 | ||
| 279 | std::vector<AudioDevice::AudioDeviceName> out_names{}; | 279 | std::vector<AudioDevice::AudioDeviceName> out_names{}; |
| 280 | 280 | ||
| @@ -335,7 +335,7 @@ private: | |||
| 335 | } | 335 | } |
| 336 | 336 | ||
| 337 | void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { | 337 | void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { |
| 338 | const auto write_size = ctx.GetWriteBufferSize() / sizeof(char); | 338 | const auto write_size = ctx.GetWriteBufferSize(); |
| 339 | std::string out_name{"AudioTvOutput"}; | 339 | std::string out_name{"AudioTvOutput"}; |
| 340 | 340 | ||
| 341 | LOG_DEBUG(Service_Audio, "(STUBBED) called. Name={}", out_name); | 341 | LOG_DEBUG(Service_Audio, "(STUBBED) called. Name={}", out_name); |
| @@ -387,7 +387,7 @@ private: | |||
| 387 | } | 387 | } |
| 388 | 388 | ||
| 389 | void ListAudioOutputDeviceName(Kernel::HLERequestContext& ctx) { | 389 | void ListAudioOutputDeviceName(Kernel::HLERequestContext& ctx) { |
| 390 | const size_t in_count = ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName); | 390 | const size_t in_count = ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>(); |
| 391 | 391 | ||
| 392 | std::vector<AudioDevice::AudioDeviceName> out_names{}; | 392 | std::vector<AudioDevice::AudioDeviceName> out_names{}; |
| 393 | 393 | ||
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index 8bafc3a98..825fb8bcc 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp | |||
| @@ -68,7 +68,7 @@ private: | |||
| 68 | ExtraBehavior extra_behavior) { | 68 | ExtraBehavior extra_behavior) { |
| 69 | u32 consumed = 0; | 69 | u32 consumed = 0; |
| 70 | u32 sample_count = 0; | 70 | u32 sample_count = 0; |
| 71 | std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); | 71 | std::vector<opus_int16> samples(ctx.GetWriteBufferNumElements<opus_int16>()); |
| 72 | 72 | ||
| 73 | if (extra_behavior == ExtraBehavior::ResetContext) { | 73 | if (extra_behavior == ExtraBehavior::ResetContext) { |
| 74 | ResetDecoderContext(); | 74 | ResetDecoderContext(); |
diff --git a/src/core/hle/service/bcat/bcat_module.cpp b/src/core/hle/service/bcat/bcat_module.cpp index bc08ac487..cbe690a5d 100644 --- a/src/core/hle/service/bcat/bcat_module.cpp +++ b/src/core/hle/service/bcat/bcat_module.cpp | |||
| @@ -443,7 +443,7 @@ private: | |||
| 443 | } | 443 | } |
| 444 | 444 | ||
| 445 | void Read(Kernel::HLERequestContext& ctx) { | 445 | void Read(Kernel::HLERequestContext& ctx) { |
| 446 | auto write_size = ctx.GetWriteBufferSize() / sizeof(DeliveryCacheDirectoryEntry); | 446 | auto write_size = ctx.GetWriteBufferNumElements<DeliveryCacheDirectoryEntry>(); |
| 447 | 447 | ||
| 448 | LOG_DEBUG(Service_BCAT, "called, write_size={:016X}", write_size); | 448 | LOG_DEBUG(Service_BCAT, "called, write_size={:016X}", write_size); |
| 449 | 449 | ||
| @@ -533,7 +533,7 @@ private: | |||
| 533 | } | 533 | } |
| 534 | 534 | ||
| 535 | void EnumerateDeliveryCacheDirectory(Kernel::HLERequestContext& ctx) { | 535 | void EnumerateDeliveryCacheDirectory(Kernel::HLERequestContext& ctx) { |
| 536 | auto size = ctx.GetWriteBufferSize() / sizeof(DirectoryName); | 536 | auto size = ctx.GetWriteBufferNumElements<DirectoryName>(); |
| 537 | 537 | ||
| 538 | LOG_DEBUG(Service_BCAT, "called, size={:016X}", size); | 538 | LOG_DEBUG(Service_BCAT, "called, size={:016X}", size); |
| 539 | 539 | ||
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp index ff9b0427c..d183e5829 100644 --- a/src/core/hle/service/es/es.cpp +++ b/src/core/hle/service/es/es.cpp | |||
| @@ -192,12 +192,10 @@ private: | |||
| 192 | } | 192 | } |
| 193 | 193 | ||
| 194 | void ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) { | 194 | void ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) { |
| 195 | u32 out_entries; | 195 | size_t out_entries = 0; |
| 196 | if (keys.GetCommonTickets().empty()) | 196 | if (!keys.GetCommonTickets().empty()) { |
| 197 | out_entries = 0; | 197 | out_entries = ctx.GetWriteBufferNumElements<u128>(); |
| 198 | else | 198 | } |
| 199 | out_entries = static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(u128)); | ||
| 200 | |||
| 201 | LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries); | 199 | LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries); |
| 202 | 200 | ||
| 203 | keys.PopulateTickets(); | 201 | keys.PopulateTickets(); |
| @@ -206,20 +204,19 @@ private: | |||
| 206 | std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), | 204 | std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), |
| 207 | [](const auto& pair) { return pair.first; }); | 205 | [](const auto& pair) { return pair.first; }); |
| 208 | 206 | ||
| 209 | out_entries = static_cast<u32>(std::min<std::size_t>(ids.size(), out_entries)); | 207 | out_entries = std::min(ids.size(), out_entries); |
| 210 | ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); | 208 | ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); |
| 211 | 209 | ||
| 212 | IPC::ResponseBuilder rb{ctx, 3}; | 210 | IPC::ResponseBuilder rb{ctx, 3}; |
| 213 | rb.Push(ResultSuccess); | 211 | rb.Push(ResultSuccess); |
| 214 | rb.Push<u32>(out_entries); | 212 | rb.Push<u32>(static_cast<u32>(out_entries)); |
| 215 | } | 213 | } |
| 216 | 214 | ||
| 217 | void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) { | 215 | void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) { |
| 218 | u32 out_entries; | 216 | size_t out_entries = 0; |
| 219 | if (keys.GetPersonalizedTickets().empty()) | 217 | if (!keys.GetPersonalizedTickets().empty()) { |
| 220 | out_entries = 0; | 218 | out_entries = ctx.GetWriteBufferNumElements<u128>(); |
| 221 | else | 219 | } |
| 222 | out_entries = static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(u128)); | ||
| 223 | 220 | ||
| 224 | LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries); | 221 | LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries); |
| 225 | 222 | ||
| @@ -229,12 +226,12 @@ private: | |||
| 229 | std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), | 226 | std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), |
| 230 | [](const auto& pair) { return pair.first; }); | 227 | [](const auto& pair) { return pair.first; }); |
| 231 | 228 | ||
| 232 | out_entries = static_cast<u32>(std::min<std::size_t>(ids.size(), out_entries)); | 229 | out_entries = std::min(ids.size(), out_entries); |
| 233 | ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); | 230 | ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); |
| 234 | 231 | ||
| 235 | IPC::ResponseBuilder rb{ctx, 3}; | 232 | IPC::ResponseBuilder rb{ctx, 3}; |
| 236 | rb.Push(ResultSuccess); | 233 | rb.Push(ResultSuccess); |
| 237 | rb.Push<u32>(out_entries); | 234 | rb.Push<u32>(static_cast<u32>(out_entries)); |
| 238 | } | 235 | } |
| 239 | 236 | ||
| 240 | void GetCommonTicketSize(Kernel::HLERequestContext& ctx) { | 237 | void GetCommonTicketSize(Kernel::HLERequestContext& ctx) { |
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index c08274ef9..fbb16a7da 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp | |||
| @@ -277,7 +277,7 @@ private: | |||
| 277 | LOG_DEBUG(Service_FS, "called."); | 277 | LOG_DEBUG(Service_FS, "called."); |
| 278 | 278 | ||
| 279 | // Calculate how many entries we can fit in the output buffer | 279 | // Calculate how many entries we can fit in the output buffer |
| 280 | const u64 count_entries = ctx.GetWriteBufferSize() / sizeof(FileSys::Entry); | 280 | const u64 count_entries = ctx.GetWriteBufferNumElements<FileSys::Entry>(); |
| 281 | 281 | ||
| 282 | // Cap at total number of entries. | 282 | // Cap at total number of entries. |
| 283 | const u64 actual_entries = std::min(count_entries, entries.size() - next_entry_index); | 283 | const u64 actual_entries = std::min(count_entries, entries.size() - next_entry_index); |
| @@ -543,7 +543,7 @@ public: | |||
| 543 | LOG_DEBUG(Service_FS, "called"); | 543 | LOG_DEBUG(Service_FS, "called"); |
| 544 | 544 | ||
| 545 | // Calculate how many entries we can fit in the output buffer | 545 | // Calculate how many entries we can fit in the output buffer |
| 546 | const u64 count_entries = ctx.GetWriteBufferSize() / sizeof(SaveDataInfo); | 546 | const u64 count_entries = ctx.GetWriteBufferNumElements<SaveDataInfo>(); |
| 547 | 547 | ||
| 548 | // Cap at total number of entries. | 548 | // Cap at total number of entries. |
| 549 | const u64 actual_entries = std::min(count_entries, info.size() - next_entry_index); | 549 | const u64 actual_entries = std::min(count_entries, info.size() - next_entry_index); |
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index 6df563136..c49c61cff 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp | |||
| @@ -292,7 +292,7 @@ public: | |||
| 292 | 292 | ||
| 293 | void GetNetworkInfoLatestUpdate(Kernel::HLERequestContext& ctx) { | 293 | void GetNetworkInfoLatestUpdate(Kernel::HLERequestContext& ctx) { |
| 294 | const std::size_t network_buffer_size = ctx.GetWriteBufferSize(0); | 294 | const std::size_t network_buffer_size = ctx.GetWriteBufferSize(0); |
| 295 | const std::size_t node_buffer_count = ctx.GetWriteBufferSize(1) / sizeof(NodeLatestUpdate); | 295 | const std::size_t node_buffer_count = ctx.GetWriteBufferNumElements<NodeLatestUpdate>(1); |
| 296 | 296 | ||
| 297 | if (node_buffer_count == 0 || network_buffer_size != sizeof(NetworkInfo)) { | 297 | if (node_buffer_count == 0 || network_buffer_size != sizeof(NetworkInfo)) { |
| 298 | LOG_ERROR(Service_LDN, "Invalid buffer, size = {}, count = {}", network_buffer_size, | 298 | LOG_ERROR(Service_LDN, "Invalid buffer, size = {}, count = {}", network_buffer_size, |
| @@ -333,7 +333,7 @@ public: | |||
| 333 | const auto channel{rp.PopEnum<WifiChannel>()}; | 333 | const auto channel{rp.PopEnum<WifiChannel>()}; |
| 334 | const auto scan_filter{rp.PopRaw<ScanFilter>()}; | 334 | const auto scan_filter{rp.PopRaw<ScanFilter>()}; |
| 335 | 335 | ||
| 336 | const std::size_t network_info_size = ctx.GetWriteBufferSize() / sizeof(NetworkInfo); | 336 | const std::size_t network_info_size = ctx.GetWriteBufferNumElements<NetworkInfo>(); |
| 337 | 337 | ||
| 338 | if (network_info_size == 0) { | 338 | if (network_info_size == 0) { |
| 339 | LOG_ERROR(Service_LDN, "Invalid buffer size {}", network_info_size); | 339 | LOG_ERROR(Service_LDN, "Invalid buffer size {}", network_info_size); |
diff --git a/src/core/hle/service/nfc/nfc_user.cpp b/src/core/hle/service/nfc/nfc_user.cpp index 0753333bf..ced2d560b 100644 --- a/src/core/hle/service/nfc/nfc_user.cpp +++ b/src/core/hle/service/nfc/nfc_user.cpp | |||
| @@ -118,7 +118,7 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) { | |||
| 118 | } | 118 | } |
| 119 | 119 | ||
| 120 | std::vector<u64> nfp_devices; | 120 | std::vector<u64> nfp_devices; |
| 121 | const std::size_t max_allowed_devices = ctx.GetWriteBufferSize() / sizeof(u64); | 121 | const std::size_t max_allowed_devices = ctx.GetWriteBufferNumElements<u64>(); |
| 122 | 122 | ||
| 123 | for (auto& device : devices) { | 123 | for (auto& device : devices) { |
| 124 | if (nfp_devices.size() >= max_allowed_devices) { | 124 | if (nfp_devices.size() >= max_allowed_devices) { |
diff --git a/src/core/hle/service/nfp/nfp_user.cpp b/src/core/hle/service/nfp/nfp_user.cpp index 2fe3c0ea0..49816b4c7 100644 --- a/src/core/hle/service/nfp/nfp_user.cpp +++ b/src/core/hle/service/nfp/nfp_user.cpp | |||
| @@ -104,9 +104,9 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) { | |||
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | std::vector<u64> nfp_devices; | 106 | std::vector<u64> nfp_devices; |
| 107 | const std::size_t max_allowed_devices = ctx.GetWriteBufferSize() / sizeof(u64); | 107 | const std::size_t max_allowed_devices = ctx.GetWriteBufferNumElements<u64>(); |
| 108 | 108 | ||
| 109 | for (auto& device : devices) { | 109 | for (const auto& device : devices) { |
| 110 | if (nfp_devices.size() >= max_allowed_devices) { | 110 | if (nfp_devices.size() >= max_allowed_devices) { |
| 111 | continue; | 111 | continue; |
| 112 | } | 112 | } |
| @@ -115,7 +115,7 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) { | |||
| 115 | } | 115 | } |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | if (nfp_devices.size() == 0) { | 118 | if (nfp_devices.empty()) { |
| 119 | IPC::ResponseBuilder rb{ctx, 2}; | 119 | IPC::ResponseBuilder rb{ctx, 2}; |
| 120 | rb.Push(DeviceNotFound); | 120 | rb.Push(DeviceNotFound); |
| 121 | return; | 121 | return; |
diff --git a/src/core/hle/service/ns/iplatform_service_manager.cpp b/src/core/hle/service/ns/iplatform_service_manager.cpp index fd047ff26..1fab2f0dd 100644 --- a/src/core/hle/service/ns/iplatform_service_manager.cpp +++ b/src/core/hle/service/ns/iplatform_service_manager.cpp | |||
| @@ -279,13 +279,10 @@ void IPlatformServiceManager::GetSharedFontInOrderOfPriority(Kernel::HLERequestC | |||
| 279 | font_sizes.push_back(region.size); | 279 | font_sizes.push_back(region.size); |
| 280 | } | 280 | } |
| 281 | 281 | ||
| 282 | // Resize buffers if game requests smaller size output. | 282 | // Resize buffers if game requests smaller size output |
| 283 | font_codes.resize( | 283 | font_codes.resize(std::min(font_codes.size(), ctx.GetWriteBufferNumElements<u32>(0))); |
| 284 | std::min<std::size_t>(font_codes.size(), ctx.GetWriteBufferSize(0) / sizeof(u32))); | 284 | font_offsets.resize(std::min(font_offsets.size(), ctx.GetWriteBufferNumElements<u32>(1))); |
| 285 | font_offsets.resize( | 285 | font_sizes.resize(std::min(font_sizes.size(), ctx.GetWriteBufferNumElements<u32>(2))); |
| 286 | std::min<std::size_t>(font_offsets.size(), ctx.GetWriteBufferSize(1) / sizeof(u32))); | ||
| 287 | font_sizes.resize( | ||
| 288 | std::min<std::size_t>(font_sizes.size(), ctx.GetWriteBufferSize(2) / sizeof(u32))); | ||
| 289 | 286 | ||
| 290 | ctx.WriteBuffer(font_codes, 0); | 287 | ctx.WriteBuffer(font_codes, 0); |
| 291 | ctx.WriteBuffer(font_offsets, 1); | 288 | ctx.WriteBuffer(font_offsets, 1); |
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index f761c2da4..4f1a8d6b7 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp | |||
| @@ -83,7 +83,7 @@ void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_la | |||
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_entries) { | 85 | void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_entries) { |
| 86 | const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode); | 86 | const std::size_t requested_amount = ctx.GetWriteBufferNumElements<LanguageCode>(); |
| 87 | const std::size_t max_amount = std::min(requested_amount, max_entries); | 87 | const std::size_t max_amount = std::min(requested_amount, max_entries); |
| 88 | const std::size_t copy_amount = std::min(available_language_codes.size(), max_amount); | 88 | const std::size_t copy_amount = std::min(available_language_codes.size(), max_amount); |
| 89 | const std::size_t copy_size = copy_amount * sizeof(LanguageCode); | 89 | const std::size_t copy_size = copy_amount * sizeof(LanguageCode); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d7f7d336c..b03a30992 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -28,6 +28,10 @@ add_library(video_core STATIC | |||
| 28 | dirty_flags.h | 28 | dirty_flags.h |
| 29 | dma_pusher.cpp | 29 | dma_pusher.cpp |
| 30 | dma_pusher.h | 30 | dma_pusher.h |
| 31 | engines/sw_blitter/blitter.cpp | ||
| 32 | engines/sw_blitter/blitter.h | ||
| 33 | engines/sw_blitter/converter.cpp | ||
| 34 | engines/sw_blitter/converter.h | ||
| 31 | engines/const_buffer_info.h | 35 | engines/const_buffer_info.h |
| 32 | engines/engine_interface.h | 36 | engines/engine_interface.h |
| 33 | engines/engine_upload.cpp | 37 | engines/engine_upload.cpp |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 599551013..5d3a8293b 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -1742,12 +1742,12 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1742 | SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); | 1742 | SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); |
| 1743 | 1743 | ||
| 1744 | if constexpr (USE_MEMORY_MAPS) { | 1744 | if constexpr (USE_MEMORY_MAPS) { |
| 1745 | auto upload_staging = runtime.UploadStagingBuffer(copy_size); | ||
| 1745 | std::array copies{BufferCopy{ | 1746 | std::array copies{BufferCopy{ |
| 1746 | .src_offset = 0, | 1747 | .src_offset = upload_staging.offset, |
| 1747 | .dst_offset = buffer.Offset(dest_address), | 1748 | .dst_offset = buffer.Offset(dest_address), |
| 1748 | .size = copy_size, | 1749 | .size = copy_size, |
| 1749 | }}; | 1750 | }}; |
| 1750 | auto upload_staging = runtime.UploadStagingBuffer(copy_size); | ||
| 1751 | u8* const src_pointer = upload_staging.mapped_span.data(); | 1751 | u8* const src_pointer = upload_staging.mapped_span.data(); |
| 1752 | std::memcpy(src_pointer, inlined_buffer.data(), copy_size); | 1752 | std::memcpy(src_pointer, inlined_buffer.data(), copy_size); |
| 1753 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | 1753 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); |
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index cdecc3a91..832025d75 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp | |||
| @@ -20,7 +20,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) { | |||
| 20 | ASSERT(memory_manager); | 20 | ASSERT(memory_manager); |
| 21 | dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this); | 21 | dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this); |
| 22 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager); | 22 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager); |
| 23 | fermi_2d = std::make_unique<Engines::Fermi2D>(); | 23 | fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager); |
| 24 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager); | 24 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager); |
| 25 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); | 25 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); |
| 26 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); | 26 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index a34819234..28aa85f32 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -51,11 +51,11 @@ void State::ProcessData(std::span<const u8> read_buffer) { | |||
| 51 | } else { | 51 | } else { |
| 52 | for (u32 line = 0; line < regs.line_count; ++line) { | 52 | for (u32 line = 0; line < regs.line_count; ++line) { |
| 53 | const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch; | 53 | const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch; |
| 54 | memory_manager.WriteBlockUnsafe( | 54 | std::span<const u8> buffer(read_buffer.data() + |
| 55 | dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in, | 55 | static_cast<size_t>(line) * regs.line_length_in, |
| 56 | regs.line_length_in); | 56 | regs.line_length_in); |
| 57 | rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); | ||
| 57 | } | 58 | } |
| 58 | memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count); | ||
| 59 | } | 59 | } |
| 60 | } else { | 60 | } else { |
| 61 | u32 width = regs.dest.width; | 61 | u32 width = regs.dest.width; |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 453e0fb01..c6478ae85 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -3,17 +3,25 @@ | |||
| 3 | 3 | ||
| 4 | #include "common/assert.h" | 4 | #include "common/assert.h" |
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "common/microprofile.h" | ||
| 6 | #include "video_core/engines/fermi_2d.h" | 7 | #include "video_core/engines/fermi_2d.h" |
| 7 | #include "video_core/memory_manager.h" | 8 | #include "video_core/engines/sw_blitter/blitter.h" |
| 8 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 9 | #include "video_core/surface.h" | 10 | #include "video_core/surface.h" |
| 11 | #include "video_core/textures/decoders.h" | ||
| 12 | |||
| 13 | MICROPROFILE_DECLARE(GPU_BlitEngine); | ||
| 14 | MICROPROFILE_DEFINE(GPU_BlitEngine, "GPU", "Blit Engine", MP_RGB(224, 224, 128)); | ||
| 10 | 15 | ||
| 11 | using VideoCore::Surface::BytesPerBlock; | 16 | using VideoCore::Surface::BytesPerBlock; |
| 12 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | 17 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| 13 | 18 | ||
| 14 | namespace Tegra::Engines { | 19 | namespace Tegra::Engines { |
| 15 | 20 | ||
| 16 | Fermi2D::Fermi2D() { | 21 | using namespace Texture; |
| 22 | |||
| 23 | Fermi2D::Fermi2D(MemoryManager& memory_manager_) { | ||
| 24 | sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_); | ||
| 17 | // Nvidia's OpenGL driver seems to assume these values | 25 | // Nvidia's OpenGL driver seems to assume these values |
| 18 | regs.src.depth = 1; | 26 | regs.src.depth = 1; |
| 19 | regs.dst.depth = 1; | 27 | regs.dst.depth = 1; |
| @@ -42,6 +50,7 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 | |||
| 42 | } | 50 | } |
| 43 | 51 | ||
| 44 | void Fermi2D::Blit() { | 52 | void Fermi2D::Blit() { |
| 53 | MICROPROFILE_SCOPE(GPU_BlitEngine); | ||
| 45 | LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", | 54 | LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", |
| 46 | regs.src.Address(), regs.dst.Address()); | 55 | regs.src.Address(), regs.dst.Address()); |
| 47 | 56 | ||
| @@ -52,9 +61,16 @@ void Fermi2D::Blit() { | |||
| 52 | UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); | 61 | UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); |
| 53 | 62 | ||
| 54 | const auto& args = regs.pixels_from_memory; | 63 | const auto& args = regs.pixels_from_memory; |
| 64 | constexpr s64 null_derivate = 1ULL << 32; | ||
| 65 | Surface src = regs.src; | ||
| 66 | const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); | ||
| 67 | const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && | ||
| 68 | src.format != regs.dst.format; | ||
| 55 | Config config{ | 69 | Config config{ |
| 56 | .operation = regs.operation, | 70 | .operation = regs.operation, |
| 57 | .filter = args.sample_mode.filter, | 71 | .filter = args.sample_mode.filter, |
| 72 | .must_accelerate = | ||
| 73 | args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu, | ||
| 58 | .dst_x0 = args.dst_x0, | 74 | .dst_x0 = args.dst_x0, |
| 59 | .dst_y0 = args.dst_y0, | 75 | .dst_y0 = args.dst_y0, |
| 60 | .dst_x1 = args.dst_x0 + args.dst_width, | 76 | .dst_x1 = args.dst_x0 + args.dst_width, |
| @@ -64,8 +80,7 @@ void Fermi2D::Blit() { | |||
| 64 | .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), | 80 | .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), |
| 65 | .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), | 81 | .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), |
| 66 | }; | 82 | }; |
| 67 | Surface src = regs.src; | 83 | |
| 68 | const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); | ||
| 69 | const auto need_align_to_pitch = | 84 | const auto need_align_to_pitch = |
| 70 | src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && | 85 | src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && |
| 71 | static_cast<s32>(src.width) == config.src_x1 && | 86 | static_cast<s32>(src.width) == config.src_x1 && |
| @@ -78,8 +93,9 @@ void Fermi2D::Blit() { | |||
| 78 | config.src_x1 -= config.src_x0; | 93 | config.src_x1 -= config.src_x0; |
| 79 | config.src_x0 = 0; | 94 | config.src_x0 = 0; |
| 80 | } | 95 | } |
| 96 | |||
| 81 | if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { | 97 | if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { |
| 82 | UNIMPLEMENTED(); | 98 | sw_blitter->Blit(src, regs.dst, config); |
| 83 | } | 99 | } |
| 84 | } | 100 | } |
| 85 | 101 | ||
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 1229aa35b..24b518cb5 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <memory> | ||
| 8 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 9 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| @@ -21,6 +22,10 @@ class RasterizerInterface; | |||
| 21 | 22 | ||
| 22 | namespace Tegra::Engines { | 23 | namespace Tegra::Engines { |
| 23 | 24 | ||
| 25 | namespace Blitter { | ||
| 26 | class SoftwareBlitEngine; | ||
| 27 | } | ||
| 28 | |||
| 24 | /** | 29 | /** |
| 25 | * This Engine is known as G80_2D. Documentation can be found in: | 30 | * This Engine is known as G80_2D. Documentation can be found in: |
| 26 | * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml | 31 | * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml |
| @@ -32,7 +37,7 @@ namespace Tegra::Engines { | |||
| 32 | 37 | ||
| 33 | class Fermi2D final : public EngineInterface { | 38 | class Fermi2D final : public EngineInterface { |
| 34 | public: | 39 | public: |
| 35 | explicit Fermi2D(); | 40 | explicit Fermi2D(MemoryManager& memory_manager_); |
| 36 | ~Fermi2D() override; | 41 | ~Fermi2D() override; |
| 37 | 42 | ||
| 38 | /// Binds a rasterizer to this engine. | 43 | /// Binds a rasterizer to this engine. |
| @@ -286,6 +291,7 @@ public: | |||
| 286 | struct Config { | 291 | struct Config { |
| 287 | Operation operation; | 292 | Operation operation; |
| 288 | Filter filter; | 293 | Filter filter; |
| 294 | bool must_accelerate; | ||
| 289 | s32 dst_x0; | 295 | s32 dst_x0; |
| 290 | s32 dst_y0; | 296 | s32 dst_y0; |
| 291 | s32 dst_x1; | 297 | s32 dst_x1; |
| @@ -298,6 +304,7 @@ public: | |||
| 298 | 304 | ||
| 299 | private: | 305 | private: |
| 300 | VideoCore::RasterizerInterface* rasterizer = nullptr; | 306 | VideoCore::RasterizerInterface* rasterizer = nullptr; |
| 307 | std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter; | ||
| 301 | 308 | ||
| 302 | /// Performs the copy from the source surface to the destination surface as configured in the | 309 | /// Performs the copy from the source surface to the destination surface as configured in the |
| 303 | /// registers. | 310 | /// registers. |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5bb1427c1..6d43e23ea 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -249,9 +249,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
| 249 | return; | 249 | return; |
| 250 | case MAXWELL3D_REG_INDEX(fragment_barrier): | 250 | case MAXWELL3D_REG_INDEX(fragment_barrier): |
| 251 | return rasterizer->FragmentBarrier(); | 251 | return rasterizer->FragmentBarrier(); |
| 252 | case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache): | ||
| 253 | rasterizer->InvalidateGPUCache(); | ||
| 254 | return rasterizer->WaitForIdle(); | ||
| 255 | case MAXWELL3D_REG_INDEX(tiled_cache_barrier): | 252 | case MAXWELL3D_REG_INDEX(tiled_cache_barrier): |
| 256 | return rasterizer->TiledCacheBarrier(); | 253 | return rasterizer->TiledCacheBarrier(); |
| 257 | } | 254 | } |
| @@ -511,10 +508,7 @@ void Maxwell3D::ProcessCounterReset() { | |||
| 511 | 508 | ||
| 512 | void Maxwell3D::ProcessSyncPoint() { | 509 | void Maxwell3D::ProcessSyncPoint() { |
| 513 | const u32 sync_point = regs.sync_info.sync_point.Value(); | 510 | const u32 sync_point = regs.sync_info.sync_point.Value(); |
| 514 | const u32 cache_flush = regs.sync_info.clean_l2.Value(); | 511 | [[maybe_unused]] const u32 cache_flush = regs.sync_info.clean_l2.Value(); |
| 515 | if (cache_flush != 0) { | ||
| 516 | rasterizer->InvalidateGPUCache(); | ||
| 517 | } | ||
| 518 | rasterizer->SignalSyncPoint(sync_point); | 512 | rasterizer->SignalSyncPoint(sync_point); |
| 519 | } | 513 | } |
| 520 | 514 | ||
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1bf6ca2dd..334429514 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -62,7 +62,8 @@ void MaxwellDMA::Launch() { | |||
| 62 | 62 | ||
| 63 | if (!is_src_pitch && !is_dst_pitch) { | 63 | if (!is_src_pitch && !is_dst_pitch) { |
| 64 | // If both the source and the destination are in block layout, assert. | 64 | // If both the source and the destination are in block layout, assert. |
| 65 | UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented"); | 65 | CopyBlockLinearToBlockLinear(); |
| 66 | ReleaseSemaphore(); | ||
| 66 | return; | 67 | return; |
| 67 | } | 68 | } |
| 68 | 69 | ||
| @@ -291,6 +292,70 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | |||
| 291 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 292 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 292 | } | 293 | } |
| 293 | 294 | ||
| 295 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { | ||
| 296 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | ||
| 297 | |||
| 298 | const bool is_remapping = regs.launch_dma.remap_enable != 0; | ||
| 299 | |||
| 300 | // Deswizzle the input and copy it over. | ||
| 301 | const Parameters& src = regs.src_params; | ||
| 302 | const Parameters& dst = regs.dst_params; | ||
| 303 | |||
| 304 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | ||
| 305 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | ||
| 306 | |||
| 307 | const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; | ||
| 308 | |||
| 309 | u32 src_width = src.width; | ||
| 310 | u32 dst_width = dst.width; | ||
| 311 | u32 x_elements = regs.line_length_in; | ||
| 312 | u32 src_x_offset = src.origin.x; | ||
| 313 | u32 dst_x_offset = dst.origin.x; | ||
| 314 | u32 bpp_shift = 0U; | ||
| 315 | if (!is_remapping) { | ||
| 316 | bpp_shift = Common::FoldRight( | ||
| 317 | 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); }, | ||
| 318 | src_width, dst_width, x_elements, src_x_offset, dst_x_offset, | ||
| 319 | static_cast<u32>(regs.offset_in), static_cast<u32>(regs.offset_out)); | ||
| 320 | src_width >>= bpp_shift; | ||
| 321 | dst_width >>= bpp_shift; | ||
| 322 | x_elements >>= bpp_shift; | ||
| 323 | src_x_offset >>= bpp_shift; | ||
| 324 | dst_x_offset >>= bpp_shift; | ||
| 325 | } | ||
| 326 | |||
| 327 | const u32 bytes_per_pixel = base_bpp << bpp_shift; | ||
| 328 | const size_t src_size = CalculateSize(true, bytes_per_pixel, src_width, src.height, src.depth, | ||
| 329 | src.block_size.height, src.block_size.depth); | ||
| 330 | const size_t dst_size = CalculateSize(true, bytes_per_pixel, dst_width, dst.height, dst.depth, | ||
| 331 | dst.block_size.height, dst.block_size.depth); | ||
| 332 | |||
| 333 | const u32 pitch = x_elements * bytes_per_pixel; | ||
| 334 | const size_t mid_buffer_size = pitch * regs.line_count; | ||
| 335 | |||
| 336 | if (read_buffer.size() < src_size) { | ||
| 337 | read_buffer.resize(src_size); | ||
| 338 | } | ||
| 339 | if (write_buffer.size() < dst_size) { | ||
| 340 | write_buffer.resize(dst_size); | ||
| 341 | } | ||
| 342 | |||
| 343 | intermediate_buffer.resize(mid_buffer_size); | ||
| 344 | |||
| 345 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | ||
| 346 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 347 | |||
| 348 | UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height, | ||
| 349 | src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, | ||
| 350 | src.block_size.height, src.block_size.depth, pitch); | ||
| 351 | |||
| 352 | SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, | ||
| 353 | dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, | ||
| 354 | dst.block_size.height, dst.block_size.depth, pitch); | ||
| 355 | |||
| 356 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 357 | } | ||
| 358 | |||
| 294 | void MaxwellDMA::ReleaseSemaphore() { | 359 | void MaxwellDMA::ReleaseSemaphore() { |
| 295 | const auto type = regs.launch_dma.semaphore_type; | 360 | const auto type = regs.launch_dma.semaphore_type; |
| 296 | const GPUVAddr address = regs.semaphore.address; | 361 | const GPUVAddr address = regs.semaphore.address; |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 953e34adc..d40d3d302 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -223,6 +223,8 @@ private: | |||
| 223 | 223 | ||
| 224 | void CopyPitchToBlockLinear(); | 224 | void CopyPitchToBlockLinear(); |
| 225 | 225 | ||
| 226 | void CopyBlockLinearToBlockLinear(); | ||
| 227 | |||
| 226 | void FastCopyBlockLinearToPitch(); | 228 | void FastCopyBlockLinearToPitch(); |
| 227 | 229 | ||
| 228 | void ReleaseSemaphore(); | 230 | void ReleaseSemaphore(); |
| @@ -234,6 +236,7 @@ private: | |||
| 234 | 236 | ||
| 235 | std::vector<u8> read_buffer; | 237 | std::vector<u8> read_buffer; |
| 236 | std::vector<u8> write_buffer; | 238 | std::vector<u8> write_buffer; |
| 239 | std::vector<u8> intermediate_buffer; | ||
| 237 | 240 | ||
| 238 | static constexpr std::size_t NUM_REGS = 0x800; | 241 | static constexpr std::size_t NUM_REGS = 0x800; |
| 239 | struct Regs { | 242 | struct Regs { |
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 4d2278811..c308ba3fc 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp | |||
| @@ -118,7 +118,7 @@ void Puller::ProcessSemaphoreRelease() { | |||
| 118 | std::function<void()> operation([this, sequence_address, payload] { | 118 | std::function<void()> operation([this, sequence_address, payload] { |
| 119 | memory_manager.Write<u32>(sequence_address, payload); | 119 | memory_manager.Write<u32>(sequence_address, payload); |
| 120 | }); | 120 | }); |
| 121 | rasterizer->SyncOperation(std::move(operation)); | 121 | rasterizer->SignalFence(std::move(operation)); |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | void Puller::ProcessSemaphoreAcquire() { | 124 | void Puller::ProcessSemaphoreAcquire() { |
| @@ -151,8 +151,8 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { | |||
| 151 | case BufferMethods::SemaphoreAddressLow: | 151 | case BufferMethods::SemaphoreAddressLow: |
| 152 | case BufferMethods::SemaphoreSequencePayload: | 152 | case BufferMethods::SemaphoreSequencePayload: |
| 153 | case BufferMethods::SyncpointPayload: | 153 | case BufferMethods::SyncpointPayload: |
| 154 | break; | ||
| 155 | case BufferMethods::WrcacheFlush: | 154 | case BufferMethods::WrcacheFlush: |
| 155 | break; | ||
| 156 | case BufferMethods::RefCnt: | 156 | case BufferMethods::RefCnt: |
| 157 | rasterizer->SignalReference(); | 157 | rasterizer->SignalReference(); |
| 158 | break; | 158 | break; |
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp new file mode 100644 index 000000000..2f1ea4626 --- /dev/null +++ b/src/video_core/engines/sw_blitter/blitter.cpp | |||
| @@ -0,0 +1,238 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include <cmath> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "video_core/engines/sw_blitter/blitter.h" | ||
| 9 | #include "video_core/engines/sw_blitter/converter.h" | ||
| 10 | #include "video_core/memory_manager.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/textures/decoders.h" | ||
| 13 | |||
| 14 | namespace Tegra { | ||
| 15 | class MemoryManager; | ||
| 16 | } | ||
| 17 | |||
| 18 | using VideoCore::Surface::BytesPerBlock; | ||
| 19 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 20 | |||
| 21 | namespace Tegra::Engines::Blitter { | ||
| 22 | |||
| 23 | using namespace Texture; | ||
| 24 | |||
| 25 | namespace { | ||
| 26 | |||
| 27 | constexpr size_t ir_components = 4; | ||
| 28 | |||
| 29 | void NearestNeighbor(std::span<const u8> input, std::span<u8> output, u32 src_width, u32 src_height, | ||
| 30 | u32 dst_width, u32 dst_height, size_t bpp) { | ||
| 31 | const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32)); | ||
| 32 | const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32)); | ||
| 33 | size_t src_y = 0; | ||
| 34 | for (u32 y = 0; y < dst_height; y++) { | ||
| 35 | size_t src_x = 0; | ||
| 36 | for (u32 x = 0; x < dst_width; x++) { | ||
| 37 | const size_t read_from = ((src_y * src_width + src_x) >> 32) * bpp; | ||
| 38 | const size_t write_to = (y * dst_width + x) * bpp; | ||
| 39 | |||
| 40 | std::memcpy(&output[write_to], &input[read_from], bpp); | ||
| 41 | src_x += dx_du; | ||
| 42 | } | ||
| 43 | src_y += dy_dv; | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | void NearestNeighborFast(std::span<const f32> input, std::span<f32> output, u32 src_width, | ||
| 48 | u32 src_height, u32 dst_width, u32 dst_height) { | ||
| 49 | const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32)); | ||
| 50 | const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32)); | ||
| 51 | size_t src_y = 0; | ||
| 52 | for (u32 y = 0; y < dst_height; y++) { | ||
| 53 | size_t src_x = 0; | ||
| 54 | for (u32 x = 0; x < dst_width; x++) { | ||
| 55 | const size_t read_from = ((src_y * src_width + src_x) >> 32) * ir_components; | ||
| 56 | const size_t write_to = (y * dst_width + x) * ir_components; | ||
| 57 | |||
| 58 | std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * ir_components); | ||
| 59 | src_x += dx_du; | ||
| 60 | } | ||
| 61 | src_y += dy_dv; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | void Bilinear(std::span<const f32> input, std::span<f32> output, size_t src_width, | ||
| 66 | size_t src_height, size_t dst_width, size_t dst_height) { | ||
| 67 | const auto bilinear_sample = [](std::span<const f32> x0_y0, std::span<const f32> x1_y0, | ||
| 68 | std::span<const f32> x0_y1, std::span<const f32> x1_y1, | ||
| 69 | f32 weight_x, f32 weight_y) { | ||
| 70 | std::array<f32, ir_components> result{}; | ||
| 71 | for (size_t i = 0; i < ir_components; i++) { | ||
| 72 | const f32 a = std::lerp(x0_y0[i], x1_y0[i], weight_x); | ||
| 73 | const f32 b = std::lerp(x0_y1[i], x1_y1[i], weight_x); | ||
| 74 | result[i] = std::lerp(a, b, weight_y); | ||
| 75 | } | ||
| 76 | return result; | ||
| 77 | }; | ||
| 78 | const f32 dx_du = | ||
| 79 | dst_width > 1 ? static_cast<f32>(src_width - 1) / static_cast<f32>(dst_width - 1) : 0.f; | ||
| 80 | const f32 dy_dv = | ||
| 81 | dst_height > 1 ? static_cast<f32>(src_height - 1) / static_cast<f32>(dst_height - 1) : 0.f; | ||
| 82 | for (u32 y = 0; y < dst_height; y++) { | ||
| 83 | for (u32 x = 0; x < dst_width; x++) { | ||
| 84 | const f32 x_low = std::floor(static_cast<f32>(x) * dx_du); | ||
| 85 | const f32 y_low = std::floor(static_cast<f32>(y) * dy_dv); | ||
| 86 | const f32 x_high = std::ceil(static_cast<f32>(x) * dx_du); | ||
| 87 | const f32 y_high = std::ceil(static_cast<f32>(y) * dy_dv); | ||
| 88 | const f32 weight_x = (static_cast<f32>(x) * dx_du) - x_low; | ||
| 89 | const f32 weight_y = (static_cast<f32>(y) * dy_dv) - y_low; | ||
| 90 | |||
| 91 | const auto read_src = [&](f32 in_x, f32 in_y) { | ||
| 92 | const size_t read_from = | ||
| 93 | ((static_cast<size_t>(in_x) * src_width + static_cast<size_t>(in_y)) >> 32) * | ||
| 94 | ir_components; | ||
| 95 | return std::span<const f32>(&input[read_from], ir_components); | ||
| 96 | }; | ||
| 97 | |||
| 98 | auto x0_y0 = read_src(x_low, y_low); | ||
| 99 | auto x1_y0 = read_src(x_high, y_low); | ||
| 100 | auto x0_y1 = read_src(x_low, y_high); | ||
| 101 | auto x1_y1 = read_src(x_high, y_high); | ||
| 102 | |||
| 103 | const auto result = bilinear_sample(x0_y0, x1_y0, x0_y1, x1_y1, weight_x, weight_y); | ||
| 104 | |||
| 105 | const size_t write_to = (y * dst_width + x) * ir_components; | ||
| 106 | |||
| 107 | std::memcpy(&output[write_to], &result, sizeof(f32) * ir_components); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | } // namespace | ||
| 113 | |||
/// Scratch storage for SoftwareBlitEngine, kept behind a pointer so the buffers are reused
/// between blits.
struct SoftwareBlitEngine::BlitEngineImpl {
    /// Raw bytes of a whole surface as read from (or about to be written to) guest memory.
    std::vector<u8> tmp_buffer;
    /// Tightly packed source rectangle in the source format.
    std::vector<u8> src_buffer;
    /// Tightly packed destination rectangle in the destination format.
    std::vector<u8> dst_buffer;
    /// Source rectangle converted to f32 components.
    std::vector<f32> intermediate_src;
    /// Scaled rectangle in f32 components, before conversion to the destination format.
    std::vector<f32> intermediate_dst;
    /// Provides the per-format converters used for the f32 round trip.
    ConverterFactory converter_factory;
};
| 122 | |||
/// Creates a software blitter that reads and writes surfaces through memory_manager_.
SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_)
    : memory_manager{memory_manager_} {
    // Scratch buffers live in the implementation object.
    impl = std::make_unique<BlitEngineImpl>();
}

// Defined here, where BlitEngineImpl is a complete type.
SoftwareBlitEngine::~SoftwareBlitEngine() = default;
| 129 | |||
| 130 | bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | ||
| 131 | Fermi2D::Config& config) { | ||
| 132 | const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) { | ||
| 133 | if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||
| 134 | return CalculateSize(true, bytes_per_pixel, surface.width, surface.height, | ||
| 135 | surface.depth, surface.block_height, surface.block_depth); | ||
| 136 | } | ||
| 137 | return static_cast<size_t>(surface.pitch * surface.height); | ||
| 138 | }; | ||
| 139 | const auto process_pitch_linear = [](bool unpack, std::span<const u8> input, | ||
| 140 | std::span<u8> output, u32 extent_x, u32 extent_y, | ||
| 141 | u32 pitch, u32 x0, u32 y0, size_t bpp) { | ||
| 142 | const size_t base_offset = x0 * bpp; | ||
| 143 | const size_t copy_size = extent_x * bpp; | ||
| 144 | for (u32 y = y0; y < extent_y; y++) { | ||
| 145 | const size_t first_offset = y * pitch + base_offset; | ||
| 146 | const size_t second_offset = y * extent_x * bpp; | ||
| 147 | u8* write_to = unpack ? &output[first_offset] : &output[second_offset]; | ||
| 148 | const u8* read_from = unpack ? &input[second_offset] : &input[first_offset]; | ||
| 149 | std::memcpy(write_to, read_from, copy_size); | ||
| 150 | } | ||
| 151 | }; | ||
| 152 | |||
| 153 | const u32 src_extent_x = config.src_x1 - config.src_x0; | ||
| 154 | const u32 src_extent_y = config.src_y1 - config.src_y0; | ||
| 155 | |||
| 156 | const u32 dst_extent_x = config.dst_x1 - config.dst_x0; | ||
| 157 | const u32 dst_extent_y = config.dst_y1 - config.dst_y0; | ||
| 158 | const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); | ||
| 159 | const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); | ||
| 160 | const size_t src_size = get_surface_size(src, src_bytes_per_pixel); | ||
| 161 | impl->tmp_buffer.resize(src_size); | ||
| 162 | memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size); | ||
| 163 | |||
| 164 | const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; | ||
| 165 | |||
| 166 | const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; | ||
| 167 | |||
| 168 | impl->src_buffer.resize(src_copy_size); | ||
| 169 | |||
| 170 | const bool no_passthrough = | ||
| 171 | src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y; | ||
| 172 | |||
| 173 | const auto convertion_phase_same_format = [&]() { | ||
| 174 | NearestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y, | ||
| 175 | dst_extent_x, dst_extent_y, dst_bytes_per_pixel); | ||
| 176 | }; | ||
| 177 | |||
| 178 | const auto convertion_phase_ir = [&]() { | ||
| 179 | auto* input_converter = impl->converter_factory.GetFormatConverter(src.format); | ||
| 180 | impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * ir_components); | ||
| 181 | impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * ir_components); | ||
| 182 | input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src); | ||
| 183 | |||
| 184 | if (config.filter != Fermi2D::Filter::Bilinear) { | ||
| 185 | NearestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x, | ||
| 186 | src_extent_y, dst_extent_x, dst_extent_y); | ||
| 187 | } else { | ||
| 188 | Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y, | ||
| 189 | dst_extent_x, dst_extent_y); | ||
| 190 | } | ||
| 191 | |||
| 192 | auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format); | ||
| 193 | output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer); | ||
| 194 | }; | ||
| 195 | |||
| 196 | // Do actuall Blit | ||
| 197 | |||
| 198 | impl->dst_buffer.resize(dst_copy_size); | ||
| 199 | if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||
| 200 | UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, | ||
| 201 | src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, | ||
| 202 | src_extent_y, src.block_height, src.block_depth, | ||
| 203 | src_extent_x * src_bytes_per_pixel); | ||
| 204 | } else { | ||
| 205 | process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, | ||
| 206 | src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); | ||
| 207 | } | ||
| 208 | |||
| 209 | // Conversion Phase | ||
| 210 | if (no_passthrough) { | ||
| 211 | if (src.format != dst.format || config.filter == Fermi2D::Filter::Bilinear) { | ||
| 212 | convertion_phase_ir(); | ||
| 213 | } else { | ||
| 214 | convertion_phase_same_format(); | ||
| 215 | } | ||
| 216 | } else { | ||
| 217 | impl->dst_buffer.swap(impl->src_buffer); | ||
| 218 | } | ||
| 219 | |||
| 220 | const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); | ||
| 221 | impl->tmp_buffer.resize(dst_size); | ||
| 222 | memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); | ||
| 223 | |||
| 224 | if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||
| 225 | SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, | ||
| 226 | dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, | ||
| 227 | dst_extent_y, dst.block_height, dst.block_depth, | ||
| 228 | dst_extent_x * dst_bytes_per_pixel); | ||
| 229 | } else { | ||
| 230 | process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, | ||
| 231 | dst.pitch, config.dst_x0, config.dst_y0, | ||
| 232 | static_cast<size_t>(dst_bytes_per_pixel)); | ||
| 233 | } | ||
| 234 | memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); | ||
| 235 | return true; | ||
| 236 | } | ||
| 237 | |||
| 238 | } // namespace Tegra::Engines::Blitter | ||
diff --git a/src/video_core/engines/sw_blitter/blitter.h b/src/video_core/engines/sw_blitter/blitter.h new file mode 100644 index 000000000..85b55c836 --- /dev/null +++ b/src/video_core/engines/sw_blitter/blitter.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
#include <memory>

#include "video_core/engines/fermi_2d.h"
| 7 | |||
// Forward declaration only: this header just stores a reference to the memory manager,
// so the full class definition is not needed here.
namespace Tegra {
class MemoryManager;
} // namespace Tegra
| 11 | |||
| 12 | namespace Tegra::Engines::Blitter { | ||
| 13 | |||
| 14 | class SoftwareBlitEngine { | ||
| 15 | public: | ||
| 16 | explicit SoftwareBlitEngine(MemoryManager& memory_manager_); | ||
| 17 | ~SoftwareBlitEngine(); | ||
| 18 | |||
| 19 | bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config); | ||
| 20 | |||
| 21 | private: | ||
| 22 | MemoryManager& memory_manager; | ||
| 23 | struct BlitEngineImpl; | ||
| 24 | std::unique_ptr<BlitEngineImpl> impl; | ||
| 25 | }; | ||
| 26 | |||
| 27 | } // namespace Tegra::Engines::Blitter | ||
diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp new file mode 100644 index 000000000..cd46dfd4f --- /dev/null +++ b/src/video_core/engines/sw_blitter/converter.cpp | |||
| @@ -0,0 +1,1234 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include <array> | ||
| 5 | #include <bit> | ||
| 6 | #include <cmath> | ||
| 7 | #include <span> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "video_core/engines/sw_blitter/converter.h" | ||
| 12 | #include "video_core/surface.h" | ||
| 13 | #include "video_core/textures/decoders.h" | ||
| 14 | |||
| 15 | #ifdef _MSC_VER | ||
| 16 | #define FORCE_INLINE __forceinline | ||
| 17 | #else | ||
| 18 | #define FORCE_INLINE inline __attribute__((always_inline)) | ||
| 19 | #endif | ||
| 20 | |||
| 21 | namespace Tegra::Engines::Blitter { | ||
| 22 | |||
/// Colour channel a stored component maps to. `None` marks a component that maps to no
/// channel (the format traits use it for the "X" components).
enum class Swizzle : size_t {
    R = 0, // the remaining enumerators count up from here
    G,     // 1
    B,     // 2
    A,     // 3
    None,  // 4
};
| 30 | |||
| 31 | enum class ComponentType : u32 { | ||
| 32 | SNORM = 1, | ||
| 33 | UNORM = 2, | ||
| 34 | SINT = 3, | ||
| 35 | UINT = 4, | ||
| 36 | SNORM_FORCE_FP16 = 5, | ||
| 37 | UNORM_FORCE_FP16 = 6, | ||
| 38 | FLOAT = 7, | ||
| 39 | SRGB = 8, | ||
| 40 | }; | ||
| 41 | |||
| 42 | namespace { | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Note: Use generate_converters.py to generate the structs and searches for new render target | ||
| 46 | * formats, then copy-paste them into this file in order to update it. Just run "python | ||
| 47 | * generate_converters.py" and take the code from its output. Modify the file to add new formats. | ||
| 48 | */ | ||
| 49 | |||
| 50 | constexpr std::array<f32, 256> SRGB_TO_RGB_LUT = { | ||
| 51 | 0.000000e+00f, 3.035270e-04f, 6.070540e-04f, 9.105810e-04f, 1.214108e-03f, 1.517635e-03f, | ||
| 52 | 1.821162e-03f, 2.124689e-03f, 2.428216e-03f, 2.731743e-03f, 3.035270e-03f, 3.346536e-03f, | ||
| 53 | 3.676507e-03f, 4.024717e-03f, 4.391442e-03f, 4.776953e-03f, 5.181517e-03f, 5.605392e-03f, | ||
| 54 | 6.048833e-03f, 6.512091e-03f, 6.995410e-03f, 7.499032e-03f, 8.023193e-03f, 8.568126e-03f, | ||
| 55 | 9.134059e-03f, 9.721218e-03f, 1.032982e-02f, 1.096009e-02f, 1.161224e-02f, 1.228649e-02f, | ||
| 56 | 1.298303e-02f, 1.370208e-02f, 1.444384e-02f, 1.520851e-02f, 1.599629e-02f, 1.680738e-02f, | ||
| 57 | 1.764195e-02f, 1.850022e-02f, 1.938236e-02f, 2.028856e-02f, 2.121901e-02f, 2.217389e-02f, | ||
| 58 | 2.315337e-02f, 2.415763e-02f, 2.518686e-02f, 2.624122e-02f, 2.732089e-02f, 2.842604e-02f, | ||
| 59 | 2.955684e-02f, 3.071344e-02f, 3.189603e-02f, 3.310477e-02f, 3.433981e-02f, 3.560131e-02f, | ||
| 60 | 3.688945e-02f, 3.820437e-02f, 3.954624e-02f, 4.091520e-02f, 4.231141e-02f, 4.373503e-02f, | ||
| 61 | 4.518620e-02f, 4.666509e-02f, 4.817183e-02f, 4.970657e-02f, 5.126946e-02f, 5.286065e-02f, | ||
| 62 | 5.448028e-02f, 5.612849e-02f, 5.780543e-02f, 5.951124e-02f, 6.124605e-02f, 6.301001e-02f, | ||
| 63 | 6.480327e-02f, 6.662594e-02f, 6.847817e-02f, 7.036009e-02f, 7.227185e-02f, 7.421357e-02f, | ||
| 64 | 7.618538e-02f, 7.818742e-02f, 8.021982e-02f, 8.228271e-02f, 8.437621e-02f, 8.650046e-02f, | ||
| 65 | 8.865558e-02f, 9.084171e-02f, 9.305897e-02f, 9.530747e-02f, 9.758735e-02f, 9.989873e-02f, | ||
| 66 | 1.022417e-01f, 1.046165e-01f, 1.070231e-01f, 1.094617e-01f, 1.119324e-01f, 1.144354e-01f, | ||
| 67 | 1.169707e-01f, 1.195384e-01f, 1.221388e-01f, 1.247718e-01f, 1.274377e-01f, 1.301365e-01f, | ||
| 68 | 1.328683e-01f, 1.356333e-01f, 1.384316e-01f, 1.412633e-01f, 1.441285e-01f, 1.470273e-01f, | ||
| 69 | 1.499598e-01f, 1.529261e-01f, 1.559265e-01f, 1.589608e-01f, 1.620294e-01f, 1.651322e-01f, | ||
| 70 | 1.682694e-01f, 1.714411e-01f, 1.746474e-01f, 1.778884e-01f, 1.811642e-01f, 1.844750e-01f, | ||
| 71 | 1.878208e-01f, 1.912017e-01f, 1.946178e-01f, 1.980693e-01f, 2.015563e-01f, 2.050787e-01f, | ||
| 72 | 2.086369e-01f, 2.122308e-01f, 2.158605e-01f, 2.195262e-01f, 2.232280e-01f, 2.269659e-01f, | ||
| 73 | 2.307401e-01f, 2.345506e-01f, 2.383976e-01f, 2.422811e-01f, 2.462013e-01f, 2.501583e-01f, | ||
| 74 | 2.541521e-01f, 2.581829e-01f, 2.622507e-01f, 2.663556e-01f, 2.704978e-01f, 2.746773e-01f, | ||
| 75 | 2.788943e-01f, 2.831487e-01f, 2.874408e-01f, 2.917706e-01f, 2.961383e-01f, 3.005438e-01f, | ||
| 76 | 3.049873e-01f, 3.094689e-01f, 3.139887e-01f, 3.185468e-01f, 3.231432e-01f, 3.277781e-01f, | ||
| 77 | 3.324515e-01f, 3.371636e-01f, 3.419144e-01f, 3.467041e-01f, 3.515326e-01f, 3.564001e-01f, | ||
| 78 | 3.613068e-01f, 3.662526e-01f, 3.712377e-01f, 3.762621e-01f, 3.813260e-01f, 3.864294e-01f, | ||
| 79 | 3.915725e-01f, 3.967552e-01f, 4.019778e-01f, 4.072402e-01f, 4.125426e-01f, 4.178851e-01f, | ||
| 80 | 4.232677e-01f, 4.286905e-01f, 4.341536e-01f, 4.396572e-01f, 4.452012e-01f, 4.507858e-01f, | ||
| 81 | 4.564110e-01f, 4.620770e-01f, 4.677838e-01f, 4.735315e-01f, 4.793202e-01f, 4.851499e-01f, | ||
| 82 | 4.910209e-01f, 4.969330e-01f, 5.028865e-01f, 5.088813e-01f, 5.149177e-01f, 5.209956e-01f, | ||
| 83 | 5.271151e-01f, 5.332764e-01f, 5.394795e-01f, 5.457245e-01f, 5.520114e-01f, 5.583404e-01f, | ||
| 84 | 5.647115e-01f, 5.711249e-01f, 5.775805e-01f, 5.840784e-01f, 5.906188e-01f, 5.972018e-01f, | ||
| 85 | 6.038274e-01f, 6.104956e-01f, 6.172066e-01f, 6.239604e-01f, 6.307572e-01f, 6.375968e-01f, | ||
| 86 | 6.444797e-01f, 6.514056e-01f, 6.583748e-01f, 6.653873e-01f, 6.724432e-01f, 6.795425e-01f, | ||
| 87 | 6.866853e-01f, 6.938717e-01f, 7.011019e-01f, 7.083758e-01f, 7.156935e-01f, 7.230551e-01f, | ||
| 88 | 7.304608e-01f, 7.379104e-01f, 7.454042e-01f, 7.529422e-01f, 7.605245e-01f, 7.681512e-01f, | ||
| 89 | 7.758222e-01f, 7.835378e-01f, 7.912979e-01f, 7.991027e-01f, 8.069522e-01f, 8.148466e-01f, | ||
| 90 | 8.227857e-01f, 8.307699e-01f, 8.387990e-01f, 8.468732e-01f, 8.549926e-01f, 8.631572e-01f, | ||
| 91 | 8.713671e-01f, 8.796224e-01f, 8.879231e-01f, 8.962694e-01f, 9.046612e-01f, 9.130986e-01f, | ||
| 92 | 9.215819e-01f, 9.301109e-01f, 9.386857e-01f, 9.473065e-01f, 9.559733e-01f, 9.646863e-01f, | ||
| 93 | 9.734453e-01f, 9.822506e-01f, 9.911021e-01f, 1.000000e+00f}; | ||
| 94 | |||
| 95 | constexpr std::array<f32, 256> RGB_TO_SRGB_LUT = { | ||
| 96 | 0.000000e+00f, 4.984009e-02f, 8.494473e-02f, 1.107021e-01f, 1.318038e-01f, 1.500052e-01f, | ||
| 97 | 1.661857e-01f, 1.808585e-01f, 1.943532e-01f, 2.068957e-01f, 2.186491e-01f, 2.297351e-01f, | ||
| 98 | 2.402475e-01f, 2.502604e-01f, 2.598334e-01f, 2.690152e-01f, 2.778465e-01f, 2.863614e-01f, | ||
| 99 | 2.945889e-01f, 3.025538e-01f, 3.102778e-01f, 3.177796e-01f, 3.250757e-01f, 3.321809e-01f, | ||
| 100 | 3.391081e-01f, 3.458689e-01f, 3.524737e-01f, 3.589320e-01f, 3.652521e-01f, 3.714419e-01f, | ||
| 101 | 3.775084e-01f, 3.834581e-01f, 3.892968e-01f, 3.950301e-01f, 4.006628e-01f, 4.061998e-01f, | ||
| 102 | 4.116451e-01f, 4.170030e-01f, 4.222770e-01f, 4.274707e-01f, 4.325873e-01f, 4.376298e-01f, | ||
| 103 | 4.426010e-01f, 4.475037e-01f, 4.523403e-01f, 4.571131e-01f, 4.618246e-01f, 4.664766e-01f, | ||
| 104 | 4.710712e-01f, 4.756104e-01f, 4.800958e-01f, 4.845292e-01f, 4.889122e-01f, 4.932462e-01f, | ||
| 105 | 4.975329e-01f, 5.017734e-01f, 5.059693e-01f, 5.101216e-01f, 5.142317e-01f, 5.183006e-01f, | ||
| 106 | 5.223295e-01f, 5.263194e-01f, 5.302714e-01f, 5.341862e-01f, 5.380651e-01f, 5.419087e-01f, | ||
| 107 | 5.457181e-01f, 5.494938e-01f, 5.532369e-01f, 5.569480e-01f, 5.606278e-01f, 5.642771e-01f, | ||
| 108 | 5.678965e-01f, 5.714868e-01f, 5.750484e-01f, 5.785821e-01f, 5.820884e-01f, 5.855680e-01f, | ||
| 109 | 5.890211e-01f, 5.924487e-01f, 5.958509e-01f, 5.992285e-01f, 6.025819e-01f, 6.059114e-01f, | ||
| 110 | 6.092176e-01f, 6.125010e-01f, 6.157619e-01f, 6.190008e-01f, 6.222180e-01f, 6.254140e-01f, | ||
| 111 | 6.285890e-01f, 6.317436e-01f, 6.348780e-01f, 6.379926e-01f, 6.410878e-01f, 6.441637e-01f, | ||
| 112 | 6.472208e-01f, 6.502595e-01f, 6.532799e-01f, 6.562824e-01f, 6.592672e-01f, 6.622347e-01f, | ||
| 113 | 6.651851e-01f, 6.681187e-01f, 6.710356e-01f, 6.739363e-01f, 6.768209e-01f, 6.796897e-01f, | ||
| 114 | 6.825429e-01f, 6.853807e-01f, 6.882034e-01f, 6.910111e-01f, 6.938041e-01f, 6.965826e-01f, | ||
| 115 | 6.993468e-01f, 7.020969e-01f, 7.048331e-01f, 7.075556e-01f, 7.102645e-01f, 7.129600e-01f, | ||
| 116 | 7.156424e-01f, 7.183118e-01f, 7.209683e-01f, 7.236121e-01f, 7.262435e-01f, 7.288625e-01f, | ||
| 117 | 7.314693e-01f, 7.340640e-01f, 7.366470e-01f, 7.392181e-01f, 7.417776e-01f, 7.443256e-01f, | ||
| 118 | 7.468624e-01f, 7.493880e-01f, 7.519025e-01f, 7.544061e-01f, 7.568989e-01f, 7.593810e-01f, | ||
| 119 | 7.618526e-01f, 7.643137e-01f, 7.667645e-01f, 7.692052e-01f, 7.716358e-01f, 7.740564e-01f, | ||
| 120 | 7.764671e-01f, 7.788681e-01f, 7.812595e-01f, 7.836413e-01f, 7.860138e-01f, 7.883768e-01f, | ||
| 121 | 7.907307e-01f, 7.930754e-01f, 7.954110e-01f, 7.977377e-01f, 8.000556e-01f, 8.023647e-01f, | ||
| 122 | 8.046651e-01f, 8.069569e-01f, 8.092403e-01f, 8.115152e-01f, 8.137818e-01f, 8.160402e-01f, | ||
| 123 | 8.182903e-01f, 8.205324e-01f, 8.227665e-01f, 8.249926e-01f, 8.272109e-01f, 8.294214e-01f, | ||
| 124 | 8.316242e-01f, 8.338194e-01f, 8.360070e-01f, 8.381871e-01f, 8.403597e-01f, 8.425251e-01f, | ||
| 125 | 8.446831e-01f, 8.468339e-01f, 8.489776e-01f, 8.511142e-01f, 8.532437e-01f, 8.553662e-01f, | ||
| 126 | 8.574819e-01f, 8.595907e-01f, 8.616927e-01f, 8.637881e-01f, 8.658767e-01f, 8.679587e-01f, | ||
| 127 | 8.700342e-01f, 8.721032e-01f, 8.741657e-01f, 8.762218e-01f, 8.782716e-01f, 8.803151e-01f, | ||
| 128 | 8.823524e-01f, 8.843835e-01f, 8.864085e-01f, 8.884274e-01f, 8.904402e-01f, 8.924471e-01f, | ||
| 129 | 8.944480e-01f, 8.964431e-01f, 8.984324e-01f, 9.004158e-01f, 9.023935e-01f, 9.043654e-01f, | ||
| 130 | 9.063318e-01f, 9.082925e-01f, 9.102476e-01f, 9.121972e-01f, 9.141413e-01f, 9.160800e-01f, | ||
| 131 | 9.180133e-01f, 9.199412e-01f, 9.218637e-01f, 9.237810e-01f, 9.256931e-01f, 9.276000e-01f, | ||
| 132 | 9.295017e-01f, 9.313982e-01f, 9.332896e-01f, 9.351761e-01f, 9.370575e-01f, 9.389339e-01f, | ||
| 133 | 9.408054e-01f, 9.426719e-01f, 9.445336e-01f, 9.463905e-01f, 9.482424e-01f, 9.500897e-01f, | ||
| 134 | 9.519322e-01f, 9.537700e-01f, 9.556032e-01f, 9.574316e-01f, 9.592555e-01f, 9.610748e-01f, | ||
| 135 | 9.628896e-01f, 9.646998e-01f, 9.665055e-01f, 9.683068e-01f, 9.701037e-01f, 9.718961e-01f, | ||
| 136 | 9.736842e-01f, 9.754679e-01f, 9.772474e-01f, 9.790225e-01f, 9.807934e-01f, 9.825601e-01f, | ||
| 137 | 9.843225e-01f, 9.860808e-01f, 9.878350e-01f, 9.895850e-01f, 9.913309e-01f, 9.930727e-01f, | ||
| 138 | 9.948106e-01f, 9.965444e-01f, 9.982741e-01f, 1.000000e+00f}; | ||
| 139 | |||
| 140 | } // namespace | ||
| 141 | |||
| 142 | struct R32G32B32A32_FLOATTraits { | ||
| 143 | static constexpr size_t num_components = 4; | ||
| 144 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 145 | ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 146 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; | ||
| 147 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 148 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 149 | }; | ||
| 150 | |||
| 151 | struct R32G32B32A32_SINTTraits { | ||
| 152 | static constexpr size_t num_components = 4; | ||
| 153 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 154 | ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; | ||
| 155 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; | ||
| 156 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 157 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 158 | }; | ||
| 159 | |||
| 160 | struct R32G32B32A32_UINTTraits { | ||
| 161 | static constexpr size_t num_components = 4; | ||
| 162 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 163 | ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; | ||
| 164 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; | ||
| 165 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 166 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 167 | }; | ||
| 168 | |||
| 169 | struct R32G32B32X32_FLOATTraits { | ||
| 170 | static constexpr size_t num_components = 4; | ||
| 171 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 172 | ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 173 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; | ||
| 174 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 175 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; | ||
| 176 | }; | ||
| 177 | |||
| 178 | struct R32G32B32X32_SINTTraits { | ||
| 179 | static constexpr size_t num_components = 4; | ||
| 180 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 181 | ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; | ||
| 182 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; | ||
| 183 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 184 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; | ||
| 185 | }; | ||
| 186 | |||
| 187 | struct R32G32B32X32_UINTTraits { | ||
| 188 | static constexpr size_t num_components = 4; | ||
| 189 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 190 | ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; | ||
| 191 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; | ||
| 192 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 193 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; | ||
| 194 | }; | ||
| 195 | |||
| 196 | struct R16G16B16A16_UNORMTraits { | ||
| 197 | static constexpr size_t num_components = 4; | ||
| 198 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 199 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 200 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 201 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 202 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 203 | }; | ||
| 204 | |||
| 205 | struct R16G16B16A16_SNORMTraits { | ||
| 206 | static constexpr size_t num_components = 4; | ||
| 207 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 208 | ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; | ||
| 209 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 210 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 211 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 212 | }; | ||
| 213 | |||
| 214 | struct R16G16B16A16_SINTTraits { | ||
| 215 | static constexpr size_t num_components = 4; | ||
| 216 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 217 | ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; | ||
| 218 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 219 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 220 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 221 | }; | ||
| 222 | |||
| 223 | struct R16G16B16A16_UINTTraits { | ||
| 224 | static constexpr size_t num_components = 4; | ||
| 225 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 226 | ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; | ||
| 227 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 228 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 229 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 230 | }; | ||
| 231 | |||
| 232 | struct R16G16B16A16_FLOATTraits { | ||
| 233 | static constexpr size_t num_components = 4; | ||
| 234 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 235 | ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 236 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 237 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 238 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; | ||
| 239 | }; | ||
| 240 | |||
| 241 | struct R32G32_FLOATTraits { | ||
| 242 | static constexpr size_t num_components = 2; | ||
| 243 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 244 | ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 245 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32}; | ||
| 246 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 247 | Swizzle::G}; | ||
| 248 | }; | ||
| 249 | |||
| 250 | struct R32G32_SINTTraits { | ||
| 251 | static constexpr size_t num_components = 2; | ||
| 252 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 253 | ComponentType::SINT, ComponentType::SINT}; | ||
| 254 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32}; | ||
| 255 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 256 | Swizzle::G}; | ||
| 257 | }; | ||
| 258 | |||
| 259 | struct R32G32_UINTTraits { | ||
| 260 | static constexpr size_t num_components = 2; | ||
| 261 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 262 | ComponentType::UINT, ComponentType::UINT}; | ||
| 263 | static constexpr std::array<size_t, num_components> component_sizes = {32, 32}; | ||
| 264 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 265 | Swizzle::G}; | ||
| 266 | }; | ||
| 267 | |||
| 268 | struct R16G16B16X16_FLOATTraits { | ||
| 269 | static constexpr size_t num_components = 4; | ||
| 270 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 271 | ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 272 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; | ||
| 273 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 274 | Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; | ||
| 275 | }; | ||
| 276 | |||
| 277 | struct A8R8G8B8_UNORMTraits { | ||
| 278 | static constexpr size_t num_components = 4; | ||
| 279 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 280 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 281 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 282 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 283 | Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; | ||
| 284 | }; | ||
| 285 | |||
| 286 | struct A8R8G8B8_SRGBTraits { | ||
| 287 | static constexpr size_t num_components = 4; | ||
| 288 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 289 | ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; | ||
| 290 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 291 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 292 | Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; | ||
| 293 | }; | ||
| 294 | |||
| 295 | struct A2B10G10R10_UNORMTraits { | ||
| 296 | static constexpr size_t num_components = 4; | ||
| 297 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 298 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 299 | static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10}; | ||
| 300 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 301 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 302 | }; | ||
| 303 | |||
| 304 | struct A2B10G10R10_UINTTraits { | ||
| 305 | static constexpr size_t num_components = 4; | ||
| 306 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 307 | ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; | ||
| 308 | static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10}; | ||
| 309 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 310 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 311 | }; | ||
| 312 | |||
| 313 | struct A2R10G10B10_UNORMTraits { | ||
| 314 | static constexpr size_t num_components = 4; | ||
| 315 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 316 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 317 | static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10}; | ||
| 318 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 319 | Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; | ||
| 320 | }; | ||
| 321 | |||
| 322 | struct A8B8G8R8_UNORMTraits { | ||
| 323 | static constexpr size_t num_components = 4; | ||
| 324 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 325 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 326 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 327 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 328 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 329 | }; | ||
| 330 | |||
| 331 | struct A8B8G8R8_SRGBTraits { | ||
| 332 | static constexpr size_t num_components = 4; | ||
| 333 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 334 | ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; | ||
| 335 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 336 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 337 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 338 | }; | ||
| 339 | |||
| 340 | struct A8B8G8R8_SNORMTraits { | ||
| 341 | static constexpr size_t num_components = 4; | ||
| 342 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 343 | ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; | ||
| 344 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 345 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 346 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 347 | }; | ||
| 348 | |||
| 349 | struct A8B8G8R8_SINTTraits { | ||
| 350 | static constexpr size_t num_components = 4; | ||
| 351 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 352 | ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; | ||
| 353 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 354 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 355 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 356 | }; | ||
| 357 | |||
| 358 | struct A8B8G8R8_UINTTraits { | ||
| 359 | static constexpr size_t num_components = 4; | ||
| 360 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 361 | ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; | ||
| 362 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 363 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 364 | Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 365 | }; | ||
| 366 | |||
| 367 | struct R16G16_UNORMTraits { | ||
| 368 | static constexpr size_t num_components = 2; | ||
| 369 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 370 | ComponentType::UNORM, ComponentType::UNORM}; | ||
| 371 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 372 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 373 | Swizzle::G}; | ||
| 374 | }; | ||
| 375 | |||
| 376 | struct R16G16_SNORMTraits { | ||
| 377 | static constexpr size_t num_components = 2; | ||
| 378 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 379 | ComponentType::SNORM, ComponentType::SNORM}; | ||
| 380 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 381 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 382 | Swizzle::G}; | ||
| 383 | }; | ||
| 384 | |||
| 385 | struct R16G16_SINTTraits { | ||
| 386 | static constexpr size_t num_components = 2; | ||
| 387 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 388 | ComponentType::SINT, ComponentType::SINT}; | ||
| 389 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 390 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 391 | Swizzle::G}; | ||
| 392 | }; | ||
| 393 | |||
| 394 | struct R16G16_UINTTraits { | ||
| 395 | static constexpr size_t num_components = 2; | ||
| 396 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 397 | ComponentType::UINT, ComponentType::UINT}; | ||
| 398 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 399 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 400 | Swizzle::G}; | ||
| 401 | }; | ||
| 402 | |||
| 403 | struct R16G16_FLOATTraits { | ||
| 404 | static constexpr size_t num_components = 2; | ||
| 405 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 406 | ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 407 | static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; | ||
| 408 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 409 | Swizzle::G}; | ||
| 410 | }; | ||
| 411 | |||
| 412 | struct B10G11R11_FLOATTraits { | ||
| 413 | static constexpr size_t num_components = 3; | ||
| 414 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 415 | ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; | ||
| 416 | static constexpr std::array<size_t, num_components> component_sizes = {10, 11, 11}; | ||
| 417 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 418 | Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 419 | }; | ||
| 420 | |||
| 421 | struct R32_SINTTraits { | ||
| 422 | static constexpr size_t num_components = 1; | ||
| 423 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 424 | ComponentType::SINT}; | ||
| 425 | static constexpr std::array<size_t, num_components> component_sizes = {32}; | ||
| 426 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 427 | }; | ||
| 428 | |||
| 429 | struct R32_UINTTraits { | ||
| 430 | static constexpr size_t num_components = 1; | ||
| 431 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 432 | ComponentType::UINT}; | ||
| 433 | static constexpr std::array<size_t, num_components> component_sizes = {32}; | ||
| 434 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 435 | }; | ||
| 436 | |||
| 437 | struct R32_FLOATTraits { | ||
| 438 | static constexpr size_t num_components = 1; | ||
| 439 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 440 | ComponentType::FLOAT}; | ||
| 441 | static constexpr std::array<size_t, num_components> component_sizes = {32}; | ||
| 442 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 443 | }; | ||
| 444 | |||
| 445 | struct X8R8G8B8_UNORMTraits { | ||
| 446 | static constexpr size_t num_components = 4; | ||
| 447 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 448 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 449 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 450 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 451 | Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B}; | ||
| 452 | }; | ||
| 453 | |||
| 454 | struct X8R8G8B8_SRGBTraits { | ||
| 455 | static constexpr size_t num_components = 4; | ||
| 456 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 457 | ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; | ||
| 458 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 459 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 460 | Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B}; | ||
| 461 | }; | ||
| 462 | |||
| 463 | struct R5G6B5_UNORMTraits { | ||
| 464 | static constexpr size_t num_components = 3; | ||
| 465 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 466 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 467 | static constexpr std::array<size_t, num_components> component_sizes = {5, 6, 5}; | ||
| 468 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 469 | Swizzle::R, Swizzle::G, Swizzle::B}; | ||
| 470 | }; | ||
| 471 | |||
| 472 | struct A1R5G5B5_UNORMTraits { | ||
| 473 | static constexpr size_t num_components = 4; | ||
| 474 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 475 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 476 | static constexpr std::array<size_t, num_components> component_sizes = {1, 5, 5, 5}; | ||
| 477 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 478 | Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; | ||
| 479 | }; | ||
| 480 | |||
| 481 | struct R8G8_UNORMTraits { | ||
| 482 | static constexpr size_t num_components = 2; | ||
| 483 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 484 | ComponentType::UNORM, ComponentType::UNORM}; | ||
| 485 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; | ||
| 486 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 487 | Swizzle::G}; | ||
| 488 | }; | ||
| 489 | |||
| 490 | struct R8G8_SNORMTraits { | ||
| 491 | static constexpr size_t num_components = 2; | ||
| 492 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 493 | ComponentType::SNORM, ComponentType::SNORM}; | ||
| 494 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; | ||
| 495 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 496 | Swizzle::G}; | ||
| 497 | }; | ||
| 498 | |||
| 499 | struct R8G8_SINTTraits { | ||
| 500 | static constexpr size_t num_components = 2; | ||
| 501 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 502 | ComponentType::SINT, ComponentType::SINT}; | ||
| 503 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; | ||
| 504 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 505 | Swizzle::G}; | ||
| 506 | }; | ||
| 507 | |||
| 508 | struct R8G8_UINTTraits { | ||
| 509 | static constexpr size_t num_components = 2; | ||
| 510 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 511 | ComponentType::UINT, ComponentType::UINT}; | ||
| 512 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; | ||
| 513 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, | ||
| 514 | Swizzle::G}; | ||
| 515 | }; | ||
| 516 | |||
| 517 | struct R16_UNORMTraits { | ||
| 518 | static constexpr size_t num_components = 1; | ||
| 519 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 520 | ComponentType::UNORM}; | ||
| 521 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 522 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 523 | }; | ||
| 524 | |||
| 525 | struct R16_SNORMTraits { | ||
| 526 | static constexpr size_t num_components = 1; | ||
| 527 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 528 | ComponentType::SNORM}; | ||
| 529 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 530 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 531 | }; | ||
| 532 | |||
| 533 | struct R16_SINTTraits { | ||
| 534 | static constexpr size_t num_components = 1; | ||
| 535 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 536 | ComponentType::SINT}; | ||
| 537 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 538 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 539 | }; | ||
| 540 | |||
| 541 | struct R16_UINTTraits { | ||
| 542 | static constexpr size_t num_components = 1; | ||
| 543 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 544 | ComponentType::UINT}; | ||
| 545 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 546 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 547 | }; | ||
| 548 | |||
| 549 | struct R16_FLOATTraits { | ||
| 550 | static constexpr size_t num_components = 1; | ||
| 551 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 552 | ComponentType::FLOAT}; | ||
| 553 | static constexpr std::array<size_t, num_components> component_sizes = {16}; | ||
| 554 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 555 | }; | ||
| 556 | |||
| 557 | struct R8_UNORMTraits { | ||
| 558 | static constexpr size_t num_components = 1; | ||
| 559 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 560 | ComponentType::UNORM}; | ||
| 561 | static constexpr std::array<size_t, num_components> component_sizes = {8}; | ||
| 562 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 563 | }; | ||
| 564 | |||
| 565 | struct R8_SNORMTraits { | ||
| 566 | static constexpr size_t num_components = 1; | ||
| 567 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 568 | ComponentType::SNORM}; | ||
| 569 | static constexpr std::array<size_t, num_components> component_sizes = {8}; | ||
| 570 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 571 | }; | ||
| 572 | |||
| 573 | struct R8_SINTTraits { | ||
| 574 | static constexpr size_t num_components = 1; | ||
| 575 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 576 | ComponentType::SINT}; | ||
| 577 | static constexpr std::array<size_t, num_components> component_sizes = {8}; | ||
| 578 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 579 | }; | ||
| 580 | |||
| 581 | struct R8_UINTTraits { | ||
| 582 | static constexpr size_t num_components = 1; | ||
| 583 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 584 | ComponentType::UINT}; | ||
| 585 | static constexpr std::array<size_t, num_components> component_sizes = {8}; | ||
| 586 | static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; | ||
| 587 | }; | ||
| 588 | |||
| 589 | struct X1R5G5B5_UNORMTraits { | ||
| 590 | static constexpr size_t num_components = 4; | ||
| 591 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 592 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 593 | static constexpr std::array<size_t, num_components> component_sizes = {1, 5, 5, 5}; | ||
| 594 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 595 | Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B}; | ||
| 596 | }; | ||
| 597 | |||
| 598 | struct X8B8G8R8_UNORMTraits { | ||
| 599 | static constexpr size_t num_components = 4; | ||
| 600 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 601 | ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; | ||
| 602 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 603 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 604 | Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 605 | }; | ||
| 606 | |||
| 607 | struct X8B8G8R8_SRGBTraits { | ||
| 608 | static constexpr size_t num_components = 4; | ||
| 609 | static constexpr std::array<ComponentType, num_components> component_types = { | ||
| 610 | ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; | ||
| 611 | static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; | ||
| 612 | static constexpr std::array<Swizzle, num_components> component_swizzle = { | ||
| 613 | Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R}; | ||
| 614 | }; | ||
| 615 | |||
| 616 | template <class ConverterTraits> | ||
| 617 | class ConverterImpl : public Converter { | ||
| 618 | private: | ||
| 619 | static constexpr size_t num_components = ConverterTraits::num_components; | ||
| 620 | static constexpr std::array<ComponentType, num_components> component_types = | ||
| 621 | ConverterTraits::component_types; | ||
| 622 | static constexpr std::array<size_t, num_components> component_sizes = | ||
| 623 | ConverterTraits::component_sizes; | ||
| 624 | static constexpr std::array<Swizzle, num_components> component_swizzle = | ||
| 625 | ConverterTraits::component_swizzle; | ||
| 626 | |||
| 627 | static constexpr size_t CalculateByteSize() { | ||
| 628 | size_t size = 0; | ||
| 629 | for (const size_t component_size : component_sizes) { | ||
| 630 | size += component_size; | ||
| 631 | } | ||
| 632 | const size_t power = (sizeof(size_t) * 8) - std::countl_zero(size) - 1ULL; | ||
| 633 | const size_t base_size = 1ULL << power; | ||
| 634 | const size_t mask = base_size - 1ULL; | ||
| 635 | return ((size & mask) != 0 ? base_size << 1ULL : base_size) / 8; | ||
| 636 | } | ||
| 637 | |||
| 638 | static constexpr size_t total_bytes_per_pixel = CalculateByteSize(); | ||
| 639 | static constexpr size_t total_words_per_pixel = | ||
| 640 | (total_bytes_per_pixel + sizeof(u32) - 1U) / sizeof(u32); | ||
| 641 | static constexpr size_t components_per_ir_rep = 4; | ||
| 642 | |||
| 643 | template <bool get_offsets> | ||
| 644 | static constexpr std::array<size_t, num_components> GetBoundWordsOffsets() { | ||
| 645 | std::array<size_t, num_components> result; | ||
| 646 | result.fill(0); | ||
| 647 | constexpr size_t total_bits_per_word = sizeof(u32) * 8; | ||
| 648 | size_t accumulated_size = 0; | ||
| 649 | size_t count = 0; | ||
| 650 | for (size_t i = 0; i < num_components; i++) { | ||
| 651 | if constexpr (get_offsets) { | ||
| 652 | result[i] = accumulated_size; | ||
| 653 | } else { | ||
| 654 | result[i] = count; | ||
| 655 | } | ||
| 656 | accumulated_size += component_sizes[i]; | ||
| 657 | if (accumulated_size > total_bits_per_word) { | ||
| 658 | if constexpr (get_offsets) { | ||
| 659 | result[i] = 0; | ||
| 660 | } else { | ||
| 661 | result[i]++; | ||
| 662 | } | ||
| 663 | count++; | ||
| 664 | accumulated_size = component_sizes[i]; | ||
| 665 | } | ||
| 666 | } | ||
| 667 | return result; | ||
| 668 | } | ||
| 669 | |||
| 670 | static constexpr std::array<size_t, num_components> bound_words = GetBoundWordsOffsets<false>(); | ||
| 671 | static constexpr std::array<size_t, num_components> bound_offsets = | ||
| 672 | GetBoundWordsOffsets<true>(); | ||
| 673 | |||
| 674 | static constexpr std::array<u32, num_components> GetComponentsMask() { | ||
| 675 | std::array<u32, num_components> result; | ||
| 676 | for (size_t i = 0; i < num_components; i++) { | ||
| 677 | result[i] = (((u32)~0) >> (8 * sizeof(u32) - component_sizes[i])) << bound_offsets[i]; | ||
| 678 | } | ||
| 679 | return result; | ||
| 680 | } | ||
| 681 | |||
| 682 | static constexpr std::array<u32, num_components> component_mask = GetComponentsMask(); | ||
| 683 | |||
| 684 | // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function | ||
| 685 | // calls, it may fail to detect the benefit of inlining. | ||
| 686 | template <size_t which_component> | ||
| 687 | FORCE_INLINE void ConvertToComponent(u32 which_word, f32& out_component) { | ||
| 688 | const u32 value = (which_word >> bound_offsets[which_component]) & | ||
| 689 | static_cast<u32>((1ULL << component_sizes[which_component]) - 1ULL); | ||
| 690 | const auto sign_extend = [](u32 base_value, size_t bits) { | ||
| 691 | const size_t shift_amount = sizeof(u32) * 8 - bits; | ||
| 692 | s32 shifted_value = static_cast<s32>(base_value << shift_amount); | ||
| 693 | return shifted_value >> shift_amount; | ||
| 694 | }; | ||
| 695 | const auto force_to_fp16 = [](f32 base_value) { | ||
| 696 | u32 tmp = std::bit_cast<u32>(base_value); | ||
| 697 | constexpr size_t fp32_mantissa_bits = 23; | ||
| 698 | constexpr size_t fp16_mantissa_bits = 10; | ||
| 699 | constexpr size_t mantissa_mask = | ||
| 700 | ~((1ULL << (fp32_mantissa_bits - fp16_mantissa_bits)) - 1ULL); | ||
| 701 | tmp = tmp & static_cast<u32>(mantissa_mask); | ||
| 702 | // TODO: force the exponent within the range of half float. Not needed in UNORM / SNORM | ||
| 703 | return std::bit_cast<f32>(tmp); | ||
| 704 | }; | ||
| 705 | const auto from_fp_n = [&sign_extend](u32 base_value, size_t bits, size_t mantissa) { | ||
| 706 | constexpr size_t fp32_mantissa_bits = 23; | ||
| 707 | size_t shift_towards = fp32_mantissa_bits - mantissa; | ||
| 708 | const u32 new_value = | ||
| 709 | static_cast<u32>(sign_extend(base_value, bits) << shift_towards) & (~(1U << 31)); | ||
| 710 | return std::bit_cast<f32>(new_value); | ||
| 711 | }; | ||
| 712 | const auto calculate_snorm = [&]() { | ||
| 713 | return static_cast<f32>( | ||
| 714 | static_cast<f32>(sign_extend(value, component_sizes[which_component])) / | ||
| 715 | static_cast<f32>((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); | ||
| 716 | }; | ||
| 717 | const auto calculate_unorm = [&]() { | ||
| 718 | return static_cast<f32>( | ||
| 719 | static_cast<f32>(value) / | ||
| 720 | static_cast<f32>((1ULL << (component_sizes[which_component])) - 1ULL)); | ||
| 721 | }; | ||
| 722 | if constexpr (component_types[which_component] == ComponentType::SNORM) { | ||
| 723 | out_component = calculate_snorm(); | ||
| 724 | } else if constexpr (component_types[which_component] == ComponentType::UNORM) { | ||
| 725 | out_component = calculate_unorm(); | ||
| 726 | } else if constexpr (component_types[which_component] == ComponentType::SINT) { | ||
| 727 | out_component = static_cast<f32>( | ||
| 728 | static_cast<s32>(sign_extend(value, component_sizes[which_component]))); | ||
| 729 | } else if constexpr (component_types[which_component] == ComponentType::UINT) { | ||
| 730 | out_component = static_cast<f32>( | ||
| 731 | static_cast<s32>(sign_extend(value, component_sizes[which_component]))); | ||
| 732 | } else if constexpr (component_types[which_component] == ComponentType::SNORM_FORCE_FP16) { | ||
| 733 | out_component = calculate_snorm(); | ||
| 734 | out_component = force_to_fp16(out_component); | ||
| 735 | } else if constexpr (component_types[which_component] == ComponentType::UNORM_FORCE_FP16) { | ||
| 736 | out_component = calculate_unorm(); | ||
| 737 | out_component = force_to_fp16(out_component); | ||
| 738 | } else if constexpr (component_types[which_component] == ComponentType::FLOAT) { | ||
| 739 | if constexpr (component_sizes[which_component] == 32) { | ||
| 740 | out_component = std::bit_cast<f32>(value); | ||
| 741 | } else if constexpr (component_sizes[which_component] == 16) { | ||
| 742 | static constexpr u32 sign_mask = 0x8000; | ||
| 743 | static constexpr u32 mantissa_mask = 0x8000; | ||
| 744 | out_component = std::bit_cast<f32>(((value & sign_mask) << 16) | | ||
| 745 | (((value & 0x7c00) + 0x1C000) << 13) | | ||
| 746 | ((value & mantissa_mask) << 13)); | ||
| 747 | } else { | ||
| 748 | out_component = from_fp_n(value, component_sizes[which_component], | ||
| 749 | component_sizes[which_component] - 5); | ||
| 750 | } | ||
| 751 | } else if constexpr (component_types[which_component] == ComponentType::SRGB) { | ||
| 752 | if constexpr (component_swizzle[which_component] == Swizzle::A) { | ||
| 753 | out_component = calculate_unorm(); | ||
| 754 | } else if constexpr (component_sizes[which_component] == 8) { | ||
| 755 | out_component = SRGB_TO_RGB_LUT[value]; | ||
| 756 | } else { | ||
| 757 | out_component = calculate_unorm(); | ||
| 758 | UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented", | ||
| 759 | component_sizes[which_component]); | ||
| 760 | } | ||
| 761 | } | ||
| 762 | } | ||
| 763 | |||
| 764 | // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function | ||
| 765 | // calls, it may fail to detect the benefit of inlining. | ||
| 766 | template <size_t which_component> | ||
| 767 | FORCE_INLINE void ConvertFromComponent(u32& which_word, f32 in_component) { | ||
| 768 | const auto insert_to_word = [&]<typename T>(T new_word) { | ||
| 769 | which_word |= (static_cast<u32>(new_word) << bound_offsets[which_component]) & | ||
| 770 | component_mask[which_component]; | ||
| 771 | }; | ||
| 772 | const auto to_fp_n = [](f32 base_value, size_t bits, size_t mantissa) { | ||
| 773 | constexpr size_t fp32_mantissa_bits = 23; | ||
| 774 | u32 tmp_value = std::bit_cast<u32>(std::max(base_value, 0.0f)); | ||
| 775 | size_t shift_towards = fp32_mantissa_bits - mantissa; | ||
| 776 | return tmp_value >> shift_towards; | ||
| 777 | }; | ||
| 778 | const auto calculate_unorm = [&]() { | ||
| 779 | return static_cast<u32>( | ||
| 780 | static_cast<f32>(in_component) * | ||
| 781 | static_cast<f32>((1ULL << (component_sizes[which_component])) - 1ULL)); | ||
| 782 | }; | ||
| 783 | if constexpr (component_types[which_component] == ComponentType::SNORM || | ||
| 784 | component_types[which_component] == ComponentType::SNORM_FORCE_FP16) { | ||
| 785 | s32 tmp_word = static_cast<s32>( | ||
| 786 | static_cast<f32>(in_component) * | ||
| 787 | static_cast<f32>((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); | ||
| 788 | insert_to_word(tmp_word); | ||
| 789 | |||
| 790 | } else if constexpr (component_types[which_component] == ComponentType::UNORM || | ||
| 791 | component_types[which_component] == ComponentType::UNORM_FORCE_FP16) { | ||
| 792 | u32 tmp_word = calculate_unorm(); | ||
| 793 | insert_to_word(tmp_word); | ||
| 794 | } else if constexpr (component_types[which_component] == ComponentType::SINT) { | ||
| 795 | s32 tmp_word = static_cast<s32>(in_component); | ||
| 796 | insert_to_word(tmp_word); | ||
| 797 | } else if constexpr (component_types[which_component] == ComponentType::UINT) { | ||
| 798 | u32 tmp_word = static_cast<u32>(in_component); | ||
| 799 | insert_to_word(tmp_word); | ||
| 800 | } else if constexpr (component_types[which_component] == ComponentType::FLOAT) { | ||
| 801 | if constexpr (component_sizes[which_component] == 32) { | ||
| 802 | u32 tmp_word = std::bit_cast<u32>(in_component); | ||
| 803 | insert_to_word(tmp_word); | ||
| 804 | } else if constexpr (component_sizes[which_component] == 16) { | ||
| 805 | static constexpr u32 sign_mask = 0x8000; | ||
| 806 | static constexpr u32 mantissa_mask = 0x03ff; | ||
| 807 | static constexpr u32 exponent_mask = 0x7c00; | ||
| 808 | const u32 tmp_word = std::bit_cast<u32>(in_component); | ||
| 809 | const u32 half = ((tmp_word >> 16) & sign_mask) | | ||
| 810 | ((((tmp_word & 0x7f800000) - 0x38000000) >> 13) & exponent_mask) | | ||
| 811 | ((tmp_word >> 13) & mantissa_mask); | ||
| 812 | insert_to_word(half); | ||
| 813 | } else { | ||
| 814 | insert_to_word(to_fp_n(in_component, component_sizes[which_component], | ||
| 815 | component_sizes[which_component] - 5)); | ||
| 816 | } | ||
| 817 | } else if constexpr (component_types[which_component] == ComponentType::SRGB) { | ||
| 818 | if constexpr (component_swizzle[which_component] != Swizzle::A) { | ||
| 819 | if constexpr (component_sizes[which_component] == 8) { | ||
| 820 | const u32 index = calculate_unorm(); | ||
| 821 | in_component = RGB_TO_SRGB_LUT[index]; | ||
| 822 | } else { | ||
| 823 | UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented", | ||
| 824 | component_sizes[which_component]); | ||
| 825 | } | ||
| 826 | } | ||
| 827 | const u32 tmp_word = calculate_unorm(); | ||
| 828 | insert_to_word(tmp_word); | ||
| 829 | } | ||
| 830 | } | ||
| 831 | |||
| 832 | public: | ||
| 833 | void ConvertTo(std::span<const u8> input, std::span<f32> output) override { | ||
| 834 | const size_t num_pixels = output.size() / components_per_ir_rep; | ||
| 835 | for (size_t pixel = 0; pixel < num_pixels; pixel++) { | ||
| 836 | std::array<u32, total_words_per_pixel> words{}; | ||
| 837 | |||
| 838 | std::memcpy(words.data(), &input[pixel * total_bytes_per_pixel], total_bytes_per_pixel); | ||
| 839 | std::span<f32> new_components(&output[pixel * components_per_ir_rep], | ||
| 840 | components_per_ir_rep); | ||
| 841 | if constexpr (component_swizzle[0] != Swizzle::None) { | ||
| 842 | ConvertToComponent<0>(words[bound_words[0]], | ||
| 843 | new_components[static_cast<size_t>(component_swizzle[0])]); | ||
| 844 | } else { | ||
| 845 | new_components[0] = 0.0f; | ||
| 846 | } | ||
| 847 | if constexpr (num_components >= 2) { | ||
| 848 | if constexpr (component_swizzle[1] != Swizzle::None) { | ||
| 849 | ConvertToComponent<1>( | ||
| 850 | words[bound_words[1]], | ||
| 851 | new_components[static_cast<size_t>(component_swizzle[1])]); | ||
| 852 | } else { | ||
| 853 | new_components[1] = 0.0f; | ||
| 854 | } | ||
| 855 | } else { | ||
| 856 | new_components[1] = 0.0f; | ||
| 857 | } | ||
| 858 | if constexpr (num_components >= 3) { | ||
| 859 | if constexpr (component_swizzle[2] != Swizzle::None) { | ||
| 860 | ConvertToComponent<2>( | ||
| 861 | words[bound_words[2]], | ||
| 862 | new_components[static_cast<size_t>(component_swizzle[2])]); | ||
| 863 | } else { | ||
| 864 | new_components[2] = 0.0f; | ||
| 865 | } | ||
| 866 | } else { | ||
| 867 | new_components[2] = 0.0f; | ||
| 868 | } | ||
| 869 | if constexpr (num_components >= 4) { | ||
| 870 | if constexpr (component_swizzle[3] != Swizzle::None) { | ||
| 871 | ConvertToComponent<3>( | ||
| 872 | words[bound_words[3]], | ||
| 873 | new_components[static_cast<size_t>(component_swizzle[3])]); | ||
| 874 | } else { | ||
| 875 | new_components[3] = 0.0f; | ||
| 876 | } | ||
| 877 | } else { | ||
| 878 | new_components[3] = 0.0f; | ||
| 879 | } | ||
| 880 | } | ||
| 881 | } | ||
| 882 | |||
| 883 | void ConvertFrom(std::span<const f32> input, std::span<u8> output) override { | ||
| 884 | const size_t num_pixels = output.size() / total_bytes_per_pixel; | ||
| 885 | for (size_t pixel = 0; pixel < num_pixels; pixel++) { | ||
| 886 | std::span<const f32> old_components(&input[pixel * components_per_ir_rep], | ||
| 887 | components_per_ir_rep); | ||
| 888 | std::array<u32, total_words_per_pixel> words{}; | ||
| 889 | if constexpr (component_swizzle[0] != Swizzle::None) { | ||
| 890 | ConvertFromComponent<0>(words[bound_words[0]], | ||
| 891 | old_components[static_cast<size_t>(component_swizzle[0])]); | ||
| 892 | } | ||
| 893 | if constexpr (num_components >= 2) { | ||
| 894 | if constexpr (component_swizzle[1] != Swizzle::None) { | ||
| 895 | ConvertFromComponent<1>( | ||
| 896 | words[bound_words[1]], | ||
| 897 | old_components[static_cast<size_t>(component_swizzle[1])]); | ||
| 898 | } | ||
| 899 | } | ||
| 900 | if constexpr (num_components >= 3) { | ||
| 901 | if constexpr (component_swizzle[2] != Swizzle::None) { | ||
| 902 | ConvertFromComponent<2>( | ||
| 903 | words[bound_words[2]], | ||
| 904 | old_components[static_cast<size_t>(component_swizzle[2])]); | ||
| 905 | } | ||
| 906 | } | ||
| 907 | if constexpr (num_components >= 4) { | ||
| 908 | if constexpr (component_swizzle[3] != Swizzle::None) { | ||
| 909 | ConvertFromComponent<3>( | ||
| 910 | words[bound_words[3]], | ||
| 911 | old_components[static_cast<size_t>(component_swizzle[3])]); | ||
| 912 | } | ||
| 913 | } | ||
| 914 | std::memcpy(&output[pixel * total_bytes_per_pixel], words.data(), | ||
| 915 | total_bytes_per_pixel); | ||
| 916 | } | ||
| 917 | } | ||
| 918 | |||
| 919 | ConverterImpl() = default; | ||
| 920 | ~ConverterImpl() override = default; | ||
| 921 | }; | ||
| 922 | |||
| 923 | struct ConverterFactory::ConverterFactoryImpl { | ||
| 924 | std::unordered_map<RenderTargetFormat, std::unique_ptr<Converter>> converters_cache; | ||
| 925 | }; | ||
| 926 | |||
| 927 | ConverterFactory::ConverterFactory() { | ||
| 928 | impl = std::make_unique<ConverterFactoryImpl>(); | ||
| 929 | } | ||
| 930 | |||
| 931 | ConverterFactory::~ConverterFactory() = default; | ||
| 932 | |||
| 933 | Converter* ConverterFactory::GetFormatConverter(RenderTargetFormat format) { | ||
| 934 | auto it = impl->converters_cache.find(format); | ||
| 935 | if (it == impl->converters_cache.end()) [[unlikely]] { | ||
| 936 | return BuildConverter(format); | ||
| 937 | } | ||
| 938 | return it->second.get(); | ||
| 939 | } | ||
| 940 | |||
| 941 | class NullConverter : public Converter { | ||
| 942 | public: | ||
| 943 | void ConvertTo([[maybe_unused]] std::span<const u8> input, std::span<f32> output) override { | ||
| 944 | std::fill(output.begin(), output.end(), 0.0f); | ||
| 945 | } | ||
| 946 | void ConvertFrom([[maybe_unused]] std::span<const f32> input, std::span<u8> output) override { | ||
| 947 | const u8 fill_value = 0U; | ||
| 948 | std::fill(output.begin(), output.end(), fill_value); | ||
| 949 | } | ||
| 950 | NullConverter() = default; | ||
| 951 | ~NullConverter() = default; | ||
| 952 | }; | ||
| 953 | |||
| 954 | Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { | ||
| 955 | switch (format) { | ||
| 956 | case RenderTargetFormat::R32G32B32A32_FLOAT: | ||
| 957 | return impl->converters_cache | ||
| 958 | .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_FLOATTraits>>()) | ||
| 959 | .first->second.get(); | ||
| 960 | break; | ||
| 961 | case RenderTargetFormat::R32G32B32A32_SINT: | ||
| 962 | return impl->converters_cache | ||
| 963 | .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_SINTTraits>>()) | ||
| 964 | .first->second.get(); | ||
| 965 | break; | ||
| 966 | case RenderTargetFormat::R32G32B32A32_UINT: | ||
| 967 | return impl->converters_cache | ||
| 968 | .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_UINTTraits>>()) | ||
| 969 | .first->second.get(); | ||
| 970 | break; | ||
| 971 | case RenderTargetFormat::R32G32B32X32_FLOAT: | ||
| 972 | return impl->converters_cache | ||
| 973 | .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_FLOATTraits>>()) | ||
| 974 | .first->second.get(); | ||
| 975 | break; | ||
| 976 | case RenderTargetFormat::R32G32B32X32_SINT: | ||
| 977 | return impl->converters_cache | ||
| 978 | .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_SINTTraits>>()) | ||
| 979 | .first->second.get(); | ||
| 980 | break; | ||
| 981 | case RenderTargetFormat::R32G32B32X32_UINT: | ||
| 982 | return impl->converters_cache | ||
| 983 | .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_UINTTraits>>()) | ||
| 984 | .first->second.get(); | ||
| 985 | break; | ||
| 986 | case RenderTargetFormat::R16G16B16A16_UNORM: | ||
| 987 | return impl->converters_cache | ||
| 988 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UNORMTraits>>()) | ||
| 989 | .first->second.get(); | ||
| 990 | break; | ||
| 991 | case RenderTargetFormat::R16G16B16A16_SNORM: | ||
| 992 | return impl->converters_cache | ||
| 993 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SNORMTraits>>()) | ||
| 994 | .first->second.get(); | ||
| 995 | break; | ||
| 996 | case RenderTargetFormat::R16G16B16A16_SINT: | ||
| 997 | return impl->converters_cache | ||
| 998 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SINTTraits>>()) | ||
| 999 | .first->second.get(); | ||
| 1000 | break; | ||
| 1001 | case RenderTargetFormat::R16G16B16A16_UINT: | ||
| 1002 | return impl->converters_cache | ||
| 1003 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UINTTraits>>()) | ||
| 1004 | .first->second.get(); | ||
| 1005 | break; | ||
| 1006 | case RenderTargetFormat::R16G16B16A16_FLOAT: | ||
| 1007 | return impl->converters_cache | ||
| 1008 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_FLOATTraits>>()) | ||
| 1009 | .first->second.get(); | ||
| 1010 | break; | ||
| 1011 | case RenderTargetFormat::R32G32_FLOAT: | ||
| 1012 | return impl->converters_cache | ||
| 1013 | .emplace(format, std::make_unique<ConverterImpl<R32G32_FLOATTraits>>()) | ||
| 1014 | .first->second.get(); | ||
| 1015 | break; | ||
| 1016 | case RenderTargetFormat::R32G32_SINT: | ||
| 1017 | return impl->converters_cache | ||
| 1018 | .emplace(format, std::make_unique<ConverterImpl<R32G32_SINTTraits>>()) | ||
| 1019 | .first->second.get(); | ||
| 1020 | break; | ||
| 1021 | case RenderTargetFormat::R32G32_UINT: | ||
| 1022 | return impl->converters_cache | ||
| 1023 | .emplace(format, std::make_unique<ConverterImpl<R32G32_UINTTraits>>()) | ||
| 1024 | .first->second.get(); | ||
| 1025 | break; | ||
| 1026 | case RenderTargetFormat::R16G16B16X16_FLOAT: | ||
| 1027 | return impl->converters_cache | ||
| 1028 | .emplace(format, std::make_unique<ConverterImpl<R16G16B16X16_FLOATTraits>>()) | ||
| 1029 | .first->second.get(); | ||
| 1030 | break; | ||
| 1031 | case RenderTargetFormat::A8R8G8B8_UNORM: | ||
| 1032 | return impl->converters_cache | ||
| 1033 | .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_UNORMTraits>>()) | ||
| 1034 | .first->second.get(); | ||
| 1035 | break; | ||
| 1036 | case RenderTargetFormat::A8R8G8B8_SRGB: | ||
| 1037 | return impl->converters_cache | ||
| 1038 | .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_SRGBTraits>>()) | ||
| 1039 | .first->second.get(); | ||
| 1040 | break; | ||
| 1041 | case RenderTargetFormat::A2B10G10R10_UNORM: | ||
| 1042 | return impl->converters_cache | ||
| 1043 | .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UNORMTraits>>()) | ||
| 1044 | .first->second.get(); | ||
| 1045 | break; | ||
| 1046 | case RenderTargetFormat::A2B10G10R10_UINT: | ||
| 1047 | return impl->converters_cache | ||
| 1048 | .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UINTTraits>>()) | ||
| 1049 | .first->second.get(); | ||
| 1050 | break; | ||
| 1051 | case RenderTargetFormat::A2R10G10B10_UNORM: | ||
| 1052 | return impl->converters_cache | ||
| 1053 | .emplace(format, std::make_unique<ConverterImpl<A2R10G10B10_UNORMTraits>>()) | ||
| 1054 | .first->second.get(); | ||
| 1055 | break; | ||
| 1056 | case RenderTargetFormat::A8B8G8R8_UNORM: | ||
| 1057 | return impl->converters_cache | ||
| 1058 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UNORMTraits>>()) | ||
| 1059 | .first->second.get(); | ||
| 1060 | break; | ||
| 1061 | case RenderTargetFormat::A8B8G8R8_SRGB: | ||
| 1062 | return impl->converters_cache | ||
| 1063 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SRGBTraits>>()) | ||
| 1064 | .first->second.get(); | ||
| 1065 | break; | ||
| 1066 | case RenderTargetFormat::A8B8G8R8_SNORM: | ||
| 1067 | return impl->converters_cache | ||
| 1068 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SNORMTraits>>()) | ||
| 1069 | .first->second.get(); | ||
| 1070 | break; | ||
| 1071 | case RenderTargetFormat::A8B8G8R8_SINT: | ||
| 1072 | return impl->converters_cache | ||
| 1073 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SINTTraits>>()) | ||
| 1074 | .first->second.get(); | ||
| 1075 | break; | ||
| 1076 | case RenderTargetFormat::A8B8G8R8_UINT: | ||
| 1077 | return impl->converters_cache | ||
| 1078 | .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UINTTraits>>()) | ||
| 1079 | .first->second.get(); | ||
| 1080 | break; | ||
| 1081 | case RenderTargetFormat::R16G16_UNORM: | ||
| 1082 | return impl->converters_cache | ||
| 1083 | .emplace(format, std::make_unique<ConverterImpl<R16G16_UNORMTraits>>()) | ||
| 1084 | .first->second.get(); | ||
| 1085 | break; | ||
| 1086 | case RenderTargetFormat::R16G16_SNORM: | ||
| 1087 | return impl->converters_cache | ||
| 1088 | .emplace(format, std::make_unique<ConverterImpl<R16G16_SNORMTraits>>()) | ||
| 1089 | .first->second.get(); | ||
| 1090 | break; | ||
| 1091 | case RenderTargetFormat::R16G16_SINT: | ||
| 1092 | return impl->converters_cache | ||
| 1093 | .emplace(format, std::make_unique<ConverterImpl<R16G16_SINTTraits>>()) | ||
| 1094 | .first->second.get(); | ||
| 1095 | break; | ||
| 1096 | case RenderTargetFormat::R16G16_UINT: | ||
| 1097 | return impl->converters_cache | ||
| 1098 | .emplace(format, std::make_unique<ConverterImpl<R16G16_UINTTraits>>()) | ||
| 1099 | .first->second.get(); | ||
| 1100 | break; | ||
| 1101 | case RenderTargetFormat::R16G16_FLOAT: | ||
| 1102 | return impl->converters_cache | ||
| 1103 | .emplace(format, std::make_unique<ConverterImpl<R16G16_FLOATTraits>>()) | ||
| 1104 | .first->second.get(); | ||
| 1105 | break; | ||
| 1106 | case RenderTargetFormat::B10G11R11_FLOAT: | ||
| 1107 | return impl->converters_cache | ||
| 1108 | .emplace(format, std::make_unique<ConverterImpl<B10G11R11_FLOATTraits>>()) | ||
| 1109 | .first->second.get(); | ||
| 1110 | break; | ||
| 1111 | case RenderTargetFormat::R32_SINT: | ||
| 1112 | return impl->converters_cache | ||
| 1113 | .emplace(format, std::make_unique<ConverterImpl<R32_SINTTraits>>()) | ||
| 1114 | .first->second.get(); | ||
| 1115 | break; | ||
| 1116 | case RenderTargetFormat::R32_UINT: | ||
| 1117 | return impl->converters_cache | ||
| 1118 | .emplace(format, std::make_unique<ConverterImpl<R32_UINTTraits>>()) | ||
| 1119 | .first->second.get(); | ||
| 1120 | break; | ||
| 1121 | case RenderTargetFormat::R32_FLOAT: | ||
| 1122 | return impl->converters_cache | ||
| 1123 | .emplace(format, std::make_unique<ConverterImpl<R32_FLOATTraits>>()) | ||
| 1124 | .first->second.get(); | ||
| 1125 | break; | ||
| 1126 | case RenderTargetFormat::X8R8G8B8_UNORM: | ||
| 1127 | return impl->converters_cache | ||
| 1128 | .emplace(format, std::make_unique<ConverterImpl<X8R8G8B8_UNORMTraits>>()) | ||
| 1129 | .first->second.get(); | ||
| 1130 | break; | ||
| 1131 | case RenderTargetFormat::X8R8G8B8_SRGB: | ||
| 1132 | return impl->converters_cache | ||
| 1133 | .emplace(format, std::make_unique<ConverterImpl<X8R8G8B8_SRGBTraits>>()) | ||
| 1134 | .first->second.get(); | ||
| 1135 | break; | ||
| 1136 | case RenderTargetFormat::R5G6B5_UNORM: | ||
| 1137 | return impl->converters_cache | ||
| 1138 | .emplace(format, std::make_unique<ConverterImpl<R5G6B5_UNORMTraits>>()) | ||
| 1139 | .first->second.get(); | ||
| 1140 | break; | ||
| 1141 | case RenderTargetFormat::A1R5G5B5_UNORM: | ||
| 1142 | return impl->converters_cache | ||
| 1143 | .emplace(format, std::make_unique<ConverterImpl<A1R5G5B5_UNORMTraits>>()) | ||
| 1144 | .first->second.get(); | ||
| 1145 | break; | ||
| 1146 | case RenderTargetFormat::R8G8_UNORM: | ||
| 1147 | return impl->converters_cache | ||
| 1148 | .emplace(format, std::make_unique<ConverterImpl<R8G8_UNORMTraits>>()) | ||
| 1149 | .first->second.get(); | ||
| 1150 | break; | ||
| 1151 | case RenderTargetFormat::R8G8_SNORM: | ||
| 1152 | return impl->converters_cache | ||
| 1153 | .emplace(format, std::make_unique<ConverterImpl<R8G8_SNORMTraits>>()) | ||
| 1154 | .first->second.get(); | ||
| 1155 | break; | ||
| 1156 | case RenderTargetFormat::R8G8_SINT: | ||
| 1157 | return impl->converters_cache | ||
| 1158 | .emplace(format, std::make_unique<ConverterImpl<R8G8_SINTTraits>>()) | ||
| 1159 | .first->second.get(); | ||
| 1160 | break; | ||
| 1161 | case RenderTargetFormat::R8G8_UINT: | ||
| 1162 | return impl->converters_cache | ||
| 1163 | .emplace(format, std::make_unique<ConverterImpl<R8G8_UINTTraits>>()) | ||
| 1164 | .first->second.get(); | ||
| 1165 | break; | ||
| 1166 | case RenderTargetFormat::R16_UNORM: | ||
| 1167 | return impl->converters_cache | ||
| 1168 | .emplace(format, std::make_unique<ConverterImpl<R16_UNORMTraits>>()) | ||
| 1169 | .first->second.get(); | ||
| 1170 | break; | ||
| 1171 | case RenderTargetFormat::R16_SNORM: | ||
| 1172 | return impl->converters_cache | ||
| 1173 | .emplace(format, std::make_unique<ConverterImpl<R16_SNORMTraits>>()) | ||
| 1174 | .first->second.get(); | ||
| 1175 | break; | ||
| 1176 | case RenderTargetFormat::R16_SINT: | ||
| 1177 | return impl->converters_cache | ||
| 1178 | .emplace(format, std::make_unique<ConverterImpl<R16_SINTTraits>>()) | ||
| 1179 | .first->second.get(); | ||
| 1180 | break; | ||
| 1181 | case RenderTargetFormat::R16_UINT: | ||
| 1182 | return impl->converters_cache | ||
| 1183 | .emplace(format, std::make_unique<ConverterImpl<R16_UINTTraits>>()) | ||
| 1184 | .first->second.get(); | ||
| 1185 | break; | ||
| 1186 | case RenderTargetFormat::R16_FLOAT: | ||
| 1187 | return impl->converters_cache | ||
| 1188 | .emplace(format, std::make_unique<ConverterImpl<R16_FLOATTraits>>()) | ||
| 1189 | .first->second.get(); | ||
| 1190 | break; | ||
| 1191 | case RenderTargetFormat::R8_UNORM: | ||
| 1192 | return impl->converters_cache | ||
| 1193 | .emplace(format, std::make_unique<ConverterImpl<R8_UNORMTraits>>()) | ||
| 1194 | .first->second.get(); | ||
| 1195 | break; | ||
| 1196 | case RenderTargetFormat::R8_SNORM: | ||
| 1197 | return impl->converters_cache | ||
| 1198 | .emplace(format, std::make_unique<ConverterImpl<R8_SNORMTraits>>()) | ||
| 1199 | .first->second.get(); | ||
| 1200 | break; | ||
| 1201 | case RenderTargetFormat::R8_SINT: | ||
| 1202 | return impl->converters_cache | ||
| 1203 | .emplace(format, std::make_unique<ConverterImpl<R8_SINTTraits>>()) | ||
| 1204 | .first->second.get(); | ||
| 1205 | break; | ||
| 1206 | case RenderTargetFormat::R8_UINT: | ||
| 1207 | return impl->converters_cache | ||
| 1208 | .emplace(format, std::make_unique<ConverterImpl<R8_UINTTraits>>()) | ||
| 1209 | .first->second.get(); | ||
| 1210 | break; | ||
| 1211 | case RenderTargetFormat::X1R5G5B5_UNORM: | ||
| 1212 | return impl->converters_cache | ||
| 1213 | .emplace(format, std::make_unique<ConverterImpl<X1R5G5B5_UNORMTraits>>()) | ||
| 1214 | .first->second.get(); | ||
| 1215 | break; | ||
| 1216 | case RenderTargetFormat::X8B8G8R8_UNORM: | ||
| 1217 | return impl->converters_cache | ||
| 1218 | .emplace(format, std::make_unique<ConverterImpl<X8B8G8R8_UNORMTraits>>()) | ||
| 1219 | .first->second.get(); | ||
| 1220 | break; | ||
| 1221 | case RenderTargetFormat::X8B8G8R8_SRGB: | ||
| 1222 | return impl->converters_cache | ||
| 1223 | .emplace(format, std::make_unique<ConverterImpl<X8B8G8R8_SRGBTraits>>()) | ||
| 1224 | .first->second.get(); | ||
| 1225 | break; | ||
| 1226 | default: { | ||
| 1227 | UNIMPLEMENTED_MSG("This format {} converter is not implemented", format); | ||
| 1228 | return impl->converters_cache.emplace(format, std::make_unique<NullConverter>()) | ||
| 1229 | .first->second.get(); | ||
| 1230 | } | ||
| 1231 | } | ||
| 1232 | } | ||
| 1233 | |||
| 1234 | } // namespace Tegra::Engines::Blitter | ||
diff --git a/src/video_core/engines/sw_blitter/converter.h b/src/video_core/engines/sw_blitter/converter.h new file mode 100644 index 000000000..f9bdc516e --- /dev/null +++ b/src/video_core/engines/sw_blitter/converter.h | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <memory> | ||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | #include "video_core/gpu.h" | ||
| 12 | |||
| 13 | namespace Tegra::Engines::Blitter { | ||
| 14 | |||
| 15 | class Converter { | ||
| 16 | public: | ||
| 17 | virtual void ConvertTo(std::span<const u8> input, std::span<f32> output) = 0; | ||
| 18 | virtual void ConvertFrom(std::span<const f32> input, std::span<u8> output) = 0; | ||
| 19 | virtual ~Converter() = default; | ||
| 20 | }; | ||
| 21 | |||
| 22 | class ConverterFactory { | ||
| 23 | public: | ||
| 24 | ConverterFactory(); | ||
| 25 | ~ConverterFactory(); | ||
| 26 | |||
| 27 | Converter* GetFormatConverter(RenderTargetFormat format); | ||
| 28 | |||
| 29 | private: | ||
| 30 | Converter* BuildConverter(RenderTargetFormat format); | ||
| 31 | |||
| 32 | struct ConverterFactoryImpl; | ||
| 33 | std::unique_ptr<ConverterFactoryImpl> impl; | ||
| 34 | }; | ||
| 35 | |||
| 36 | } // namespace Tegra::Engines::Blitter | ||
diff --git a/src/video_core/engines/sw_blitter/generate_converters.py b/src/video_core/engines/sw_blitter/generate_converters.py new file mode 100644 index 000000000..f641564f7 --- /dev/null +++ b/src/video_core/engines/sw_blitter/generate_converters.py | |||
| @@ -0,0 +1,136 @@ | |||
| 1 | # SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | # SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | import re | ||
| 5 | |||
class Format:
    """One render-target format, described by parsing its name.

    A name such as ``A2B10G10R10_UNORM`` is split on underscores: the first
    piece lists the components (one character followed by a decimal bit
    width each) and the second piece is the component type.
    """

    def __init__(self, string_value):
        self.name = string_value
        pieces = string_value.split('_')
        self.component_type = pieces[1]
        # Every match is a single character followed by its bit width.
        matches = re.findall(r"\w\d+", pieces[0])
        self.num_components = len(matches)
        self.sizes = [int(match[1:]) for match in matches]
        self.swizzle = [match[0] for match in matches]

    def build_component_type_array(self):
        """Return a C++ brace list repeating the component type once per component."""
        entries = ["ComponentType::" + self.component_type
                   for _ in range(self.num_components)]
        return "{ " + ", ".join(entries) + " }"

    def build_component_sizes_array(self):
        """Return a C++ brace list with the bit width of every component."""
        entries = [str(self.sizes[index]) for index in range(self.num_components)]
        return "{ " + ", ".join(entries) + " }"

    def build_component_swizzle_array(self):
        """Return a C++ brace list of Swizzle values; the letter X maps to Swizzle::None."""
        entries = []
        for index in range(self.num_components):
            letter = self.swizzle[index]
            entries.append("Swizzle::" + ("None" if letter == "X" else letter))
        return "{ " + ", ".join(entries) + " }"

    def print_declaration(self):
        """Print the C++ traits struct for this format, followed by a blank line."""
        lines = [
            "struct " + self.name + "Traits {",
            " static constexpr size_t num_components = " + str(self.num_components) + ";",
            " static constexpr std::array<ComponentType, num_components> component_types = "
            + self.build_component_type_array() + ";",
            " static constexpr std::array<size_t, num_components> component_sizes = "
            + self.build_component_sizes_array() + ";",
            " static constexpr std::array<Swizzle, num_components> component_swizzle = "
            + self.build_component_swizzle_array() + ";",
            "};\n",
        ]
        for line in lines:
            print(line)

    def print_case(self):
        """Print the C++ switch case that builds and caches this format's converter."""
        lines = [
            "case RenderTargetFormat::" + self.name + ":",
            " return impl->converters_cache",
            " .emplace(format, std::make_unique<ConverterImpl<" + self.name + "Traits>>())",
            " .first->second.get();",
            " break;",
        ]
        for line in lines:
            print(line)
| 71 | |||
# Render-target format names, separated by whitespace. Each name produces one
# traits struct and one switch case in the generated output.
txt = """
R32G32B32A32_FLOAT
R32G32B32A32_SINT
R32G32B32A32_UINT
R32G32B32X32_FLOAT
R32G32B32X32_SINT
R32G32B32X32_UINT
R16G16B16A16_UNORM
R16G16B16A16_SNORM
R16G16B16A16_SINT
R16G16B16A16_UINT
R16G16B16A16_FLOAT
R32G32_FLOAT
R32G32_SINT
R32G32_UINT
R16G16B16X16_FLOAT
A8R8G8B8_UNORM
A8R8G8B8_SRGB
A2B10G10R10_UNORM
A2B10G10R10_UINT
A2R10G10B10_UNORM
A8B8G8R8_UNORM
A8B8G8R8_SRGB
A8B8G8R8_SNORM
A8B8G8R8_SINT
A8B8G8R8_UINT
R16G16_UNORM
R16G16_SNORM
R16G16_SINT
R16G16_UINT
R16G16_FLOAT
B10G11R11_FLOAT
R32_SINT
R32_UINT
R32_FLOAT
X8R8G8B8_UNORM
X8R8G8B8_SRGB
R5G6B5_UNORM
A1R5G5B5_UNORM
R8G8_UNORM
R8G8_SNORM
R8G8_SINT
R8G8_UINT
R16_UNORM
R16_SNORM
R16_SINT
R16_UINT
R16_FLOAT
R8_UNORM
R8_SNORM
R8_SINT
R8_UINT
X1R5G5B5_UNORM
X8B8G8R8_UNORM
X8B8G8R8_SRGB
"""

# Parse every name once, then print all traits structs followed by all switch
# cases, in the order the names are listed above.
formats = [Format(name) for name in txt.split()]

for fmt in formats:
    fmt.print_declaration()

for fmt in formats:
    fmt.print_case()
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index d0709dc69..8a871593a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -27,12 +27,12 @@ struct CommandList; | |||
| 27 | // TODO: Implement the commented ones | 27 | // TODO: Implement the commented ones |
| 28 | enum class RenderTargetFormat : u32 { | 28 | enum class RenderTargetFormat : u32 { |
| 29 | NONE = 0x0, | 29 | NONE = 0x0, |
| 30 | R32B32G32A32_FLOAT = 0xC0, | 30 | R32G32B32A32_FLOAT = 0xC0, |
| 31 | R32G32B32A32_SINT = 0xC1, | 31 | R32G32B32A32_SINT = 0xC1, |
| 32 | R32G32B32A32_UINT = 0xC2, | 32 | R32G32B32A32_UINT = 0xC2, |
| 33 | // R32G32B32X32_FLOAT = 0xC3, | 33 | R32G32B32X32_FLOAT = 0xC3, |
| 34 | // R32G32B32X32_SINT = 0xC4, | 34 | R32G32B32X32_SINT = 0xC4, |
| 35 | // R32G32B32X32_UINT = 0xC5, | 35 | R32G32B32X32_UINT = 0xC5, |
| 36 | R16G16B16A16_UNORM = 0xC6, | 36 | R16G16B16A16_UNORM = 0xC6, |
| 37 | R16G16B16A16_SNORM = 0xC7, | 37 | R16G16B16A16_SNORM = 0xC7, |
| 38 | R16G16B16A16_SINT = 0xC8, | 38 | R16G16B16A16_SINT = 0xC8, |
| @@ -56,13 +56,13 @@ enum class RenderTargetFormat : u32 { | |||
| 56 | R16G16_SINT = 0xDC, | 56 | R16G16_SINT = 0xDC, |
| 57 | R16G16_UINT = 0xDD, | 57 | R16G16_UINT = 0xDD, |
| 58 | R16G16_FLOAT = 0xDE, | 58 | R16G16_FLOAT = 0xDE, |
| 59 | // A2R10G10B10_UNORM = 0xDF, | 59 | A2R10G10B10_UNORM = 0xDF, |
| 60 | B10G11R11_FLOAT = 0xE0, | 60 | B10G11R11_FLOAT = 0xE0, |
| 61 | R32_SINT = 0xE3, | 61 | R32_SINT = 0xE3, |
| 62 | R32_UINT = 0xE4, | 62 | R32_UINT = 0xE4, |
| 63 | R32_FLOAT = 0xE5, | 63 | R32_FLOAT = 0xE5, |
| 64 | // X8R8G8B8_UNORM = 0xE6, | 64 | X8R8G8B8_UNORM = 0xE6, |
| 65 | // X8R8G8B8_SRGB = 0xE7, | 65 | X8R8G8B8_SRGB = 0xE7, |
| 66 | R5G6B5_UNORM = 0xE8, | 66 | R5G6B5_UNORM = 0xE8, |
| 67 | A1R5G5B5_UNORM = 0xE9, | 67 | A1R5G5B5_UNORM = 0xE9, |
| 68 | R8G8_UNORM = 0xEA, | 68 | R8G8_UNORM = 0xEA, |
| @@ -79,11 +79,11 @@ enum class RenderTargetFormat : u32 { | |||
| 79 | R8_SINT = 0xF5, | 79 | R8_SINT = 0xF5, |
| 80 | R8_UINT = 0xF6, | 80 | R8_UINT = 0xF6, |
| 81 | 81 | ||
| 82 | /* | 82 | // A8_UNORM = 0xF7, |
| 83 | A8_UNORM = 0xF7, | ||
| 84 | X1R5G5B5_UNORM = 0xF8, | 83 | X1R5G5B5_UNORM = 0xF8, |
| 85 | X8B8G8R8_UNORM = 0xF9, | 84 | X8B8G8R8_UNORM = 0xF9, |
| 86 | X8B8G8R8_SRGB = 0xFA, | 85 | X8B8G8R8_SRGB = 0xFA, |
| 86 | /* | ||
| 87 | Z1R5G5B5_UNORM = 0xFB, | 87 | Z1R5G5B5_UNORM = 0xFB, |
| 88 | O1R5G5B5_UNORM = 0xFC, | 88 | O1R5G5B5_UNORM = 0xFC, |
| 89 | Z8R8G8B8_UNORM = 0xFD, | 89 | Z8R8G8B8_UNORM = 0xFD, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 354c6e429..f71a316b6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -466,8 +466,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf | |||
| 466 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 466 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 467 | MICROPROFILE_SCOPE(OpenGL_Blits); | 467 | MICROPROFILE_SCOPE(OpenGL_Blits); |
| 468 | std::scoped_lock lock{texture_cache.mutex}; | 468 | std::scoped_lock lock{texture_cache.mutex}; |
| 469 | texture_cache.BlitImage(dst, src, copy_config); | 469 | return texture_cache.BlitImage(dst, src, copy_config); |
| 470 | return true; | ||
| 471 | } | 470 | } |
| 472 | 471 | ||
| 473 | Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { | 472 | Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { |
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index e14f9b2db..ef1190e1f 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -28,6 +28,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB | |||
| 28 | {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM | 28 | {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM |
| 29 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM | 29 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM |
| 30 | {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT | 30 | {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT |
| 31 | {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2R10G10B10_UNORM | ||
| 31 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM | 32 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM |
| 32 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // A5B5G5R1_UNORM | 33 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // A5B5G5R1_UNORM |
| 33 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM | 34 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index b4f5ee665..430a84272 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -125,6 +125,7 @@ struct FormatTuple { | |||
| 125 | {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM | 125 | {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM |
| 126 | {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM | 126 | {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM |
| 127 | {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT | 127 | {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT |
| 128 | {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable | Storage}, // A2R10G10B10_UNORM | ||
| 128 | {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) | 129 | {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) |
| 129 | {VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled) | 130 | {VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled) |
| 130 | {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM | 131 | {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 12b13cc59..d8ad8815c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -542,8 +542,7 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf | |||
| 542 | const Tegra::Engines::Fermi2D::Surface& dst, | 542 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 543 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 543 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 544 | std::scoped_lock lock{texture_cache.mutex}; | 544 | std::scoped_lock lock{texture_cache.mutex}; |
| 545 | texture_cache.BlitImage(dst, src, copy_config); | 545 | return texture_cache.BlitImage(dst, src, copy_config); |
| 546 | return true; | ||
| 547 | } | 546 | } |
| 548 | 547 | ||
| 549 | Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { | 548 | Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 6bd133d10..b618e1a25 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -93,11 +93,14 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { | |||
| 93 | 93 | ||
| 94 | PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { | 94 | PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { |
| 95 | switch (format) { | 95 | switch (format) { |
| 96 | case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT: | 96 | case Tegra::RenderTargetFormat::R32G32B32A32_FLOAT: |
| 97 | case Tegra::RenderTargetFormat::R32G32B32X32_FLOAT: | ||
| 97 | return PixelFormat::R32G32B32A32_FLOAT; | 98 | return PixelFormat::R32G32B32A32_FLOAT; |
| 98 | case Tegra::RenderTargetFormat::R32G32B32A32_SINT: | 99 | case Tegra::RenderTargetFormat::R32G32B32A32_SINT: |
| 100 | case Tegra::RenderTargetFormat::R32G32B32X32_SINT: | ||
| 99 | return PixelFormat::R32G32B32A32_SINT; | 101 | return PixelFormat::R32G32B32A32_SINT; |
| 100 | case Tegra::RenderTargetFormat::R32G32B32A32_UINT: | 102 | case Tegra::RenderTargetFormat::R32G32B32A32_UINT: |
| 103 | case Tegra::RenderTargetFormat::R32G32B32X32_UINT: | ||
| 101 | return PixelFormat::R32G32B32A32_UINT; | 104 | return PixelFormat::R32G32B32A32_UINT; |
| 102 | case Tegra::RenderTargetFormat::R16G16B16A16_UNORM: | 105 | case Tegra::RenderTargetFormat::R16G16B16A16_UNORM: |
| 103 | return PixelFormat::R16G16B16A16_UNORM; | 106 | return PixelFormat::R16G16B16A16_UNORM; |
| @@ -118,16 +121,22 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) | |||
| 118 | case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT: | 121 | case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT: |
| 119 | return PixelFormat::R16G16B16X16_FLOAT; | 122 | return PixelFormat::R16G16B16X16_FLOAT; |
| 120 | case Tegra::RenderTargetFormat::A8R8G8B8_UNORM: | 123 | case Tegra::RenderTargetFormat::A8R8G8B8_UNORM: |
| 124 | case Tegra::RenderTargetFormat::X8R8G8B8_UNORM: | ||
| 121 | return PixelFormat::B8G8R8A8_UNORM; | 125 | return PixelFormat::B8G8R8A8_UNORM; |
| 122 | case Tegra::RenderTargetFormat::A8R8G8B8_SRGB: | 126 | case Tegra::RenderTargetFormat::A8R8G8B8_SRGB: |
| 127 | case Tegra::RenderTargetFormat::X8R8G8B8_SRGB: | ||
| 123 | return PixelFormat::B8G8R8A8_SRGB; | 128 | return PixelFormat::B8G8R8A8_SRGB; |
| 124 | case Tegra::RenderTargetFormat::A2B10G10R10_UNORM: | 129 | case Tegra::RenderTargetFormat::A2B10G10R10_UNORM: |
| 125 | return PixelFormat::A2B10G10R10_UNORM; | 130 | return PixelFormat::A2B10G10R10_UNORM; |
| 126 | case Tegra::RenderTargetFormat::A2B10G10R10_UINT: | 131 | case Tegra::RenderTargetFormat::A2B10G10R10_UINT: |
| 127 | return PixelFormat::A2B10G10R10_UINT; | 132 | return PixelFormat::A2B10G10R10_UINT; |
| 133 | case Tegra::RenderTargetFormat::A2R10G10B10_UNORM: | ||
| 134 | return PixelFormat::A2R10G10B10_UNORM; | ||
| 128 | case Tegra::RenderTargetFormat::A8B8G8R8_UNORM: | 135 | case Tegra::RenderTargetFormat::A8B8G8R8_UNORM: |
| 136 | case Tegra::RenderTargetFormat::X8B8G8R8_UNORM: | ||
| 129 | return PixelFormat::A8B8G8R8_UNORM; | 137 | return PixelFormat::A8B8G8R8_UNORM; |
| 130 | case Tegra::RenderTargetFormat::A8B8G8R8_SRGB: | 138 | case Tegra::RenderTargetFormat::A8B8G8R8_SRGB: |
| 139 | case Tegra::RenderTargetFormat::X8B8G8R8_SRGB: | ||
| 131 | return PixelFormat::A8B8G8R8_SRGB; | 140 | return PixelFormat::A8B8G8R8_SRGB; |
| 132 | case Tegra::RenderTargetFormat::A8B8G8R8_SNORM: | 141 | case Tegra::RenderTargetFormat::A8B8G8R8_SNORM: |
| 133 | return PixelFormat::A8B8G8R8_SNORM; | 142 | return PixelFormat::A8B8G8R8_SNORM; |
| @@ -156,6 +165,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) | |||
| 156 | case Tegra::RenderTargetFormat::R5G6B5_UNORM: | 165 | case Tegra::RenderTargetFormat::R5G6B5_UNORM: |
| 157 | return PixelFormat::R5G6B5_UNORM; | 166 | return PixelFormat::R5G6B5_UNORM; |
| 158 | case Tegra::RenderTargetFormat::A1R5G5B5_UNORM: | 167 | case Tegra::RenderTargetFormat::A1R5G5B5_UNORM: |
| 168 | case Tegra::RenderTargetFormat::X1R5G5B5_UNORM: | ||
| 159 | return PixelFormat::A1R5G5B5_UNORM; | 169 | return PixelFormat::A1R5G5B5_UNORM; |
| 160 | case Tegra::RenderTargetFormat::R8G8_UNORM: | 170 | case Tegra::RenderTargetFormat::R8G8_UNORM: |
| 161 | return PixelFormat::R8G8_UNORM; | 171 | return PixelFormat::R8G8_UNORM; |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 57ca7f597..44b79af20 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -23,6 +23,7 @@ enum class PixelFormat { | |||
| 23 | A1R5G5B5_UNORM, | 23 | A1R5G5B5_UNORM, |
| 24 | A2B10G10R10_UNORM, | 24 | A2B10G10R10_UNORM, |
| 25 | A2B10G10R10_UINT, | 25 | A2B10G10R10_UINT, |
| 26 | A2R10G10B10_UNORM, | ||
| 26 | A1B5G5R5_UNORM, | 27 | A1B5G5R5_UNORM, |
| 27 | A5B5G5R1_UNORM, | 28 | A5B5G5R1_UNORM, |
| 28 | R8_UNORM, | 29 | R8_UNORM, |
| @@ -159,6 +160,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{ | |||
| 159 | 1, // A1R5G5B5_UNORM | 160 | 1, // A1R5G5B5_UNORM |
| 160 | 1, // A2B10G10R10_UNORM | 161 | 1, // A2B10G10R10_UNORM |
| 161 | 1, // A2B10G10R10_UINT | 162 | 1, // A2B10G10R10_UINT |
| 163 | 1, // A2R10G10B10_UNORM | ||
| 162 | 1, // A1B5G5R5_UNORM | 164 | 1, // A1B5G5R5_UNORM |
| 163 | 1, // A5B5G5R1_UNORM | 165 | 1, // A5B5G5R1_UNORM |
| 164 | 1, // R8_UNORM | 166 | 1, // R8_UNORM |
| @@ -264,6 +266,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{ | |||
| 264 | 1, // A1R5G5B5_UNORM | 266 | 1, // A1R5G5B5_UNORM |
| 265 | 1, // A2B10G10R10_UNORM | 267 | 1, // A2B10G10R10_UNORM |
| 266 | 1, // A2B10G10R10_UINT | 268 | 1, // A2B10G10R10_UINT |
| 269 | 1, // A2R10G10B10_UNORM | ||
| 267 | 1, // A1B5G5R5_UNORM | 270 | 1, // A1B5G5R5_UNORM |
| 268 | 1, // A5B5G5R1_UNORM | 271 | 1, // A5B5G5R1_UNORM |
| 269 | 1, // R8_UNORM | 272 | 1, // R8_UNORM |
| @@ -369,6 +372,7 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{ | |||
| 369 | 16, // A1R5G5B5_UNORM | 372 | 16, // A1R5G5B5_UNORM |
| 370 | 32, // A2B10G10R10_UNORM | 373 | 32, // A2B10G10R10_UNORM |
| 371 | 32, // A2B10G10R10_UINT | 374 | 32, // A2B10G10R10_UINT |
| 375 | 32, // A2R10G10B10_UNORM | ||
| 372 | 16, // A1B5G5R5_UNORM | 376 | 16, // A1B5G5R5_UNORM |
| 373 | 16, // A5B5G5R1_UNORM | 377 | 16, // A5B5G5R1_UNORM |
| 374 | 8, // R8_UNORM | 378 | 8, // R8_UNORM |
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index acc854715..f1f0a057b 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h | |||
| @@ -35,6 +35,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str | |||
| 35 | return "A2B10G10R10_UNORM"; | 35 | return "A2B10G10R10_UNORM"; |
| 36 | case PixelFormat::A2B10G10R10_UINT: | 36 | case PixelFormat::A2B10G10R10_UINT: |
| 37 | return "A2B10G10R10_UINT"; | 37 | return "A2B10G10R10_UINT"; |
| 38 | case PixelFormat::A2R10G10B10_UNORM: | ||
| 39 | return "A2R10G10B10_UNORM"; | ||
| 38 | case PixelFormat::A1B5G5R5_UNORM: | 40 | case PixelFormat::A1B5G5R5_UNORM: |
| 39 | return "A1B5G5R5_UNORM"; | 41 | return "A1B5G5R5_UNORM"; |
| 40 | case PixelFormat::A5B5G5R1_UNORM: | 42 | case PixelFormat::A5B5G5R1_UNORM: |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8ef75fe73..8e68a2e53 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -506,10 +506,14 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz | |||
| 506 | } | 506 | } |
| 507 | 507 | ||
| 508 | template <class P> | 508 | template <class P> |
| 509 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | 509 | bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 510 | const Tegra::Engines::Fermi2D::Surface& src, | 510 | const Tegra::Engines::Fermi2D::Surface& src, |
| 511 | const Tegra::Engines::Fermi2D::Config& copy) { | 511 | const Tegra::Engines::Fermi2D::Config& copy) { |
| 512 | const BlitImages images = GetBlitImages(dst, src, copy); | 512 | const auto result = GetBlitImages(dst, src, copy); |
| 513 | if (!result) { | ||
| 514 | return false; | ||
| 515 | } | ||
| 516 | const BlitImages images = *result; | ||
| 513 | const ImageId dst_id = images.dst_id; | 517 | const ImageId dst_id = images.dst_id; |
| 514 | const ImageId src_id = images.src_id; | 518 | const ImageId src_id = images.src_id; |
| 515 | 519 | ||
| @@ -596,6 +600,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | |||
| 596 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, | 600 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, |
| 597 | copy.operation); | 601 | copy.operation); |
| 598 | } | 602 | } |
| 603 | return true; | ||
| 599 | } | 604 | } |
| 600 | 605 | ||
| 601 | template <class P> | 606 | template <class P> |
| @@ -1133,7 +1138,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1133 | } | 1138 | } |
| 1134 | 1139 | ||
| 1135 | template <class P> | 1140 | template <class P> |
| 1136 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | 1141 | std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImages( |
| 1137 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, | 1142 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, |
| 1138 | const Tegra::Engines::Fermi2D::Config& copy) { | 1143 | const Tegra::Engines::Fermi2D::Config& copy) { |
| 1139 | 1144 | ||
| @@ -1154,6 +1159,20 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | |||
| 1154 | has_deleted_images = false; | 1159 | has_deleted_images = false; |
| 1155 | src_id = FindImage(src_info, src_addr, try_options); | 1160 | src_id = FindImage(src_info, src_addr, try_options); |
| 1156 | dst_id = FindImage(dst_info, dst_addr, try_options); | 1161 | dst_id = FindImage(dst_info, dst_addr, try_options); |
| 1162 | if (!copy.must_accelerate) { | ||
| 1163 | do { | ||
| 1164 | if (!src_id && !dst_id) { | ||
| 1165 | return std::nullopt; | ||
| 1166 | } | ||
| 1167 | if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) { | ||
| 1168 | break; | ||
| 1169 | } | ||
| 1170 | if (dst_id && True(slot_images[dst_id].flags & ImageFlagBits::GpuModified)) { | ||
| 1171 | break; | ||
| 1172 | } | ||
| 1173 | return std::nullopt; | ||
| 1174 | } while (false); | ||
| 1175 | } | ||
| 1157 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | 1176 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; |
| 1158 | if (src_image && src_image->info.num_samples > 1) { | 1177 | if (src_image && src_image->info.num_samples > 1) { |
| 1159 | RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; | 1178 | RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; |
| @@ -1194,12 +1213,12 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | |||
| 1194 | dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); | 1213 | dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); |
| 1195 | } while (has_deleted_images); | 1214 | } while (has_deleted_images); |
| 1196 | } | 1215 | } |
| 1197 | return BlitImages{ | 1216 | return {BlitImages{ |
| 1198 | .dst_id = dst_id, | 1217 | .dst_id = dst_id, |
| 1199 | .src_id = src_id, | 1218 | .src_id = src_id, |
| 1200 | .dst_format = dst_info.format, | 1219 | .dst_format = dst_info.format, |
| 1201 | .src_format = src_info.format, | 1220 | .src_format = src_info.format, |
| 1202 | }; | 1221 | }}; |
| 1203 | } | 1222 | } |
| 1204 | 1223 | ||
| 1205 | template <class P> | 1224 | template <class P> |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2fa8445eb..9db7195bf 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -174,7 +174,7 @@ public: | |||
| 174 | void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); | 174 | void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); |
| 175 | 175 | ||
| 176 | /// Blit an image with the given parameters | 176 | /// Blit an image with the given parameters |
| 177 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | 177 | bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 178 | const Tegra::Engines::Fermi2D::Surface& src, | 178 | const Tegra::Engines::Fermi2D::Surface& src, |
| 179 | const Tegra::Engines::Fermi2D::Config& copy); | 179 | const Tegra::Engines::Fermi2D::Config& copy); |
| 180 | 180 | ||
| @@ -285,9 +285,9 @@ private: | |||
| 285 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | 285 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); |
| 286 | 286 | ||
| 287 | /// Return a blit image pair from the given guest blit parameters | 287 | /// Return a blit image pair from the given guest blit parameters |
| 288 | [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, | 288 | [[nodiscard]] std::optional<BlitImages> GetBlitImages( |
| 289 | const Tegra::Engines::Fermi2D::Surface& src, | 289 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, |
| 290 | const Tegra::Engines::Fermi2D::Config& copy); | 290 | const Tegra::Engines::Fermi2D::Config& copy); |
| 291 | 291 | ||
| 292 | /// Find or create a sampler from a guest descriptor sampler | 292 | /// Find or create a sampler from a guest descriptor sampler |
| 293 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); | 293 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); |
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index e670acc30..013ba0ceb 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui | |||
| @@ -231,6 +231,9 @@ | |||
| 231 | <property name="text"> | 231 | <property name="text"> |
| 232 | <string>Con&figure...</string> | 232 | <string>Con&figure...</string> |
| 233 | </property> | 233 | </property> |
| 234 | <property name="menuRole"> | ||
| 235 | <enum>QAction::PreferencesRole</enum> | ||
| 236 | </property> | ||
| 234 | </action> | 237 | </action> |
| 235 | <action name="action_Display_Dock_Widget_Headers"> | 238 | <action name="action_Display_Dock_Widget_Headers"> |
| 236 | <property name="checkable"> | 239 | <property name="checkable"> |
| @@ -363,6 +366,9 @@ | |||
| 363 | <property name="text"> | 366 | <property name="text"> |
| 364 | <string>&Configure TAS...</string> | 367 | <string>&Configure TAS...</string> |
| 365 | </property> | 368 | </property> |
| 369 | <property name="menuRole"> | ||
| 370 | <enum>QAction::NoRole</enum> | ||
| 371 | </property> | ||
| 366 | </action> | 372 | </action> |
| 367 | <action name="action_Configure_Current_Game"> | 373 | <action name="action_Configure_Current_Game"> |
| 368 | <property name="enabled"> | 374 | <property name="enabled"> |
| @@ -371,6 +377,9 @@ | |||
| 371 | <property name="text"> | 377 | <property name="text"> |
| 372 | <string>Configure C&urrent Game...</string> | 378 | <string>Configure C&urrent Game...</string> |
| 373 | </property> | 379 | </property> |
| 380 | <property name="menuRole"> | ||
| 381 | <enum>QAction::NoRole</enum> | ||
| 382 | </property> | ||
| 374 | </action> | 383 | </action> |
| 375 | <action name="action_TAS_Start"> | 384 | <action name="action_TAS_Start"> |
| 376 | <property name="enabled"> | 385 | <property name="enabled"> |