diff options
Diffstat (limited to 'src')
28 files changed, 2289 insertions, 344 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 24b7a083c..0a3e2f4d1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 32 | DEPENDS | 32 | DEPENDS |
| 33 | # WARNING! It was too much work to try and make a common location for this list, | 33 | # WARNING! It was too much work to try and make a common location for this list, |
| 34 | # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well | 34 | # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well |
| 35 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" | ||
| 36 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" | ||
| 35 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" | 37 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" |
| 36 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" | 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" |
| 37 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" | 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" |
diff --git a/src/core/settings.h b/src/core/settings.h index 36cd66fd4..33e1e06cd 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -437,7 +437,7 @@ struct Values { | |||
| 437 | bool renderer_debug; | 437 | bool renderer_debug; |
| 438 | int vulkan_device; | 438 | int vulkan_device; |
| 439 | 439 | ||
| 440 | float resolution_factor; | 440 | u16 resolution_factor{1}; |
| 441 | int aspect_ratio; | 441 | int aspect_ratio; |
| 442 | int max_anisotropy; | 442 | int max_anisotropy; |
| 443 | bool use_frame_limit; | 443 | bool use_frame_limit; |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 39d5d8401..099bb446e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -52,6 +52,8 @@ add_library(video_core STATIC | |||
| 52 | rasterizer_interface.h | 52 | rasterizer_interface.h |
| 53 | renderer_base.cpp | 53 | renderer_base.cpp |
| 54 | renderer_base.h | 54 | renderer_base.h |
| 55 | renderer_opengl/gl_arb_decompiler.cpp | ||
| 56 | renderer_opengl/gl_arb_decompiler.h | ||
| 55 | renderer_opengl/gl_buffer_cache.cpp | 57 | renderer_opengl/gl_buffer_cache.cpp |
| 56 | renderer_opengl/gl_buffer_cache.h | 58 | renderer_opengl/gl_buffer_cache.h |
| 57 | renderer_opengl/gl_device.cpp | 59 | renderer_opengl/gl_device.cpp |
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index e35ee0b67..e64170e66 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h | |||
| @@ -15,48 +15,47 @@ namespace VideoCommon { | |||
| 15 | 15 | ||
| 16 | class BufferBlock { | 16 | class BufferBlock { |
| 17 | public: | 17 | public: |
| 18 | bool Overlaps(const VAddr start, const VAddr end) const { | 18 | bool Overlaps(VAddr start, VAddr end) const { |
| 19 | return (cpu_addr < end) && (cpu_addr_end > start); | 19 | return (cpu_addr < end) && (cpu_addr_end > start); |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | bool IsInside(const VAddr other_start, const VAddr other_end) const { | 22 | bool IsInside(VAddr other_start, VAddr other_end) const { |
| 23 | return cpu_addr <= other_start && other_end <= cpu_addr_end; | 23 | return cpu_addr <= other_start && other_end <= cpu_addr_end; |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | std::size_t GetOffset(const VAddr in_addr) { | 26 | std::size_t Offset(VAddr in_addr) const { |
| 27 | return static_cast<std::size_t>(in_addr - cpu_addr); | 27 | return static_cast<std::size_t>(in_addr - cpu_addr); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | VAddr GetCpuAddr() const { | 30 | VAddr CpuAddr() const { |
| 31 | return cpu_addr; | 31 | return cpu_addr; |
| 32 | } | 32 | } |
| 33 | 33 | ||
| 34 | VAddr GetCpuAddrEnd() const { | 34 | VAddr CpuAddrEnd() const { |
| 35 | return cpu_addr_end; | 35 | return cpu_addr_end; |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | void SetCpuAddr(const VAddr new_addr) { | 38 | void SetCpuAddr(VAddr new_addr) { |
| 39 | cpu_addr = new_addr; | 39 | cpu_addr = new_addr; |
| 40 | cpu_addr_end = new_addr + size; | 40 | cpu_addr_end = new_addr + size; |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | std::size_t GetSize() const { | 43 | std::size_t Size() const { |
| 44 | return size; | 44 | return size; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | void SetEpoch(u64 new_epoch) { | 47 | u64 Epoch() const { |
| 48 | epoch = new_epoch; | 48 | return epoch; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | u64 GetEpoch() { | 51 | void SetEpoch(u64 new_epoch) { |
| 52 | return epoch; | 52 | epoch = new_epoch; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | protected: | 55 | protected: |
| 56 | explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} { | 56 | explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} { |
| 57 | SetCpuAddr(cpu_addr); | 57 | SetCpuAddr(cpu_addr_); |
| 58 | } | 58 | } |
| 59 | ~BufferBlock() = default; | ||
| 60 | 59 | ||
| 61 | private: | 60 | private: |
| 62 | VAddr cpu_addr{}; | 61 | VAddr cpu_addr{}; |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 77ae34339..308d8b55f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -30,12 +30,16 @@ | |||
| 30 | 30 | ||
| 31 | namespace VideoCommon { | 31 | namespace VideoCommon { |
| 32 | 32 | ||
| 33 | template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> | 33 | template <typename Buffer, typename BufferType, typename StreamBuffer> |
| 34 | class BufferCache { | 34 | class BufferCache { |
| 35 | using IntervalSet = boost::icl::interval_set<VAddr>; | 35 | using IntervalSet = boost::icl::interval_set<VAddr>; |
| 36 | using IntervalType = typename IntervalSet::interval_type; | 36 | using IntervalType = typename IntervalSet::interval_type; |
| 37 | using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; | 37 | using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; |
| 38 | 38 | ||
| 39 | static constexpr u64 WRITE_PAGE_BIT = 11; | ||
| 40 | static constexpr u64 BLOCK_PAGE_BITS = 21; | ||
| 41 | static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; | ||
| 42 | |||
| 39 | public: | 43 | public: |
| 40 | using BufferInfo = std::pair<BufferType, u64>; | 44 | using BufferInfo = std::pair<BufferType, u64>; |
| 41 | 45 | ||
| @@ -82,7 +86,7 @@ public: | |||
| 82 | } | 86 | } |
| 83 | } | 87 | } |
| 84 | 88 | ||
| 85 | OwnerBuffer block = GetBlock(cpu_addr, size); | 89 | Buffer* const block = GetBlock(cpu_addr, size); |
| 86 | MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); | 90 | MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); |
| 87 | if (!map) { | 91 | if (!map) { |
| 88 | return {GetEmptyBuffer(size), 0}; | 92 | return {GetEmptyBuffer(size), 0}; |
| @@ -98,7 +102,7 @@ public: | |||
| 98 | } | 102 | } |
| 99 | } | 103 | } |
| 100 | 104 | ||
| 101 | return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))}; | 105 | return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))}; |
| 102 | } | 106 | } |
| 103 | 107 | ||
| 104 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. | 108 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. |
| @@ -129,16 +133,18 @@ public: | |||
| 129 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | 133 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); |
| 130 | } | 134 | } |
| 131 | 135 | ||
| 136 | /// Function called at the end of each frame, inteded for deferred operations | ||
| 132 | void TickFrame() { | 137 | void TickFrame() { |
| 133 | ++epoch; | 138 | ++epoch; |
| 139 | |||
| 134 | while (!pending_destruction.empty()) { | 140 | while (!pending_destruction.empty()) { |
| 135 | // Delay at least 4 frames before destruction. | 141 | // Delay at least 4 frames before destruction. |
| 136 | // This is due to triple buffering happening on some drivers. | 142 | // This is due to triple buffering happening on some drivers. |
| 137 | static constexpr u64 epochs_to_destroy = 5; | 143 | static constexpr u64 epochs_to_destroy = 5; |
| 138 | if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) { | 144 | if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) { |
| 139 | break; | 145 | break; |
| 140 | } | 146 | } |
| 141 | pending_destruction.pop_front(); | 147 | pending_destruction.pop(); |
| 142 | } | 148 | } |
| 143 | } | 149 | } |
| 144 | 150 | ||
| @@ -253,23 +259,21 @@ public: | |||
| 253 | 259 | ||
| 254 | protected: | 260 | protected: |
| 255 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 261 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| 256 | std::unique_ptr<StreamBuffer> stream_buffer) | 262 | std::unique_ptr<StreamBuffer> stream_buffer_) |
| 257 | : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, | 263 | : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, |
| 258 | stream_buffer_handle{this->stream_buffer->GetHandle()} {} | 264 | stream_buffer_handle{stream_buffer->Handle()} {} |
| 259 | 265 | ||
| 260 | ~BufferCache() = default; | 266 | ~BufferCache() = default; |
| 261 | 267 | ||
| 262 | virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; | 268 | virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; |
| 263 | 269 | ||
| 264 | virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; | 270 | virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 265 | |||
| 266 | virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, | ||
| 267 | const u8* data) = 0; | 271 | const u8* data) = 0; |
| 268 | 272 | ||
| 269 | virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, | 273 | virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 270 | u8* data) = 0; | 274 | u8* data) = 0; |
| 271 | 275 | ||
| 272 | virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset, | 276 | virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 273 | std::size_t dst_offset, std::size_t size) = 0; | 277 | std::size_t dst_offset, std::size_t size) = 0; |
| 274 | 278 | ||
| 275 | virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { | 279 | virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { |
| @@ -325,7 +329,7 @@ protected: | |||
| 325 | } | 329 | } |
| 326 | 330 | ||
| 327 | private: | 331 | private: |
| 328 | MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr, | 332 | MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, |
| 329 | std::size_t size) { | 333 | std::size_t size) { |
| 330 | const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); | 334 | const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); |
| 331 | if (overlaps.empty()) { | 335 | if (overlaps.empty()) { |
| @@ -333,11 +337,11 @@ private: | |||
| 333 | const VAddr cpu_addr_end = cpu_addr + size; | 337 | const VAddr cpu_addr_end = cpu_addr + size; |
| 334 | if (memory_manager.IsGranularRange(gpu_addr, size)) { | 338 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 335 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); | 339 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); |
| 336 | UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); | 340 | UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); |
| 337 | } else { | 341 | } else { |
| 338 | staging_buffer.resize(size); | 342 | staging_buffer.resize(size); |
| 339 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | 343 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); |
| 340 | UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); | 344 | UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); |
| 341 | } | 345 | } |
| 342 | return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); | 346 | return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); |
| 343 | } | 347 | } |
| @@ -380,7 +384,7 @@ private: | |||
| 380 | return map; | 384 | return map; |
| 381 | } | 385 | } |
| 382 | 386 | ||
| 383 | void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, | 387 | void UpdateBlock(const Buffer* block, VAddr start, VAddr end, |
| 384 | const VectorMapInterval& overlaps) { | 388 | const VectorMapInterval& overlaps) { |
| 385 | const IntervalType base_interval{start, end}; | 389 | const IntervalType base_interval{start, end}; |
| 386 | IntervalSet interval_set{}; | 390 | IntervalSet interval_set{}; |
| @@ -390,13 +394,13 @@ private: | |||
| 390 | interval_set.subtract(subtract); | 394 | interval_set.subtract(subtract); |
| 391 | } | 395 | } |
| 392 | for (auto& interval : interval_set) { | 396 | for (auto& interval : interval_set) { |
| 393 | std::size_t size = interval.upper() - interval.lower(); | 397 | const std::size_t size = interval.upper() - interval.lower(); |
| 394 | if (size > 0) { | 398 | if (size == 0) { |
| 395 | staging_buffer.resize(size); | 399 | continue; |
| 396 | system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); | ||
| 397 | UploadBlockData(block, block->GetOffset(interval.lower()), size, | ||
| 398 | staging_buffer.data()); | ||
| 399 | } | 400 | } |
| 401 | staging_buffer.resize(size); | ||
| 402 | system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); | ||
| 403 | UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); | ||
| 400 | } | 404 | } |
| 401 | } | 405 | } |
| 402 | 406 | ||
| @@ -426,10 +430,14 @@ private: | |||
| 426 | } | 430 | } |
| 427 | 431 | ||
| 428 | void FlushMap(MapInterval* map) { | 432 | void FlushMap(MapInterval* map) { |
| 433 | const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS); | ||
| 434 | ASSERT_OR_EXECUTE(it != blocks.end(), return;); | ||
| 435 | |||
| 436 | std::shared_ptr<Buffer> block = it->second; | ||
| 437 | |||
| 429 | const std::size_t size = map->end - map->start; | 438 | const std::size_t size = map->end - map->start; |
| 430 | OwnerBuffer block = blocks[map->start >> block_page_bits]; | ||
| 431 | staging_buffer.resize(size); | 439 | staging_buffer.resize(size); |
| 432 | DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data()); | 440 | DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); |
| 433 | system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); | 441 | system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); |
| 434 | map->MarkAsModified(false, 0); | 442 | map->MarkAsModified(false, 0); |
| 435 | } | 443 | } |
| @@ -452,97 +460,89 @@ private: | |||
| 452 | buffer_offset = offset_aligned; | 460 | buffer_offset = offset_aligned; |
| 453 | } | 461 | } |
| 454 | 462 | ||
| 455 | OwnerBuffer EnlargeBlock(OwnerBuffer buffer) { | 463 | std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) { |
| 456 | const std::size_t old_size = buffer->GetSize(); | 464 | const std::size_t old_size = buffer->Size(); |
| 457 | const std::size_t new_size = old_size + block_page_size; | 465 | const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; |
| 458 | const VAddr cpu_addr = buffer->GetCpuAddr(); | 466 | const VAddr cpu_addr = buffer->CpuAddr(); |
| 459 | OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size); | 467 | std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); |
| 460 | CopyBlock(buffer, new_buffer, 0, 0, old_size); | 468 | CopyBlock(*buffer, *new_buffer, 0, 0, old_size); |
| 461 | buffer->SetEpoch(epoch); | 469 | QueueDestruction(std::move(buffer)); |
| 462 | pending_destruction.push_back(buffer); | 470 | |
| 463 | const VAddr cpu_addr_end = cpu_addr + new_size - 1; | 471 | const VAddr cpu_addr_end = cpu_addr + new_size - 1; |
| 464 | u64 page_start = cpu_addr >> block_page_bits; | 472 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; |
| 465 | const u64 page_end = cpu_addr_end >> block_page_bits; | 473 | for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { |
| 466 | while (page_start <= page_end) { | 474 | blocks.insert_or_assign(page_start, new_buffer); |
| 467 | blocks[page_start] = new_buffer; | ||
| 468 | ++page_start; | ||
| 469 | } | 475 | } |
| 476 | |||
| 470 | return new_buffer; | 477 | return new_buffer; |
| 471 | } | 478 | } |
| 472 | 479 | ||
| 473 | OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) { | 480 | std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first, |
| 474 | const std::size_t size_1 = first->GetSize(); | 481 | std::shared_ptr<Buffer> second) { |
| 475 | const std::size_t size_2 = second->GetSize(); | 482 | const std::size_t size_1 = first->Size(); |
| 476 | const VAddr first_addr = first->GetCpuAddr(); | 483 | const std::size_t size_2 = second->Size(); |
| 477 | const VAddr second_addr = second->GetCpuAddr(); | 484 | const VAddr first_addr = first->CpuAddr(); |
| 485 | const VAddr second_addr = second->CpuAddr(); | ||
| 478 | const VAddr new_addr = std::min(first_addr, second_addr); | 486 | const VAddr new_addr = std::min(first_addr, second_addr); |
| 479 | const std::size_t new_size = size_1 + size_2; | 487 | const std::size_t new_size = size_1 + size_2; |
| 480 | OwnerBuffer new_buffer = CreateBlock(new_addr, new_size); | 488 | |
| 481 | CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); | 489 | std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); |
| 482 | CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); | 490 | CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); |
| 483 | first->SetEpoch(epoch); | 491 | CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); |
| 484 | second->SetEpoch(epoch); | 492 | QueueDestruction(std::move(first)); |
| 485 | pending_destruction.push_back(first); | 493 | QueueDestruction(std::move(second)); |
| 486 | pending_destruction.push_back(second); | 494 | |
| 487 | const VAddr cpu_addr_end = new_addr + new_size - 1; | 495 | const VAddr cpu_addr_end = new_addr + new_size - 1; |
| 488 | u64 page_start = new_addr >> block_page_bits; | 496 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; |
| 489 | const u64 page_end = cpu_addr_end >> block_page_bits; | 497 | for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { |
| 490 | while (page_start <= page_end) { | 498 | blocks.insert_or_assign(page_start, new_buffer); |
| 491 | blocks[page_start] = new_buffer; | ||
| 492 | ++page_start; | ||
| 493 | } | 499 | } |
| 494 | return new_buffer; | 500 | return new_buffer; |
| 495 | } | 501 | } |
| 496 | 502 | ||
| 497 | OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { | 503 | Buffer* GetBlock(VAddr cpu_addr, std::size_t size) { |
| 498 | OwnerBuffer found; | 504 | std::shared_ptr<Buffer> found; |
| 505 | |||
| 499 | const VAddr cpu_addr_end = cpu_addr + size - 1; | 506 | const VAddr cpu_addr_end = cpu_addr + size - 1; |
| 500 | u64 page_start = cpu_addr >> block_page_bits; | 507 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; |
| 501 | const u64 page_end = cpu_addr_end >> block_page_bits; | 508 | for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { |
| 502 | while (page_start <= page_end) { | ||
| 503 | auto it = blocks.find(page_start); | 509 | auto it = blocks.find(page_start); |
| 504 | if (it == blocks.end()) { | 510 | if (it == blocks.end()) { |
| 505 | if (found) { | 511 | if (found) { |
| 506 | found = EnlargeBlock(found); | 512 | found = EnlargeBlock(found); |
| 507 | } else { | 513 | continue; |
| 508 | const VAddr start_addr = (page_start << block_page_bits); | ||
| 509 | found = CreateBlock(start_addr, block_page_size); | ||
| 510 | blocks[page_start] = found; | ||
| 511 | } | ||
| 512 | } else { | ||
| 513 | if (found) { | ||
| 514 | if (found == it->second) { | ||
| 515 | ++page_start; | ||
| 516 | continue; | ||
| 517 | } | ||
| 518 | found = MergeBlocks(found, it->second); | ||
| 519 | } else { | ||
| 520 | found = it->second; | ||
| 521 | } | 514 | } |
| 515 | const VAddr start_addr = page_start << BLOCK_PAGE_BITS; | ||
| 516 | found = CreateBlock(start_addr, BLOCK_PAGE_SIZE); | ||
| 517 | blocks.insert_or_assign(page_start, found); | ||
| 518 | continue; | ||
| 519 | } | ||
| 520 | if (!found) { | ||
| 521 | found = it->second; | ||
| 522 | continue; | ||
| 523 | } | ||
| 524 | if (found != it->second) { | ||
| 525 | found = MergeBlocks(std::move(found), it->second); | ||
| 522 | } | 526 | } |
| 523 | ++page_start; | ||
| 524 | } | 527 | } |
| 525 | return found; | 528 | return found.get(); |
| 526 | } | 529 | } |
| 527 | 530 | ||
| 528 | void MarkRegionAsWritten(const VAddr start, const VAddr end) { | 531 | void MarkRegionAsWritten(VAddr start, VAddr end) { |
| 529 | u64 page_start = start >> write_page_bit; | 532 | const u64 page_end = end >> WRITE_PAGE_BIT; |
| 530 | const u64 page_end = end >> write_page_bit; | 533 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { |
| 531 | while (page_start <= page_end) { | ||
| 532 | auto it = written_pages.find(page_start); | 534 | auto it = written_pages.find(page_start); |
| 533 | if (it != written_pages.end()) { | 535 | if (it != written_pages.end()) { |
| 534 | it->second = it->second + 1; | 536 | it->second = it->second + 1; |
| 535 | } else { | 537 | } else { |
| 536 | written_pages[page_start] = 1; | 538 | written_pages.insert_or_assign(page_start, 1); |
| 537 | } | 539 | } |
| 538 | ++page_start; | ||
| 539 | } | 540 | } |
| 540 | } | 541 | } |
| 541 | 542 | ||
| 542 | void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { | 543 | void UnmarkRegionAsWritten(VAddr start, VAddr end) { |
| 543 | u64 page_start = start >> write_page_bit; | 544 | const u64 page_end = end >> WRITE_PAGE_BIT; |
| 544 | const u64 page_end = end >> write_page_bit; | 545 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { |
| 545 | while (page_start <= page_end) { | ||
| 546 | auto it = written_pages.find(page_start); | 546 | auto it = written_pages.find(page_start); |
| 547 | if (it != written_pages.end()) { | 547 | if (it != written_pages.end()) { |
| 548 | if (it->second > 1) { | 548 | if (it->second > 1) { |
| @@ -551,22 +551,24 @@ private: | |||
| 551 | written_pages.erase(it); | 551 | written_pages.erase(it); |
| 552 | } | 552 | } |
| 553 | } | 553 | } |
| 554 | ++page_start; | ||
| 555 | } | 554 | } |
| 556 | } | 555 | } |
| 557 | 556 | ||
| 558 | bool IsRegionWritten(const VAddr start, const VAddr end) const { | 557 | bool IsRegionWritten(VAddr start, VAddr end) const { |
| 559 | u64 page_start = start >> write_page_bit; | 558 | const u64 page_end = end >> WRITE_PAGE_BIT; |
| 560 | const u64 page_end = end >> write_page_bit; | 559 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { |
| 561 | while (page_start <= page_end) { | ||
| 562 | if (written_pages.count(page_start) > 0) { | 560 | if (written_pages.count(page_start) > 0) { |
| 563 | return true; | 561 | return true; |
| 564 | } | 562 | } |
| 565 | ++page_start; | ||
| 566 | } | 563 | } |
| 567 | return false; | 564 | return false; |
| 568 | } | 565 | } |
| 569 | 566 | ||
| 567 | void QueueDestruction(std::shared_ptr<Buffer> buffer) { | ||
| 568 | buffer->SetEpoch(epoch); | ||
| 569 | pending_destruction.push(std::move(buffer)); | ||
| 570 | } | ||
| 571 | |||
| 570 | void MarkForAsyncFlush(MapInterval* map) { | 572 | void MarkForAsyncFlush(MapInterval* map) { |
| 571 | if (!uncommitted_flushes) { | 573 | if (!uncommitted_flushes) { |
| 572 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>(); | 574 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>(); |
| @@ -578,7 +580,7 @@ private: | |||
| 578 | Core::System& system; | 580 | Core::System& system; |
| 579 | 581 | ||
| 580 | std::unique_ptr<StreamBuffer> stream_buffer; | 582 | std::unique_ptr<StreamBuffer> stream_buffer; |
| 581 | BufferType stream_buffer_handle{}; | 583 | BufferType stream_buffer_handle; |
| 582 | 584 | ||
| 583 | u8* buffer_ptr = nullptr; | 585 | u8* buffer_ptr = nullptr; |
| 584 | u64 buffer_offset = 0; | 586 | u64 buffer_offset = 0; |
| @@ -588,18 +590,15 @@ private: | |||
| 588 | boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> | 590 | boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> |
| 589 | mapped_addresses; | 591 | mapped_addresses; |
| 590 | 592 | ||
| 591 | static constexpr u64 write_page_bit = 11; | ||
| 592 | std::unordered_map<u64, u32> written_pages; | 593 | std::unordered_map<u64, u32> written_pages; |
| 594 | std::unordered_map<u64, std::shared_ptr<Buffer>> blocks; | ||
| 593 | 595 | ||
| 594 | static constexpr u64 block_page_bits = 21; | 596 | std::queue<std::shared_ptr<Buffer>> pending_destruction; |
| 595 | static constexpr u64 block_page_size = 1ULL << block_page_bits; | ||
| 596 | std::unordered_map<u64, OwnerBuffer> blocks; | ||
| 597 | |||
| 598 | std::list<OwnerBuffer> pending_destruction; | ||
| 599 | u64 epoch = 0; | 597 | u64 epoch = 0; |
| 600 | u64 modified_ticks = 0; | 598 | u64 modified_ticks = 0; |
| 601 | 599 | ||
| 602 | std::vector<u8> staging_buffer; | 600 | std::vector<u8> staging_buffer; |
| 601 | |||
| 603 | std::list<MapInterval*> marked_for_unregister; | 602 | std::list<MapInterval*> marked_for_unregister; |
| 604 | 603 | ||
| 605 | std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; | 604 | std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; |
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 2d82c8cff..d4a97ec7b 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp | |||
| @@ -14,22 +14,16 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 | |||
| 14 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); | 14 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); |
| 15 | 15 | ||
| 16 | namespace Tegra { | 16 | namespace Tegra { |
| 17 | static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; | 17 | static const Xbyak::Reg64 STATE = Xbyak::util::rbx; |
| 18 | static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; | 18 | static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; |
| 19 | static const Xbyak::Reg64 STATE = Xbyak::util::r11; | 19 | static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; |
| 20 | static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; | ||
| 21 | static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; | ||
| 22 | static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; | ||
| 23 | static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; | 20 | static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; |
| 24 | static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; | ||
| 25 | static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; | 21 | static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; |
| 26 | 22 | ||
| 27 | static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | 23 | static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ |
| 28 | PARAMETERS, | ||
| 29 | REGISTERS, | ||
| 30 | STATE, | 24 | STATE, |
| 31 | NEXT_PARAMETER, | ||
| 32 | RESULT, | 25 | RESULT, |
| 26 | PARAMETERS, | ||
| 33 | METHOD_ADDRESS, | 27 | METHOD_ADDRESS, |
| 34 | BRANCH_HOLDER, | 28 | BRANCH_HOLDER, |
| 35 | }); | 29 | }); |
| @@ -53,8 +47,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { | |||
| 53 | JITState state{}; | 47 | JITState state{}; |
| 54 | state.maxwell3d = &maxwell3d; | 48 | state.maxwell3d = &maxwell3d; |
| 55 | state.registers = {}; | 49 | state.registers = {}; |
| 56 | state.parameters = parameters.data(); | 50 | program(&state, parameters.data()); |
| 57 | program(&state); | ||
| 58 | } | 51 | } |
| 59 | 52 | ||
| 60 | void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { | 53 | void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { |
| @@ -64,18 +57,18 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { | |||
| 64 | const bool is_move_operation = !is_a_zero && is_b_zero; | 57 | const bool is_move_operation = !is_a_zero && is_b_zero; |
| 65 | const bool has_zero_register = is_a_zero || is_b_zero; | 58 | const bool has_zero_register = is_a_zero || is_b_zero; |
| 66 | 59 | ||
| 67 | Xbyak::Reg64 src_a; | 60 | Xbyak::Reg32 src_a; |
| 68 | Xbyak::Reg32 src_b; | 61 | Xbyak::Reg32 src_b; |
| 69 | 62 | ||
| 70 | if (!optimizer.zero_reg_skip) { | 63 | if (!optimizer.zero_reg_skip) { |
| 71 | src_a = Compile_GetRegister(opcode.src_a, RESULT_64); | 64 | src_a = Compile_GetRegister(opcode.src_a, RESULT); |
| 72 | src_b = Compile_GetRegister(opcode.src_b, ebx); | 65 | src_b = Compile_GetRegister(opcode.src_b, eax); |
| 73 | } else { | 66 | } else { |
| 74 | if (!is_a_zero) { | 67 | if (!is_a_zero) { |
| 75 | src_a = Compile_GetRegister(opcode.src_a, RESULT_64); | 68 | src_a = Compile_GetRegister(opcode.src_a, RESULT); |
| 76 | } | 69 | } |
| 77 | if (!is_b_zero) { | 70 | if (!is_b_zero) { |
| 78 | src_b = Compile_GetRegister(opcode.src_b, ebx); | 71 | src_b = Compile_GetRegister(opcode.src_b, eax); |
| 79 | } | 72 | } |
| 80 | } | 73 | } |
| 81 | Xbyak::Label skip_carry{}; | 74 | Xbyak::Label skip_carry{}; |
| @@ -329,7 +322,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | |||
| 329 | and_(METHOD_ADDRESS, 0xfff); | 322 | and_(METHOD_ADDRESS, 0xfff); |
| 330 | shr(ecx, 12); | 323 | shr(ecx, 12); |
| 331 | and_(ecx, 0x3f); | 324 | and_(ecx, 0x3f); |
| 332 | lea(eax, ptr[rcx + METHOD_ADDRESS_64]); | 325 | lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); |
| 333 | sal(ecx, 12); | 326 | sal(ecx, 12); |
| 334 | or_(eax, ecx); | 327 | or_(eax, ecx); |
| 335 | 328 | ||
| @@ -424,16 +417,12 @@ void MacroJITx64Impl::Compile() { | |||
| 424 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); | 417 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
| 425 | // JIT state | 418 | // JIT state |
| 426 | mov(STATE, Common::X64::ABI_PARAM1); | 419 | mov(STATE, Common::X64::ABI_PARAM1); |
| 427 | mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + | 420 | mov(PARAMETERS, Common::X64::ABI_PARAM2); |
| 428 | static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]); | ||
| 429 | mov(REGISTERS, Common::X64::ABI_PARAM1); | ||
| 430 | add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers))); | ||
| 431 | xor_(RESULT, RESULT); | 421 | xor_(RESULT, RESULT); |
| 432 | xor_(METHOD_ADDRESS, METHOD_ADDRESS); | 422 | xor_(METHOD_ADDRESS, METHOD_ADDRESS); |
| 433 | xor_(NEXT_PARAMETER, NEXT_PARAMETER); | ||
| 434 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | 423 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); |
| 435 | 424 | ||
| 436 | mov(dword[REGISTERS + 4], Compile_FetchParameter()); | 425 | mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); |
| 437 | 426 | ||
| 438 | // Track get register for zero registers and mark it as no-op | 427 | // Track get register for zero registers and mark it as no-op |
| 439 | optimizer.zero_reg_skip = true; | 428 | optimizer.zero_reg_skip = true; |
| @@ -537,8 +526,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() { | |||
| 537 | } | 526 | } |
| 538 | 527 | ||
| 539 | Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { | 528 | Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { |
| 540 | mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); | 529 | mov(eax, dword[PARAMETERS]); |
| 541 | inc(NEXT_PARAMETER); | 530 | add(PARAMETERS, sizeof(u32)); |
| 542 | return eax; | 531 | return eax; |
| 543 | } | 532 | } |
| 544 | 533 | ||
| @@ -547,31 +536,12 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { | |||
| 547 | // Register 0 is always zero | 536 | // Register 0 is always zero |
| 548 | xor_(dst, dst); | 537 | xor_(dst, dst); |
| 549 | } else { | 538 | } else { |
| 550 | mov(dst, dword[REGISTERS + index * sizeof(u32)]); | 539 | mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); |
| 551 | } | 540 | } |
| 552 | 541 | ||
| 553 | return dst; | 542 | return dst; |
| 554 | } | 543 | } |
| 555 | 544 | ||
| 556 | Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { | ||
| 557 | if (index == 0) { | ||
| 558 | // Register 0 is always zero | ||
| 559 | xor_(dst, dst); | ||
| 560 | } else { | ||
| 561 | mov(dst, dword[REGISTERS + index * sizeof(u32)]); | ||
| 562 | } | ||
| 563 | |||
| 564 | return dst; | ||
| 565 | } | ||
| 566 | |||
| 567 | void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { | ||
| 568 | Xbyak::Label zero{}, end{}; | ||
| 569 | xor_(ecx, ecx); | ||
| 570 | shr(dst, 32); | ||
| 571 | setne(cl); | ||
| 572 | mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); | ||
| 573 | } | ||
| 574 | |||
| 575 | void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { | 545 | void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { |
| 576 | auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { | 546 | auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { |
| 577 | // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero | 547 | // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero |
| @@ -579,7 +549,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 | |||
| 579 | if (reg == 0) { | 549 | if (reg == 0) { |
| 580 | return; | 550 | return; |
| 581 | } | 551 | } |
| 582 | mov(dword[REGISTERS + reg * sizeof(u32)], result); | 552 | mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); |
| 583 | }; | 553 | }; |
| 584 | auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; | 554 | auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; |
| 585 | 555 | ||
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 21ee157cf..51ec090b8 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h | |||
| @@ -55,8 +55,6 @@ private: | |||
| 55 | 55 | ||
| 56 | Xbyak::Reg32 Compile_FetchParameter(); | 56 | Xbyak::Reg32 Compile_FetchParameter(); |
| 57 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | 57 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); |
| 58 | Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst); | ||
| 59 | void Compile_WriteCarry(Xbyak::Reg64 dst); | ||
| 60 | 58 | ||
| 61 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | 59 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); |
| 62 | void Compile_Send(Xbyak::Reg32 value); | 60 | void Compile_Send(Xbyak::Reg32 value); |
| @@ -67,11 +65,10 @@ private: | |||
| 67 | struct JITState { | 65 | struct JITState { |
| 68 | Engines::Maxwell3D* maxwell3d{}; | 66 | Engines::Maxwell3D* maxwell3d{}; |
| 69 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | 67 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; |
| 70 | const u32* parameters{}; | ||
| 71 | u32 carry_flag{}; | 68 | u32 carry_flag{}; |
| 72 | }; | 69 | }; |
| 73 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | 70 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); |
| 74 | using ProgramType = void (*)(JITState*); | 71 | using ProgramType = void (*)(JITState*, const u32*); |
| 75 | 72 | ||
| 76 | struct OptimizerState { | 73 | struct OptimizerState { |
| 77 | bool can_skip_carry{}; | 74 | bool can_skip_carry{}; |
| @@ -85,8 +82,8 @@ private: | |||
| 85 | std::optional<Macro::Opcode> next_opcode{}; | 82 | std::optional<Macro::Opcode> next_opcode{}; |
| 86 | ProgramType program{nullptr}; | 83 | ProgramType program{nullptr}; |
| 87 | 84 | ||
| 88 | std::array<Xbyak::Label, MAX_CODE_SIZE> labels{}; | 85 | std::array<Xbyak::Label, MAX_CODE_SIZE> labels; |
| 89 | std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip{}; | 86 | std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; |
| 90 | Xbyak::Label end_of_code{}; | 87 | Xbyak::Label end_of_code{}; |
| 91 | 88 | ||
| 92 | bool is_delay_slot{}; | 89 | bool is_delay_slot{}; |
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp new file mode 100644 index 000000000..1e96b0310 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp | |||
| @@ -0,0 +1,2074 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <variant> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/assert.h" | ||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 20 | #include "video_core/shader/registry.h" | ||
| 21 | #include "video_core/shader/shader_ir.h" | ||
| 22 | |||
| 23 | // Predicates in the decompiled code follow the convention that -1 means true and 0 means false. | ||
| 24 | // GLASM lacks booleans, so they have to be implemented as integers. | ||
| 25 | // Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to | ||
| 26 | // select between two values, because -1 will be evaluated as true and 0 as false. | ||
| 27 | |||
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | namespace { | ||
| 31 | |||
| 32 | using Tegra::Engines::ShaderType; | ||
| 33 | using Tegra::Shader::Attribute; | ||
| 34 | using Tegra::Shader::PixelImap; | ||
| 35 | using Tegra::Shader::Register; | ||
| 36 | using namespace VideoCommon::Shader; | ||
| 37 | using Operation = const OperationNode&; | ||
| 38 | |||
| 39 | constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; | ||
| 40 | |||
| 41 | char Swizzle(std::size_t component) { | ||
| 42 | ASSERT(component < 4); | ||
| 43 | return component["xyzw"]; | ||
| 44 | } | ||
| 45 | |||
| 46 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 47 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 48 | } | ||
| 49 | |||
| 50 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 51 | ASSERT(IsGenericAttribute(index)); | ||
| 52 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string_view Modifiers(Operation operation) { | ||
| 56 | const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); | ||
| 57 | if (meta && meta->precise) { | ||
| 58 | return ".PREC"; | ||
| 59 | } | ||
| 60 | return ""; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view GetInputFlags(PixelImap attribute) { | ||
| 64 | switch (attribute) { | ||
| 65 | case PixelImap::Perspective: | ||
| 66 | return ""; | ||
| 67 | case PixelImap::Constant: | ||
| 68 | return "FLAT "; | ||
| 69 | case PixelImap::ScreenLinear: | ||
| 70 | return "NOPERSPECTIVE "; | ||
| 71 | case PixelImap::Unused: | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); | ||
| 75 | return {}; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string_view ImageType(Tegra::Shader::ImageType image_type) { | ||
| 79 | switch (image_type) { | ||
| 80 | case Tegra::Shader::ImageType::Texture1D: | ||
| 81 | return "1D"; | ||
| 82 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 83 | return "BUFFER"; | ||
| 84 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 85 | return "ARRAY1D"; | ||
| 86 | case Tegra::Shader::ImageType::Texture2D: | ||
| 87 | return "2D"; | ||
| 88 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 89 | return "ARRAY2D"; | ||
| 90 | case Tegra::Shader::ImageType::Texture3D: | ||
| 91 | return "3D"; | ||
| 92 | } | ||
| 93 | UNREACHABLE(); | ||
| 94 | return {}; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::string_view StackName(MetaStackClass stack) { | ||
| 98 | switch (stack) { | ||
| 99 | case MetaStackClass::Ssy: | ||
| 100 | return "SSY"; | ||
| 101 | case MetaStackClass::Pbk: | ||
| 102 | return "PBK"; | ||
| 103 | } | ||
| 104 | UNREACHABLE(); | ||
| 105 | return ""; | ||
| 106 | }; | ||
| 107 | |||
| 108 | std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | ||
| 109 | switch (topology) { | ||
| 110 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | ||
| 111 | return "POINTS"; | ||
| 112 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | ||
| 113 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||
| 114 | return "LINES"; | ||
| 115 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | ||
| 116 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | ||
| 117 | return "LINES_ADJACENCY"; | ||
| 118 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||
| 119 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||
| 120 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||
| 121 | return "TRIANGLES"; | ||
| 122 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||
| 123 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||
| 124 | return "TRIANGLES_ADJACENCY"; | ||
| 125 | default: | ||
| 126 | UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||
| 127 | return "POINTS"; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 132 | switch (topology) { | ||
| 133 | case Tegra::Shader::OutputTopology::PointList: | ||
| 134 | return "POINTS"; | ||
| 135 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 136 | return "LINE_STRIP"; | ||
| 137 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 138 | return "TRIANGLE_STRIP"; | ||
| 139 | default: | ||
| 140 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 141 | return "points"; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | std::string_view StageInputName(ShaderType stage) { | ||
| 146 | switch (stage) { | ||
| 147 | case ShaderType::Vertex: | ||
| 148 | case ShaderType::Geometry: | ||
| 149 | return "vertex"; | ||
| 150 | case ShaderType::Fragment: | ||
| 151 | return "fragment"; | ||
| 152 | case ShaderType::Compute: | ||
| 153 | return "invocation"; | ||
| 154 | default: | ||
| 155 | UNREACHABLE(); | ||
| 156 | return ""; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string TextureType(const MetaTexture& meta) { | ||
| 161 | if (meta.sampler.is_buffer) { | ||
| 162 | return "BUFFER"; | ||
| 163 | } | ||
| 164 | std::string type; | ||
| 165 | if (meta.sampler.is_shadow) { | ||
| 166 | type += "SHADOW"; | ||
| 167 | } | ||
| 168 | if (meta.sampler.is_array) { | ||
| 169 | type += "ARRAY"; | ||
| 170 | } | ||
| 171 | type += [&meta] { | ||
| 172 | switch (meta.sampler.type) { | ||
| 173 | case Tegra::Shader::TextureType::Texture1D: | ||
| 174 | return "1D"; | ||
| 175 | case Tegra::Shader::TextureType::Texture2D: | ||
| 176 | return "2D"; | ||
| 177 | case Tegra::Shader::TextureType::Texture3D: | ||
| 178 | return "3D"; | ||
| 179 | case Tegra::Shader::TextureType::TextureCube: | ||
| 180 | return "CUBE"; | ||
| 181 | } | ||
| 182 | UNREACHABLE(); | ||
| 183 | return "2D"; | ||
| 184 | }(); | ||
| 185 | return type; | ||
| 186 | } | ||
| 187 | |||
| 188 | std::string GlobalMemoryName(const GlobalMemoryBase& base) { | ||
| 189 | return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); | ||
| 190 | } | ||
| 191 | |||
| 192 | class ARBDecompiler final { | ||
| 193 | public: | ||
| 194 | explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 195 | ShaderType stage, std::string_view identifier); | ||
| 196 | |||
| 197 | std::string Code() const { | ||
| 198 | return shader_source; | ||
| 199 | } | ||
| 200 | |||
| 201 | private: | ||
| 202 | void DeclareHeader(); | ||
| 203 | void DeclareVertex(); | ||
| 204 | void DeclareGeometry(); | ||
| 205 | void DeclareFragment(); | ||
| 206 | void DeclareCompute(); | ||
| 207 | void DeclareInputAttributes(); | ||
| 208 | void DeclareOutputAttributes(); | ||
| 209 | void DeclareLocalMemory(); | ||
| 210 | void DeclareGlobalMemory(); | ||
| 211 | void DeclareConstantBuffers(); | ||
| 212 | void DeclareRegisters(); | ||
| 213 | void DeclareTemporaries(); | ||
| 214 | void DeclarePredicates(); | ||
| 215 | void DeclareInternalFlags(); | ||
| 216 | |||
| 217 | void InitializeVariables(); | ||
| 218 | |||
| 219 | void DecompileAST(); | ||
| 220 | void DecompileBranchMode(); | ||
| 221 | |||
| 222 | void VisitAST(const ASTNode& node); | ||
| 223 | std::string VisitExpression(const Expr& node); | ||
| 224 | |||
| 225 | void VisitBlock(const NodeBlock& bb); | ||
| 226 | |||
| 227 | std::string Visit(const Node& node); | ||
| 228 | |||
| 229 | std::pair<std::string, std::size_t> BuildCoords(Operation); | ||
| 230 | std::string BuildAoffi(Operation); | ||
| 231 | void Exit(); | ||
| 232 | |||
| 233 | std::string Assign(Operation); | ||
| 234 | std::string Select(Operation); | ||
| 235 | std::string FClamp(Operation); | ||
| 236 | std::string FCastHalf0(Operation); | ||
| 237 | std::string FCastHalf1(Operation); | ||
| 238 | std::string FSqrt(Operation); | ||
| 239 | std::string FSwizzleAdd(Operation); | ||
| 240 | std::string HAdd2(Operation); | ||
| 241 | std::string HMul2(Operation); | ||
| 242 | std::string HFma2(Operation); | ||
| 243 | std::string HAbsolute(Operation); | ||
| 244 | std::string HNegate(Operation); | ||
| 245 | std::string HClamp(Operation); | ||
| 246 | std::string HCastFloat(Operation); | ||
| 247 | std::string HUnpack(Operation); | ||
| 248 | std::string HMergeF32(Operation); | ||
| 249 | std::string HMergeH0(Operation); | ||
| 250 | std::string HMergeH1(Operation); | ||
| 251 | std::string HPack2(Operation); | ||
| 252 | std::string LogicalAssign(Operation); | ||
| 253 | std::string LogicalPick2(Operation); | ||
| 254 | std::string LogicalAnd2(Operation); | ||
| 255 | std::string FloatOrdered(Operation); | ||
| 256 | std::string FloatUnordered(Operation); | ||
| 257 | std::string LogicalAddCarry(Operation); | ||
| 258 | std::string Texture(Operation); | ||
| 259 | std::string TextureGather(Operation); | ||
| 260 | std::string TextureQueryDimensions(Operation); | ||
| 261 | std::string TextureQueryLod(Operation); | ||
| 262 | std::string TexelFetch(Operation); | ||
| 263 | std::string TextureGradient(Operation); | ||
| 264 | std::string ImageLoad(Operation); | ||
| 265 | std::string ImageStore(Operation); | ||
| 266 | std::string Branch(Operation); | ||
| 267 | std::string BranchIndirect(Operation); | ||
| 268 | std::string PushFlowStack(Operation); | ||
| 269 | std::string PopFlowStack(Operation); | ||
| 270 | std::string Exit(Operation); | ||
| 271 | std::string Discard(Operation); | ||
| 272 | std::string EmitVertex(Operation); | ||
| 273 | std::string EndPrimitive(Operation); | ||
| 274 | std::string InvocationId(Operation); | ||
| 275 | std::string YNegate(Operation); | ||
| 276 | std::string ThreadId(Operation); | ||
| 277 | std::string ShuffleIndexed(Operation); | ||
| 278 | std::string Barrier(Operation); | ||
| 279 | std::string MemoryBarrierGroup(Operation); | ||
| 280 | std::string MemoryBarrierGlobal(Operation); | ||
| 281 | |||
| 282 | template <const std::string_view& op> | ||
| 283 | std::string Unary(Operation operation) { | ||
| 284 | const std::string temporary = AllocTemporary(); | ||
| 285 | AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); | ||
| 286 | return temporary; | ||
| 287 | } | ||
| 288 | |||
| 289 | template <const std::string_view& op> | ||
| 290 | std::string Binary(Operation operation) { | ||
| 291 | const std::string temporary = AllocTemporary(); | ||
| 292 | AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 293 | Visit(operation[1])); | ||
| 294 | return temporary; | ||
| 295 | } | ||
| 296 | |||
| 297 | template <const std::string_view& op> | ||
| 298 | std::string Trinary(Operation operation) { | ||
| 299 | const std::string temporary = AllocTemporary(); | ||
| 300 | AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 301 | Visit(operation[1]), Visit(operation[2])); | ||
| 302 | return temporary; | ||
| 303 | } | ||
| 304 | |||
| 305 | template <const std::string_view& op, bool unordered> | ||
| 306 | std::string FloatComparison(Operation operation) { | ||
| 307 | const std::string temporary = AllocTemporary(); | ||
| 308 | AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); | ||
| 309 | AddLine("MOV.S {}, 0;", temporary); | ||
| 310 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 311 | |||
| 312 | const std::string op_a = Visit(operation[0]); | ||
| 313 | const std::string op_b = Visit(operation[1]); | ||
| 314 | if constexpr (unordered) { | ||
| 315 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 316 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 317 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 318 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 319 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 320 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 321 | } else if (op == SNE_F) { | ||
| 322 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 323 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 324 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 325 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 326 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 327 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 328 | } | ||
| 329 | return temporary; | ||
| 330 | } | ||
| 331 | |||
| 332 | template <const std::string_view& op, bool is_nan> | ||
| 333 | std::string HalfComparison(Operation operation) { | ||
| 334 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 335 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 336 | const std::string op_a = Visit(operation[0]); | ||
| 337 | const std::string op_b = Visit(operation[1]); | ||
| 338 | AddLine("UP2H.F {}, {};", tmp1, op_a); | ||
| 339 | AddLine("UP2H.F {}, {};", tmp2, op_b); | ||
| 340 | AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); | ||
| 341 | AddLine("TRUNC.U.CC RC.xy, {};", tmp1); | ||
| 342 | AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); | ||
| 343 | AddLine("MOV.S {}.x (NE.x), -1;", tmp1); | ||
| 344 | AddLine("MOV.S {}.y (NE.y), -1;", tmp1); | ||
| 345 | if constexpr (is_nan) { | ||
| 346 | AddLine("MOVC.F RC.x, {};", op_a); | ||
| 347 | AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); | ||
| 348 | AddLine("MOVC.F RC.x, {};", op_b); | ||
| 349 | AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); | ||
| 350 | } | ||
| 351 | return tmp1; | ||
| 352 | } | ||
| 353 | |||
| 354 | template <const std::string_view& op, const std::string_view& type> | ||
| 355 | std::string AtomicImage(Operation operation) { | ||
| 356 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 357 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 358 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 359 | const std::size_t num_values = meta.values.size(); | ||
| 360 | |||
| 361 | const std::string coord = AllocVectorTemporary(); | ||
| 362 | const std::string value = AllocVectorTemporary(); | ||
| 363 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 364 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 365 | } | ||
| 366 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 367 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 368 | } | ||
| 369 | |||
| 370 | const std::string result = coord; | ||
| 371 | AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, result, value, coord, | ||
| 372 | image_id, ImageType(meta.image.type)); | ||
| 373 | return fmt::format("{}.x", result); | ||
| 374 | } | ||
| 375 | |||
| 376 | template <const std::string_view& op, const std::string_view& type> | ||
| 377 | std::string Atomic(Operation operation) { | ||
| 378 | const std::string temporary = AllocTemporary(); | ||
| 379 | std::string address; | ||
| 380 | std::string_view opname; | ||
| 381 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 382 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 383 | Visit(gmem->GetBaseAddress())); | ||
| 384 | address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); | ||
| 385 | opname = "ATOMB"; | ||
| 386 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 387 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||
| 388 | opname = "ATOMS"; | ||
| 389 | } else { | ||
| 390 | UNREACHABLE(); | ||
| 391 | return "{0, 0, 0, 0}"; | ||
| 392 | } | ||
| 393 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||
| 394 | return temporary; | ||
| 395 | } | ||
| 396 | |||
| 397 | template <char type> | ||
| 398 | std::string Negate(Operation operation) { | ||
| 399 | const std::string temporary = AllocTemporary(); | ||
| 400 | if constexpr (type == 'F') { | ||
| 401 | AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); | ||
| 402 | } else { | ||
| 403 | AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); | ||
| 404 | } | ||
| 405 | return temporary; | ||
| 406 | } | ||
| 407 | |||
| 408 | template <char type> | ||
| 409 | std::string Absolute(Operation operation) { | ||
| 410 | const std::string temporary = AllocTemporary(); | ||
| 411 | AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); | ||
| 412 | return temporary; | ||
| 413 | } | ||
| 414 | |||
| 415 | template <char type> | ||
| 416 | std::string BitfieldInsert(Operation operation) { | ||
| 417 | const std::string temporary = AllocVectorTemporary(); | ||
| 418 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); | ||
| 419 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); | ||
| 420 | AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), | ||
| 421 | Visit(operation[0])); | ||
| 422 | return fmt::format("{}.x", temporary); | ||
| 423 | } | ||
| 424 | |||
| 425 | template <char type> | ||
| 426 | std::string BitfieldExtract(Operation operation) { | ||
| 427 | const std::string temporary = AllocVectorTemporary(); | ||
| 428 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); | ||
| 429 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); | ||
| 430 | AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); | ||
| 431 | return fmt::format("{}.x", temporary); | ||
| 432 | } | ||
| 433 | |||
| 434 | template <char swizzle> | ||
| 435 | std::string LocalInvocationId(Operation) { | ||
| 436 | return fmt::format("invocation.localid.{}", swizzle); | ||
| 437 | } | ||
| 438 | |||
| 439 | template <char swizzle> | ||
| 440 | std::string WorkGroupId(Operation) { | ||
| 441 | return fmt::format("invocation.groupid.{}", swizzle); | ||
| 442 | } | ||
| 443 | |||
| 444 | template <char c1, char c2> | ||
| 445 | std::string ThreadMask(Operation) { | ||
| 446 | return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); | ||
| 447 | } | ||
| 448 | |||
| 449 | template <typename... Args> | ||
| 450 | void AddExpression(std::string_view text, Args&&... args) { | ||
| 451 | shader_source += fmt::format(text, std::forward<Args>(args)...); | ||
| 452 | } | ||
| 453 | |||
| 454 | template <typename... Args> | ||
| 455 | void AddLine(std::string_view text, Args&&... args) { | ||
| 456 | AddExpression(text, std::forward<Args>(args)...); | ||
| 457 | shader_source += '\n'; | ||
| 458 | } | ||
| 459 | |||
| 460 | std::string AllocTemporary() { | ||
| 461 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 462 | return fmt::format("T{}.x", num_temporaries++); | ||
| 463 | } | ||
| 464 | |||
| 465 | std::string AllocVectorTemporary() { | ||
| 466 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 467 | return fmt::format("T{}", num_temporaries++); | ||
| 468 | } | ||
| 469 | |||
| 470 | void ResetTemporaries() noexcept { | ||
| 471 | num_temporaries = 0; | ||
| 472 | } | ||
| 473 | |||
| 474 | const Device& device; | ||
| 475 | const ShaderIR& ir; | ||
| 476 | const Registry& registry; | ||
| 477 | const ShaderType stage; | ||
| 478 | |||
| 479 | std::size_t num_temporaries = 0; | ||
| 480 | std::size_t max_temporaries = 0; | ||
| 481 | |||
| 482 | std::string shader_source; | ||
| 483 | |||
| 484 | static constexpr std::string_view ADD_F32 = "ADD.F32"; | ||
| 485 | static constexpr std::string_view ADD_S = "ADD.S"; | ||
| 486 | static constexpr std::string_view ADD_U = "ADD.U"; | ||
| 487 | static constexpr std::string_view MUL_F32 = "MUL.F32"; | ||
| 488 | static constexpr std::string_view MUL_S = "MUL.S"; | ||
| 489 | static constexpr std::string_view MUL_U = "MUL.U"; | ||
| 490 | static constexpr std::string_view DIV_F32 = "DIV.F32"; | ||
| 491 | static constexpr std::string_view DIV_S = "DIV.S"; | ||
| 492 | static constexpr std::string_view DIV_U = "DIV.U"; | ||
| 493 | static constexpr std::string_view MAD_F32 = "MAD.F32"; | ||
| 494 | static constexpr std::string_view RSQ_F32 = "RSQ.F32"; | ||
| 495 | static constexpr std::string_view COS_F32 = "COS.F32"; | ||
| 496 | static constexpr std::string_view SIN_F32 = "SIN.F32"; | ||
| 497 | static constexpr std::string_view EX2_F32 = "EX2.F32"; | ||
| 498 | static constexpr std::string_view LG2_F32 = "LG2.F32"; | ||
| 499 | static constexpr std::string_view SLT_F = "SLT.F32"; | ||
| 500 | static constexpr std::string_view SLT_S = "SLT.S"; | ||
| 501 | static constexpr std::string_view SLT_U = "SLT.U"; | ||
| 502 | static constexpr std::string_view SEQ_F = "SEQ.F32"; | ||
| 503 | static constexpr std::string_view SEQ_S = "SEQ.S"; | ||
| 504 | static constexpr std::string_view SEQ_U = "SEQ.U"; | ||
| 505 | static constexpr std::string_view SLE_F = "SLE.F32"; | ||
| 506 | static constexpr std::string_view SLE_S = "SLE.S"; | ||
| 507 | static constexpr std::string_view SLE_U = "SLE.U"; | ||
| 508 | static constexpr std::string_view SGT_F = "SGT.F32"; | ||
| 509 | static constexpr std::string_view SGT_S = "SGT.S"; | ||
| 510 | static constexpr std::string_view SGT_U = "SGT.U"; | ||
| 511 | static constexpr std::string_view SNE_F = "SNE.F32"; | ||
| 512 | static constexpr std::string_view SNE_S = "SNE.S"; | ||
| 513 | static constexpr std::string_view SNE_U = "SNE.U"; | ||
| 514 | static constexpr std::string_view SGE_F = "SGE.F32"; | ||
| 515 | static constexpr std::string_view SGE_S = "SGE.S"; | ||
| 516 | static constexpr std::string_view SGE_U = "SGE.U"; | ||
| 517 | static constexpr std::string_view AND_S = "AND.S"; | ||
| 518 | static constexpr std::string_view AND_U = "AND.U"; | ||
| 519 | static constexpr std::string_view TRUNC_F = "TRUNC.F"; | ||
| 520 | static constexpr std::string_view TRUNC_S = "TRUNC.S"; | ||
| 521 | static constexpr std::string_view TRUNC_U = "TRUNC.U"; | ||
| 522 | static constexpr std::string_view SHL_S = "SHL.S"; | ||
| 523 | static constexpr std::string_view SHL_U = "SHL.U"; | ||
| 524 | static constexpr std::string_view SHR_S = "SHR.S"; | ||
| 525 | static constexpr std::string_view SHR_U = "SHR.U"; | ||
| 526 | static constexpr std::string_view OR_S = "OR.S"; | ||
| 527 | static constexpr std::string_view OR_U = "OR.U"; | ||
| 528 | static constexpr std::string_view XOR_S = "XOR.S"; | ||
| 529 | static constexpr std::string_view XOR_U = "XOR.U"; | ||
| 530 | static constexpr std::string_view NOT_S = "NOT.S"; | ||
| 531 | static constexpr std::string_view NOT_U = "NOT.U"; | ||
| 532 | static constexpr std::string_view BTC_S = "BTC.S"; | ||
| 533 | static constexpr std::string_view BTC_U = "BTC.U"; | ||
| 534 | static constexpr std::string_view BTFM_S = "BTFM.S"; | ||
| 535 | static constexpr std::string_view BTFM_U = "BTFM.U"; | ||
| 536 | static constexpr std::string_view ROUND_F = "ROUND.F"; | ||
| 537 | static constexpr std::string_view CEIL_F = "CEIL.F"; | ||
| 538 | static constexpr std::string_view FLR_F = "FLR.F"; | ||
| 539 | static constexpr std::string_view I2F_S = "I2F.S"; | ||
| 540 | static constexpr std::string_view I2F_U = "I2F.U"; | ||
| 541 | static constexpr std::string_view MIN_F = "MIN.F"; | ||
| 542 | static constexpr std::string_view MIN_S = "MIN.S"; | ||
| 543 | static constexpr std::string_view MIN_U = "MIN.U"; | ||
| 544 | static constexpr std::string_view MAX_F = "MAX.F"; | ||
| 545 | static constexpr std::string_view MAX_S = "MAX.S"; | ||
| 546 | static constexpr std::string_view MAX_U = "MAX.U"; | ||
| 547 | static constexpr std::string_view MOV_U = "MOV.U"; | ||
| 548 | static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; | ||
| 549 | static constexpr std::string_view TGALL_U = "TGALL.U"; | ||
| 550 | static constexpr std::string_view TGANY_U = "TGANY.U"; | ||
| 551 | static constexpr std::string_view TGEQ_U = "TGEQ.U"; | ||
| 552 | static constexpr std::string_view EXCH = "EXCH"; | ||
| 553 | static constexpr std::string_view ADD = "ADD"; | ||
| 554 | static constexpr std::string_view MIN = "MIN"; | ||
| 555 | static constexpr std::string_view MAX = "MAX"; | ||
| 556 | static constexpr std::string_view AND = "AND"; | ||
| 557 | static constexpr std::string_view OR = "OR"; | ||
| 558 | static constexpr std::string_view XOR = "XOR"; | ||
| 559 | static constexpr std::string_view U32 = "U32"; | ||
| 560 | static constexpr std::string_view S32 = "S32"; | ||
| 561 | |||
| 562 | static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); | ||
| 563 | using DecompilerType = std::string (ARBDecompiler::*)(Operation); | ||
| 564 | static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { | ||
| 565 | &ARBDecompiler::Assign, | ||
| 566 | |||
| 567 | &ARBDecompiler::Select, | ||
| 568 | |||
| 569 | &ARBDecompiler::Binary<ADD_F32>, | ||
| 570 | &ARBDecompiler::Binary<MUL_F32>, | ||
| 571 | &ARBDecompiler::Binary<DIV_F32>, | ||
| 572 | &ARBDecompiler::Trinary<MAD_F32>, | ||
| 573 | &ARBDecompiler::Negate<'F'>, | ||
| 574 | &ARBDecompiler::Absolute<'F'>, | ||
| 575 | &ARBDecompiler::FClamp, | ||
| 576 | &ARBDecompiler::FCastHalf0, | ||
| 577 | &ARBDecompiler::FCastHalf1, | ||
| 578 | &ARBDecompiler::Binary<MIN_F>, | ||
| 579 | &ARBDecompiler::Binary<MAX_F>, | ||
| 580 | &ARBDecompiler::Unary<COS_F32>, | ||
| 581 | &ARBDecompiler::Unary<SIN_F32>, | ||
| 582 | &ARBDecompiler::Unary<EX2_F32>, | ||
| 583 | &ARBDecompiler::Unary<LG2_F32>, | ||
| 584 | &ARBDecompiler::Unary<RSQ_F32>, | ||
| 585 | &ARBDecompiler::FSqrt, | ||
| 586 | &ARBDecompiler::Unary<ROUND_F>, | ||
| 587 | &ARBDecompiler::Unary<FLR_F>, | ||
| 588 | &ARBDecompiler::Unary<CEIL_F>, | ||
| 589 | &ARBDecompiler::Unary<TRUNC_F>, | ||
| 590 | &ARBDecompiler::Unary<I2F_S>, | ||
| 591 | &ARBDecompiler::Unary<I2F_U>, | ||
| 592 | &ARBDecompiler::FSwizzleAdd, | ||
| 593 | |||
| 594 | &ARBDecompiler::Binary<ADD_S>, | ||
| 595 | &ARBDecompiler::Binary<MUL_S>, | ||
| 596 | &ARBDecompiler::Binary<DIV_S>, | ||
| 597 | &ARBDecompiler::Negate<'S'>, | ||
| 598 | &ARBDecompiler::Absolute<'S'>, | ||
| 599 | &ARBDecompiler::Binary<MIN_S>, | ||
| 600 | &ARBDecompiler::Binary<MAX_S>, | ||
| 601 | |||
| 602 | &ARBDecompiler::Unary<TRUNC_S>, | ||
| 603 | &ARBDecompiler::Unary<MOV_U>, | ||
| 604 | &ARBDecompiler::Binary<SHL_S>, | ||
| 605 | &ARBDecompiler::Binary<SHR_U>, | ||
| 606 | &ARBDecompiler::Binary<SHR_S>, | ||
| 607 | &ARBDecompiler::Binary<AND_S>, | ||
| 608 | &ARBDecompiler::Binary<OR_S>, | ||
| 609 | &ARBDecompiler::Binary<XOR_S>, | ||
| 610 | &ARBDecompiler::Unary<NOT_S>, | ||
| 611 | &ARBDecompiler::BitfieldInsert<'S'>, | ||
| 612 | &ARBDecompiler::BitfieldExtract<'S'>, | ||
| 613 | &ARBDecompiler::Unary<BTC_S>, | ||
| 614 | &ARBDecompiler::Unary<BTFM_S>, | ||
| 615 | |||
| 616 | &ARBDecompiler::Binary<ADD_U>, | ||
| 617 | &ARBDecompiler::Binary<MUL_U>, | ||
| 618 | &ARBDecompiler::Binary<DIV_U>, | ||
| 619 | &ARBDecompiler::Binary<MIN_U>, | ||
| 620 | &ARBDecompiler::Binary<MAX_U>, | ||
| 621 | &ARBDecompiler::Unary<TRUNC_U>, | ||
| 622 | &ARBDecompiler::Unary<MOV_U>, | ||
| 623 | &ARBDecompiler::Binary<SHL_U>, | ||
| 624 | &ARBDecompiler::Binary<SHR_U>, | ||
| 625 | &ARBDecompiler::Binary<SHR_U>, | ||
| 626 | &ARBDecompiler::Binary<AND_U>, | ||
| 627 | &ARBDecompiler::Binary<OR_U>, | ||
| 628 | &ARBDecompiler::Binary<XOR_U>, | ||
| 629 | &ARBDecompiler::Unary<NOT_U>, | ||
| 630 | &ARBDecompiler::BitfieldInsert<'U'>, | ||
| 631 | &ARBDecompiler::BitfieldExtract<'U'>, | ||
| 632 | &ARBDecompiler::Unary<BTC_U>, | ||
| 633 | &ARBDecompiler::Unary<BTFM_U>, | ||
| 634 | |||
| 635 | &ARBDecompiler::HAdd2, | ||
| 636 | &ARBDecompiler::HMul2, | ||
| 637 | &ARBDecompiler::HFma2, | ||
| 638 | &ARBDecompiler::HAbsolute, | ||
| 639 | &ARBDecompiler::HNegate, | ||
| 640 | &ARBDecompiler::HClamp, | ||
| 641 | &ARBDecompiler::HCastFloat, | ||
| 642 | &ARBDecompiler::HUnpack, | ||
| 643 | &ARBDecompiler::HMergeF32, | ||
| 644 | &ARBDecompiler::HMergeH0, | ||
| 645 | &ARBDecompiler::HMergeH1, | ||
| 646 | &ARBDecompiler::HPack2, | ||
| 647 | |||
| 648 | &ARBDecompiler::LogicalAssign, | ||
| 649 | &ARBDecompiler::Binary<AND_U>, | ||
| 650 | &ARBDecompiler::Binary<OR_U>, | ||
| 651 | &ARBDecompiler::Binary<XOR_U>, | ||
| 652 | &ARBDecompiler::Unary<NOT_U>, | ||
| 653 | &ARBDecompiler::LogicalPick2, | ||
| 654 | &ARBDecompiler::LogicalAnd2, | ||
| 655 | |||
| 656 | &ARBDecompiler::FloatComparison<SLT_F, false>, | ||
| 657 | &ARBDecompiler::FloatComparison<SEQ_F, false>, | ||
| 658 | &ARBDecompiler::FloatComparison<SLE_F, false>, | ||
| 659 | &ARBDecompiler::FloatComparison<SGT_F, false>, | ||
| 660 | &ARBDecompiler::FloatComparison<SNE_F, false>, | ||
| 661 | &ARBDecompiler::FloatComparison<SGE_F, false>, | ||
| 662 | &ARBDecompiler::FloatOrdered, | ||
| 663 | &ARBDecompiler::FloatUnordered, | ||
| 664 | &ARBDecompiler::FloatComparison<SLT_F, true>, | ||
| 665 | &ARBDecompiler::FloatComparison<SEQ_F, true>, | ||
| 666 | &ARBDecompiler::FloatComparison<SLE_F, true>, | ||
| 667 | &ARBDecompiler::FloatComparison<SGT_F, true>, | ||
| 668 | &ARBDecompiler::FloatComparison<SNE_F, true>, | ||
| 669 | &ARBDecompiler::FloatComparison<SGE_F, true>, | ||
| 670 | |||
| 671 | &ARBDecompiler::Binary<SLT_S>, | ||
| 672 | &ARBDecompiler::Binary<SEQ_S>, | ||
| 673 | &ARBDecompiler::Binary<SLE_S>, | ||
| 674 | &ARBDecompiler::Binary<SGT_S>, | ||
| 675 | &ARBDecompiler::Binary<SNE_S>, | ||
| 676 | &ARBDecompiler::Binary<SGE_S>, | ||
| 677 | |||
| 678 | &ARBDecompiler::Binary<SLT_U>, | ||
| 679 | &ARBDecompiler::Binary<SEQ_U>, | ||
| 680 | &ARBDecompiler::Binary<SLE_U>, | ||
| 681 | &ARBDecompiler::Binary<SGT_U>, | ||
| 682 | &ARBDecompiler::Binary<SNE_U>, | ||
| 683 | &ARBDecompiler::Binary<SGE_U>, | ||
| 684 | |||
| 685 | &ARBDecompiler::LogicalAddCarry, | ||
| 686 | |||
| 687 | &ARBDecompiler::HalfComparison<SLT_F, false>, | ||
| 688 | &ARBDecompiler::HalfComparison<SEQ_F, false>, | ||
| 689 | &ARBDecompiler::HalfComparison<SLE_F, false>, | ||
| 690 | &ARBDecompiler::HalfComparison<SGT_F, false>, | ||
| 691 | &ARBDecompiler::HalfComparison<SNE_F, false>, | ||
| 692 | &ARBDecompiler::HalfComparison<SGE_F, false>, | ||
| 693 | &ARBDecompiler::HalfComparison<SLT_F, true>, | ||
| 694 | &ARBDecompiler::HalfComparison<SEQ_F, true>, | ||
| 695 | &ARBDecompiler::HalfComparison<SLE_F, true>, | ||
| 696 | &ARBDecompiler::HalfComparison<SGT_F, true>, | ||
| 697 | &ARBDecompiler::HalfComparison<SNE_F, true>, | ||
| 698 | &ARBDecompiler::HalfComparison<SGE_F, true>, | ||
| 699 | |||
| 700 | &ARBDecompiler::Texture, | ||
| 701 | &ARBDecompiler::Texture, | ||
| 702 | &ARBDecompiler::TextureGather, | ||
| 703 | &ARBDecompiler::TextureQueryDimensions, | ||
| 704 | &ARBDecompiler::TextureQueryLod, | ||
| 705 | &ARBDecompiler::TexelFetch, | ||
| 706 | &ARBDecompiler::TextureGradient, | ||
| 707 | |||
| 708 | &ARBDecompiler::ImageLoad, | ||
| 709 | &ARBDecompiler::ImageStore, | ||
| 710 | |||
| 711 | &ARBDecompiler::AtomicImage<ADD, U32>, | ||
| 712 | &ARBDecompiler::AtomicImage<AND, U32>, | ||
| 713 | &ARBDecompiler::AtomicImage<OR, U32>, | ||
| 714 | &ARBDecompiler::AtomicImage<XOR, U32>, | ||
| 715 | &ARBDecompiler::AtomicImage<EXCH, U32>, | ||
| 716 | |||
| 717 | &ARBDecompiler::Atomic<EXCH, U32>, | ||
| 718 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 719 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 720 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 721 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 722 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 723 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 724 | |||
| 725 | &ARBDecompiler::Atomic<EXCH, S32>, | ||
| 726 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 727 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 728 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 729 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 730 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 731 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 732 | |||
| 733 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 734 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 735 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 736 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 737 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 738 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 739 | |||
| 740 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 741 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 742 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 743 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 744 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 745 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 746 | |||
| 747 | &ARBDecompiler::Branch, | ||
| 748 | &ARBDecompiler::BranchIndirect, | ||
| 749 | &ARBDecompiler::PushFlowStack, | ||
| 750 | &ARBDecompiler::PopFlowStack, | ||
| 751 | &ARBDecompiler::Exit, | ||
| 752 | &ARBDecompiler::Discard, | ||
| 753 | |||
| 754 | &ARBDecompiler::EmitVertex, | ||
| 755 | &ARBDecompiler::EndPrimitive, | ||
| 756 | |||
| 757 | &ARBDecompiler::InvocationId, | ||
| 758 | &ARBDecompiler::YNegate, | ||
| 759 | &ARBDecompiler::LocalInvocationId<'x'>, | ||
| 760 | &ARBDecompiler::LocalInvocationId<'y'>, | ||
| 761 | &ARBDecompiler::LocalInvocationId<'z'>, | ||
| 762 | &ARBDecompiler::WorkGroupId<'x'>, | ||
| 763 | &ARBDecompiler::WorkGroupId<'y'>, | ||
| 764 | &ARBDecompiler::WorkGroupId<'z'>, | ||
| 765 | |||
| 766 | &ARBDecompiler::Unary<TGBALLOT_U>, | ||
| 767 | &ARBDecompiler::Unary<TGALL_U>, | ||
| 768 | &ARBDecompiler::Unary<TGANY_U>, | ||
| 769 | &ARBDecompiler::Unary<TGEQ_U>, | ||
| 770 | |||
| 771 | &ARBDecompiler::ThreadId, | ||
| 772 | &ARBDecompiler::ThreadMask<'e', 'q'>, | ||
| 773 | &ARBDecompiler::ThreadMask<'g', 'e'>, | ||
| 774 | &ARBDecompiler::ThreadMask<'g', 't'>, | ||
| 775 | &ARBDecompiler::ThreadMask<'l', 'e'>, | ||
| 776 | &ARBDecompiler::ThreadMask<'l', 't'>, | ||
| 777 | &ARBDecompiler::ShuffleIndexed, | ||
| 778 | |||
| 779 | &ARBDecompiler::Barrier, | ||
| 780 | &ARBDecompiler::MemoryBarrierGroup, | ||
| 781 | &ARBDecompiler::MemoryBarrierGlobal, | ||
| 782 | }; | ||
| 783 | }; | ||
| 784 | |||
| 785 | ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 786 | ShaderType stage, std::string_view identifier) | ||
| 787 | : device{device}, ir{ir}, registry{registry}, stage{stage} { | ||
| 788 | AddLine("TEMP RC;"); | ||
| 789 | AddLine("TEMP FSWZA[4];"); | ||
| 790 | AddLine("TEMP FSWZB[4];"); | ||
| 791 | if (ir.IsDecompiled()) { | ||
| 792 | DecompileAST(); | ||
| 793 | } else { | ||
| 794 | DecompileBranchMode(); | ||
| 795 | } | ||
| 796 | AddLine("END"); | ||
| 797 | |||
| 798 | const std::string code = std::move(shader_source); | ||
| 799 | DeclareHeader(); | ||
| 800 | DeclareVertex(); | ||
| 801 | DeclareGeometry(); | ||
| 802 | DeclareFragment(); | ||
| 803 | DeclareCompute(); | ||
| 804 | DeclareInputAttributes(); | ||
| 805 | DeclareOutputAttributes(); | ||
| 806 | DeclareLocalMemory(); | ||
| 807 | DeclareGlobalMemory(); | ||
| 808 | DeclareConstantBuffers(); | ||
| 809 | DeclareRegisters(); | ||
| 810 | DeclareTemporaries(); | ||
| 811 | DeclarePredicates(); | ||
| 812 | DeclareInternalFlags(); | ||
| 813 | |||
| 814 | shader_source += code; | ||
| 815 | } | ||
| 816 | |||
| 817 | std::string_view HeaderStageName(ShaderType stage) { | ||
| 818 | switch (stage) { | ||
| 819 | case ShaderType::Vertex: | ||
| 820 | return "vp"; | ||
| 821 | case ShaderType::Geometry: | ||
| 822 | return "gp"; | ||
| 823 | case ShaderType::Fragment: | ||
| 824 | return "fp"; | ||
| 825 | case ShaderType::Compute: | ||
| 826 | return "cp"; | ||
| 827 | default: | ||
| 828 | UNREACHABLE(); | ||
| 829 | return ""; | ||
| 830 | } | ||
| 831 | } | ||
| 832 | |||
| 833 | void ARBDecompiler::DeclareHeader() { | ||
| 834 | AddLine("!!NV{}5.0", HeaderStageName(stage)); | ||
| 835 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D | ||
| 836 | AddLine("OPTION NV_internal;"); | ||
| 837 | AddLine("OPTION NV_gpu_program_fp64;"); | ||
| 838 | AddLine("OPTION NV_shader_storage_buffer;"); | ||
| 839 | AddLine("OPTION NV_shader_thread_group;"); | ||
| 840 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { | ||
| 841 | AddLine("OPTION NV_shader_thread_shuffle;"); | ||
| 842 | } | ||
| 843 | if (stage == ShaderType::Vertex) { | ||
| 844 | if (device.HasNvViewportArray2()) { | ||
| 845 | AddLine("OPTION NV_viewport_array2;"); | ||
| 846 | } | ||
| 847 | } | ||
| 848 | if (stage == ShaderType::Fragment) { | ||
| 849 | AddLine("OPTION ARB_draw_buffers;"); | ||
| 850 | } | ||
| 851 | if (device.HasImageLoadFormatted()) { | ||
| 852 | AddLine("OPTION EXT_shader_image_load_formatted;"); | ||
| 853 | } | ||
| 854 | } | ||
| 855 | |||
| 856 | void ARBDecompiler::DeclareVertex() { | ||
| 857 | if (stage != ShaderType::Vertex) { | ||
| 858 | return; | ||
| 859 | } | ||
| 860 | AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); | ||
| 861 | } | ||
| 862 | |||
| 863 | void ARBDecompiler::DeclareGeometry() { | ||
| 864 | if (stage != ShaderType::Geometry) { | ||
| 865 | return; | ||
| 866 | } | ||
| 867 | const auto& info = registry.GetGraphicsInfo(); | ||
| 868 | const auto& header = ir.GetHeader(); | ||
| 869 | AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); | ||
| 870 | AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); | ||
| 871 | AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); | ||
| 872 | AddLine("ATTRIB vertex_position = vertex.position;"); | ||
| 873 | } | ||
| 874 | |||
| 875 | void ARBDecompiler::DeclareFragment() { | ||
| 876 | if (stage != ShaderType::Fragment) { | ||
| 877 | return; | ||
| 878 | } | ||
| 879 | AddLine("OUTPUT result_color7 = result.color[7];"); | ||
| 880 | AddLine("OUTPUT result_color6 = result.color[6];"); | ||
| 881 | AddLine("OUTPUT result_color5 = result.color[5];"); | ||
| 882 | AddLine("OUTPUT result_color4 = result.color[4];"); | ||
| 883 | AddLine("OUTPUT result_color3 = result.color[3];"); | ||
| 884 | AddLine("OUTPUT result_color2 = result.color[2];"); | ||
| 885 | AddLine("OUTPUT result_color1 = result.color[1];"); | ||
| 886 | AddLine("OUTPUT result_color0 = result.color;"); | ||
| 887 | } | ||
| 888 | |||
| 889 | void ARBDecompiler::DeclareCompute() { | ||
| 890 | if (stage != ShaderType::Compute) { | ||
| 891 | return; | ||
| 892 | } | ||
| 893 | const ComputeInfo& info = registry.GetComputeInfo(); | ||
| 894 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | ||
| 895 | info.workgroup_size[2]); | ||
| 896 | if (info.shared_memory_size_in_words > 0) { | ||
| 897 | const u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||
| 898 | AddLine("SHARED_MEMORY {};", size_in_bytes); | ||
| 899 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||
| 900 | } | ||
| 901 | } | ||
| 902 | |||
| 903 | void ARBDecompiler::DeclareInputAttributes() { | ||
| 904 | if (stage == ShaderType::Compute) { | ||
| 905 | return; | ||
| 906 | } | ||
| 907 | const std::string_view stage_name = StageInputName(stage); | ||
| 908 | for (const auto attribute : ir.GetInputAttributes()) { | ||
| 909 | if (!IsGenericAttribute(attribute)) { | ||
| 910 | continue; | ||
| 911 | } | ||
| 912 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 913 | |||
| 914 | std::string_view suffix; | ||
| 915 | if (stage == ShaderType::Fragment) { | ||
| 916 | const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; | ||
| 917 | if (input_mode == PixelImap::Unused) { | ||
| 918 | return; | ||
| 919 | } | ||
| 920 | suffix = GetInputFlags(input_mode); | ||
| 921 | } | ||
| 922 | AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, | ||
| 923 | index); | ||
| 924 | } | ||
| 925 | } | ||
| 926 | |||
| 927 | void ARBDecompiler::DeclareOutputAttributes() { | ||
| 928 | if (stage == ShaderType::Compute) { | ||
| 929 | return; | ||
| 930 | } | ||
| 931 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 932 | if (!IsGenericAttribute(attribute)) { | ||
| 933 | continue; | ||
| 934 | } | ||
| 935 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 936 | AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); | ||
| 937 | } | ||
| 938 | } | ||
| 939 | |||
| 940 | void ARBDecompiler::DeclareLocalMemory() { | ||
| 941 | u64 size = 0; | ||
| 942 | if (stage == ShaderType::Compute) { | ||
| 943 | size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 944 | } else { | ||
| 945 | size = ir.GetHeader().GetLocalMemorySize(); | ||
| 946 | } | ||
| 947 | if (size == 0) { | ||
| 948 | return; | ||
| 949 | } | ||
| 950 | const u64 element_count = Common::AlignUp(size, 4) / 4; | ||
| 951 | AddLine("TEMP lmem[{}];", element_count); | ||
| 952 | } | ||
| 953 | |||
| 954 | void ARBDecompiler::DeclareGlobalMemory() { | ||
| 955 | u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 956 | for (const auto& pair : ir.GetGlobalMemory()) { | ||
| 957 | const auto& base = pair.first; | ||
| 958 | AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); | ||
| 959 | ++binding; | ||
| 960 | } | ||
| 961 | } | ||
| 962 | |||
| 963 | void ARBDecompiler::DeclareConstantBuffers() { | ||
| 964 | u32 binding = 0; | ||
| 965 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 966 | AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); | ||
| 967 | ++binding; | ||
| 968 | } | ||
| 969 | } | ||
| 970 | |||
| 971 | void ARBDecompiler::DeclareRegisters() { | ||
| 972 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 973 | AddLine("TEMP R{};", gpr); | ||
| 974 | } | ||
| 975 | } | ||
| 976 | |||
| 977 | void ARBDecompiler::DeclareTemporaries() { | ||
| 978 | for (std::size_t i = 0; i < max_temporaries; ++i) { | ||
| 979 | AddLine("TEMP T{};", i); | ||
| 980 | } | ||
| 981 | } | ||
| 982 | |||
| 983 | void ARBDecompiler::DeclarePredicates() { | ||
| 984 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 985 | AddLine("TEMP P{};", static_cast<u64>(pred)); | ||
| 986 | } | ||
| 987 | } | ||
| 988 | |||
| 989 | void ARBDecompiler::DeclareInternalFlags() { | ||
| 990 | for (const char* name : INTERNAL_FLAG_NAMES) { | ||
| 991 | AddLine("TEMP {};", name); | ||
| 992 | } | ||
| 993 | } | ||
| 994 | |||
| 995 | void ARBDecompiler::InitializeVariables() { | ||
| 996 | AddLine("MOV.F32 FSWZA[0], -1;"); | ||
| 997 | AddLine("MOV.F32 FSWZA[1], 1;"); | ||
| 998 | AddLine("MOV.F32 FSWZA[2], -1;"); | ||
| 999 | AddLine("MOV.F32 FSWZA[3], 0;"); | ||
| 1000 | AddLine("MOV.F32 FSWZB[0], -1;"); | ||
| 1001 | AddLine("MOV.F32 FSWZB[1], -1;"); | ||
| 1002 | AddLine("MOV.F32 FSWZB[2], 1;"); | ||
| 1003 | AddLine("MOV.F32 FSWZB[3], -1;"); | ||
| 1004 | |||
| 1005 | if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { | ||
| 1006 | AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); | ||
| 1007 | } | ||
| 1008 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 1009 | if (!IsGenericAttribute(attribute)) { | ||
| 1010 | continue; | ||
| 1011 | } | ||
| 1012 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 1013 | AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); | ||
| 1014 | } | ||
| 1015 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1016 | AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); | ||
| 1017 | } | ||
| 1018 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1019 | AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); | ||
| 1020 | } | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | void ARBDecompiler::DecompileAST() { | ||
| 1024 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 1025 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1026 | AddLine("TEMP F{};", i); | ||
| 1027 | } | ||
| 1028 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1029 | AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | InitializeVariables(); | ||
| 1033 | |||
| 1034 | VisitAST(ir.GetASTProgram()); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | void ARBDecompiler::DecompileBranchMode() { | ||
| 1038 | static constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 1039 | if (!ir.IsFlowStackDisabled()) { | ||
| 1040 | AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); | ||
| 1041 | AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); | ||
| 1042 | AddLine("TEMP SSY_TOP;"); | ||
| 1043 | AddLine("TEMP PBK_TOP;"); | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | AddLine("TEMP PC;"); | ||
| 1047 | |||
| 1048 | if (!ir.IsFlowStackDisabled()) { | ||
| 1049 | AddLine("MOV.U SSY_TOP.x, 0;"); | ||
| 1050 | AddLine("MOV.U PBK_TOP.x, 0;"); | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | InitializeVariables(); | ||
| 1054 | |||
| 1055 | const auto basic_block_end = ir.GetBasicBlocks().end(); | ||
| 1056 | auto basic_block_it = ir.GetBasicBlocks().begin(); | ||
| 1057 | const u32 first_address = basic_block_it->first; | ||
| 1058 | AddLine("MOV.U PC.x, {};", first_address); | ||
| 1059 | |||
| 1060 | AddLine("REP;"); | ||
| 1061 | |||
| 1062 | std::size_t num_blocks = 0; | ||
| 1063 | while (basic_block_it != basic_block_end) { | ||
| 1064 | const auto& [address, bb] = *basic_block_it; | ||
| 1065 | ++num_blocks; | ||
| 1066 | |||
| 1067 | AddLine("SEQ.S.CC RC.x, PC.x, {};", address); | ||
| 1068 | AddLine("IF NE.x;"); | ||
| 1069 | |||
| 1070 | VisitBlock(bb); | ||
| 1071 | |||
| 1072 | ++basic_block_it; | ||
| 1073 | |||
| 1074 | if (basic_block_it != basic_block_end) { | ||
| 1075 | const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); | ||
| 1076 | if (!op || op->GetCode() != OperationCode::Branch) { | ||
| 1077 | const u32 next_address = basic_block_it->first; | ||
| 1078 | AddLine("MOV.U PC.x, {};", next_address); | ||
| 1079 | AddLine("CONT;"); | ||
| 1080 | } | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | AddLine("ELSE;"); | ||
| 1084 | } | ||
| 1085 | AddLine("RET;"); | ||
| 1086 | while (num_blocks--) { | ||
| 1087 | AddLine("ENDIF;"); | ||
| 1088 | } | ||
| 1089 | |||
| 1090 | AddLine("ENDREP;"); | ||
| 1091 | } | ||
| 1092 | |||
| 1093 | void ARBDecompiler::VisitAST(const ASTNode& node) { | ||
| 1094 | if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { | ||
| 1095 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1096 | VisitAST(current); | ||
| 1097 | } | ||
| 1098 | } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) { | ||
| 1099 | const std::string condition = VisitExpression(ast->condition); | ||
| 1100 | ResetTemporaries(); | ||
| 1101 | |||
| 1102 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1103 | AddLine("IF NE.x;"); | ||
| 1104 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1105 | VisitAST(current); | ||
| 1106 | } | ||
| 1107 | AddLine("ENDIF;"); | ||
| 1108 | } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) { | ||
| 1109 | AddLine("ELSE;"); | ||
| 1110 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1111 | VisitAST(current); | ||
| 1112 | } | ||
| 1113 | } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { | ||
| 1114 | VisitBlock(ast->nodes); | ||
| 1115 | } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) { | ||
| 1116 | AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition)); | ||
| 1117 | ResetTemporaries(); | ||
| 1118 | } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { | ||
| 1119 | const std::string condition = VisitExpression(ast->condition); | ||
| 1120 | ResetTemporaries(); | ||
| 1121 | AddLine("REP;"); | ||
| 1122 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1123 | VisitAST(current); | ||
| 1124 | } | ||
| 1125 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1126 | AddLine("BRK (NE.x);"); | ||
| 1127 | AddLine("ENDREP;"); | ||
| 1128 | } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) { | ||
| 1129 | const bool is_true = ExprIsTrue(ast->condition); | ||
| 1130 | if (!is_true) { | ||
| 1131 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1132 | AddLine("IF NE.x;"); | ||
| 1133 | ResetTemporaries(); | ||
| 1134 | } | ||
| 1135 | if (ast->kills) { | ||
| 1136 | AddLine("KIL TR;"); | ||
| 1137 | } else { | ||
| 1138 | Exit(); | ||
| 1139 | } | ||
| 1140 | if (!is_true) { | ||
| 1141 | AddLine("ENDIF;"); | ||
| 1142 | } | ||
| 1143 | } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) { | ||
| 1144 | if (ExprIsTrue(ast->condition)) { | ||
| 1145 | AddLine("BRK;"); | ||
| 1146 | } else { | ||
| 1147 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1148 | AddLine("BRK (NE.x);"); | ||
| 1149 | ResetTemporaries(); | ||
| 1150 | } | ||
| 1151 | } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { | ||
| 1152 | // Nothing to do | ||
| 1153 | } else { | ||
| 1154 | UNREACHABLE(); | ||
| 1155 | } | ||
| 1156 | } | ||
| 1157 | |||
| 1158 | std::string ARBDecompiler::VisitExpression(const Expr& node) { | ||
| 1159 | const std::string result = AllocTemporary(); | ||
| 1160 | if (const auto expr = std::get_if<ExprAnd>(&*node)) { | ||
| 1161 | AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1162 | VisitExpression(expr->operand2)); | ||
| 1163 | return result; | ||
| 1164 | } | ||
| 1165 | if (const auto expr = std::get_if<ExprOr>(&*node)) { | ||
| 1166 | const std::string result = AllocTemporary(); | ||
| 1167 | AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1168 | VisitExpression(expr->operand2)); | ||
| 1169 | return result; | ||
| 1170 | } | ||
| 1171 | if (const auto expr = std::get_if<ExprNot>(&*node)) { | ||
| 1172 | const std::string result = AllocTemporary(); | ||
| 1173 | AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); | ||
| 1174 | return result; | ||
| 1175 | } | ||
| 1176 | if (const auto expr = std::get_if<ExprPredicate>(&*node)) { | ||
| 1177 | return fmt::format("P{}.x", static_cast<u64>(expr->predicate)); | ||
| 1178 | } | ||
| 1179 | if (const auto expr = std::get_if<ExprCondCode>(&*node)) { | ||
| 1180 | return Visit(ir.GetConditionCode(expr->cc)); | ||
| 1181 | } | ||
| 1182 | if (const auto expr = std::get_if<ExprVar>(&*node)) { | ||
| 1183 | return fmt::format("F{}.x", expr->var_index); | ||
| 1184 | } | ||
| 1185 | if (const auto expr = std::get_if<ExprBoolean>(&*node)) { | ||
| 1186 | return expr->value ? "0xffffffff" : "0"; | ||
| 1187 | } | ||
| 1188 | if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { | ||
| 1189 | const std::string result = AllocTemporary(); | ||
| 1190 | AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); | ||
| 1191 | return result; | ||
| 1192 | } | ||
| 1193 | UNREACHABLE(); | ||
| 1194 | return "0"; | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | void ARBDecompiler::VisitBlock(const NodeBlock& bb) { | ||
| 1198 | for (const auto& node : bb) { | ||
| 1199 | Visit(node); | ||
| 1200 | } | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | std::string ARBDecompiler::Visit(const Node& node) { | ||
| 1204 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 1205 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 1206 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1207 | } | ||
| 1208 | const std::size_t index = static_cast<std::size_t>(operation->GetCode()); | ||
| 1209 | if (index >= OPERATION_DECOMPILERS.size()) { | ||
| 1210 | UNREACHABLE_MSG("Out of bounds operation: {}", index); | ||
| 1211 | return {}; | ||
| 1212 | } | ||
| 1213 | const auto decompiler = OPERATION_DECOMPILERS[index]; | ||
| 1214 | if (decompiler == nullptr) { | ||
| 1215 | UNREACHABLE_MSG("Undefined operation: {}", index); | ||
| 1216 | return {}; | ||
| 1217 | } | ||
| 1218 | return (this->*decompiler)(*operation); | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1222 | const u32 index = gpr->GetIndex(); | ||
| 1223 | if (index == Register::ZeroIndex) { | ||
| 1224 | return "{0, 0, 0, 0}.x"; | ||
| 1225 | } | ||
| 1226 | return fmt::format("R{}.x", index); | ||
| 1227 | } | ||
| 1228 | |||
| 1229 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1230 | return fmt::format("CV{}.x", cv->GetIndex()); | ||
| 1231 | } | ||
| 1232 | |||
| 1233 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1234 | const std::string temporary = AllocTemporary(); | ||
| 1235 | AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); | ||
| 1236 | return temporary; | ||
| 1237 | } | ||
| 1238 | |||
| 1239 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1240 | const std::string temporary = AllocTemporary(); | ||
| 1241 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1242 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1243 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1244 | break; | ||
| 1245 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1246 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1247 | break; | ||
| 1248 | default: | ||
| 1249 | AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); | ||
| 1250 | break; | ||
| 1251 | } | ||
| 1252 | if (predicate->IsNegated()) { | ||
| 1253 | AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); | ||
| 1254 | } | ||
| 1255 | return temporary; | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1259 | if (abuf->IsPhysicalBuffer()) { | ||
| 1260 | UNIMPLEMENTED_MSG("Physical buffers are not implemented"); | ||
| 1261 | return "{0, 0, 0, 0}.x"; | ||
| 1262 | } | ||
| 1263 | |||
| 1264 | const auto buffer_index = [this, &abuf]() -> std::string { | ||
| 1265 | if (stage != ShaderType::Geometry) { | ||
| 1266 | return ""; | ||
| 1267 | } | ||
| 1268 | return fmt::format("[{}]", Visit(abuf->GetBuffer())); | ||
| 1269 | }; | ||
| 1270 | |||
| 1271 | const Attribute::Index index = abuf->GetIndex(); | ||
| 1272 | const u32 element = abuf->GetElement(); | ||
| 1273 | const char swizzle = Swizzle(element); | ||
| 1274 | switch (index) { | ||
| 1275 | case Attribute::Index::Position: { | ||
| 1276 | if (stage == ShaderType::Geometry) { | ||
| 1277 | return fmt::format("{}_position[{}].{}", StageInputName(stage), | ||
| 1278 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1279 | } else { | ||
| 1280 | return fmt::format("{}.position.{}", StageInputName(stage), swizzle); | ||
| 1281 | } | ||
| 1282 | } | ||
| 1283 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1284 | ASSERT(stage == ShaderType::Vertex); | ||
| 1285 | switch (element) { | ||
| 1286 | case 2: | ||
| 1287 | return "vertex.instance"; | ||
| 1288 | case 3: | ||
| 1289 | return "vertex.id"; | ||
| 1290 | } | ||
| 1291 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 1292 | break; | ||
| 1293 | case Attribute::Index::PointCoord: | ||
| 1294 | switch (element) { | ||
| 1295 | case 0: | ||
| 1296 | return "fragment.pointcoord.x"; | ||
| 1297 | case 1: | ||
| 1298 | return "fragment.pointcoord.y"; | ||
| 1299 | } | ||
| 1300 | UNIMPLEMENTED(); | ||
| 1301 | break; | ||
| 1302 | case Attribute::Index::FrontFacing: { | ||
| 1303 | ASSERT(stage == ShaderType::Fragment); | ||
| 1304 | ASSERT(element == 3); | ||
| 1305 | const std::string temporary = AllocVectorTemporary(); | ||
| 1306 | AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); | ||
| 1307 | AddLine("MOV.U.CC RC.x, -RC;"); | ||
| 1308 | AddLine("MOV.S {}.x, 0;", temporary); | ||
| 1309 | AddLine("MOV.S {}.x (NE.x), -1;", temporary); | ||
| 1310 | return fmt::format("{}.x", temporary); | ||
| 1311 | } | ||
| 1312 | default: | ||
| 1313 | if (IsGenericAttribute(index)) { | ||
| 1314 | if (stage == ShaderType::Geometry) { | ||
| 1315 | return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), | ||
| 1316 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1317 | } else { | ||
| 1318 | return fmt::format("{}.attrib[{}].{}", StageInputName(stage), | ||
| 1319 | GetGenericAttributeIndex(index), swizzle); | ||
| 1320 | } | ||
| 1321 | } | ||
| 1322 | UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index)); | ||
| 1323 | break; | ||
| 1324 | } | ||
| 1325 | return "{0, 0, 0, 0}.x"; | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1329 | std::string offset_string; | ||
| 1330 | const auto& offset = cbuf->GetOffset(); | ||
| 1331 | if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1332 | offset_string = std::to_string(imm->GetValue()); | ||
| 1333 | } else { | ||
| 1334 | offset_string = Visit(offset); | ||
| 1335 | } | ||
| 1336 | const std::string temporary = AllocTemporary(); | ||
| 1337 | AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); | ||
| 1338 | return temporary; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1342 | const std::string temporary = AllocTemporary(); | ||
| 1343 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1344 | Visit(gmem->GetBaseAddress())); | ||
| 1345 | AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1346 | temporary); | ||
| 1347 | return temporary; | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1351 | const std::string temporary = Visit(lmem->GetAddress()); | ||
| 1352 | AddLine("SHR.U {}, {}, 2;", temporary, temporary); | ||
| 1353 | AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); | ||
| 1354 | return temporary; | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1358 | const std::string temporary = Visit(smem->GetAddress()); | ||
| 1359 | AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); | ||
| 1360 | return temporary; | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1364 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1365 | return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1369 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1370 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1371 | } | ||
| 1372 | AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); | ||
| 1373 | AddLine("IF NE.x;"); | ||
| 1374 | VisitBlock(conditional->GetCode()); | ||
| 1375 | AddLine("ENDIF;"); | ||
| 1376 | return {}; | ||
| 1377 | } | ||
| 1378 | |||
| 1379 | if (const auto cmt = std::get_if<CommentNode>(&*node)) { | ||
| 1380 | // Uncommenting this will generate invalid code. GLASM lacks comments. | ||
| 1381 | // AddLine("// {}", cmt->GetText()); | ||
| 1382 | return {}; | ||
| 1383 | } | ||
| 1384 | |||
| 1385 | UNIMPLEMENTED(); | ||
| 1386 | return {}; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { | ||
| 1390 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1391 | UNIMPLEMENTED_IF(meta.sampler.is_indexed); | ||
| 1392 | UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array && | ||
| 1393 | meta.sampler.type == Tegra::Shader::TextureType::TextureCube); | ||
| 1394 | |||
| 1395 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1396 | std::string temporary = AllocVectorTemporary(); | ||
| 1397 | std::size_t i = 0; | ||
| 1398 | for (; i < count; ++i) { | ||
| 1399 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1400 | } | ||
| 1401 | if (meta.sampler.is_array) { | ||
| 1402 | AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array)); | ||
| 1403 | } | ||
| 1404 | if (meta.sampler.is_shadow) { | ||
| 1405 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare)); | ||
| 1406 | } | ||
| 1407 | return {std::move(temporary), i}; | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | std::string ARBDecompiler::BuildAoffi(Operation operation) { | ||
| 1411 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1412 | if (meta.aoffi.empty()) { | ||
| 1413 | return {}; | ||
| 1414 | } | ||
| 1415 | const std::string temporary = AllocVectorTemporary(); | ||
| 1416 | std::size_t i = 0; | ||
| 1417 | for (auto& node : meta.aoffi) { | ||
| 1418 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); | ||
| 1419 | } | ||
| 1420 | return fmt::format(", offset({})", temporary); | ||
| 1421 | } | ||
| 1422 | |||
| 1423 | void ARBDecompiler::Exit() { | ||
| 1424 | if (stage != ShaderType::Fragment) { | ||
| 1425 | AddLine("RET;"); | ||
| 1426 | return; | ||
| 1427 | } | ||
| 1428 | |||
| 1429 | const auto safe_get_register = [this](u32 reg) -> std::string { | ||
| 1430 | // TODO(Rodrigo): Replace with contains once C++20 releases | ||
| 1431 | const auto& used_registers = ir.GetRegisters(); | ||
| 1432 | if (used_registers.find(reg) != used_registers.end()) { | ||
| 1433 | return fmt::format("R{}.x", reg); | ||
| 1434 | } | ||
| 1435 | return "{0, 0, 0, 0}.x"; | ||
| 1436 | }; | ||
| 1437 | |||
| 1438 | const auto& header = ir.GetHeader(); | ||
| 1439 | u32 current_reg = 0; | ||
| 1440 | for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { | ||
| 1441 | for (u32 component = 0; component < 4; ++component) { | ||
| 1442 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1443 | continue; | ||
| 1444 | } | ||
| 1445 | AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), | ||
| 1446 | safe_get_register(current_reg)); | ||
| 1447 | ++current_reg; | ||
| 1448 | } | ||
| 1449 | } | ||
| 1450 | if (header.ps.omap.depth) { | ||
| 1451 | AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); | ||
| 1452 | } | ||
| 1453 | |||
| 1454 | AddLine("RET;"); | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | std::string ARBDecompiler::Assign(Operation operation) { | ||
| 1458 | const Node& dest = operation[0]; | ||
| 1459 | const Node& src = operation[1]; | ||
| 1460 | |||
| 1461 | std::string dest_name; | ||
| 1462 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1463 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1464 | // Writing to Register::ZeroIndex is a no op | ||
| 1465 | return {}; | ||
| 1466 | } | ||
| 1467 | dest_name = fmt::format("R{}.x", gpr->GetIndex()); | ||
| 1468 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1469 | const u32 element = abuf->GetElement(); | ||
| 1470 | const char swizzle = Swizzle(element); | ||
| 1471 | switch (const Attribute::Index index = abuf->GetIndex()) { | ||
| 1472 | case Attribute::Index::Position: | ||
| 1473 | dest_name = fmt::format("result.position.{}", swizzle); | ||
| 1474 | break; | ||
| 1475 | case Attribute::Index::LayerViewportPointSize: | ||
| 1476 | switch (element) { | ||
| 1477 | case 0: | ||
| 1478 | UNIMPLEMENTED(); | ||
| 1479 | return {}; | ||
| 1480 | case 1: | ||
| 1481 | case 2: | ||
| 1482 | if (!device.HasNvViewportArray2()) { | ||
| 1483 | LOG_ERROR( | ||
| 1484 | Render_OpenGL, | ||
| 1485 | "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); | ||
| 1486 | return {}; | ||
| 1487 | } | ||
| 1488 | dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; | ||
| 1489 | break; | ||
| 1490 | case 3: | ||
| 1491 | dest_name = "result.pointsize.x"; | ||
| 1492 | break; | ||
| 1493 | } | ||
| 1494 | break; | ||
| 1495 | case Attribute::Index::ClipDistances0123: | ||
| 1496 | dest_name = fmt::format("result.clip[{}].x", element); | ||
| 1497 | break; | ||
| 1498 | case Attribute::Index::ClipDistances4567: | ||
| 1499 | dest_name = fmt::format("result.clip[{}].x", element + 4); | ||
| 1500 | break; | ||
| 1501 | default: | ||
| 1502 | if (!IsGenericAttribute(index)) { | ||
| 1503 | UNREACHABLE(); | ||
| 1504 | return {}; | ||
| 1505 | } | ||
| 1506 | dest_name = | ||
| 1507 | fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); | ||
| 1508 | break; | ||
| 1509 | } | ||
| 1510 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1511 | const std::string address = Visit(lmem->GetAddress()); | ||
| 1512 | AddLine("SHR.U {}, {}, 2;", address, address); | ||
| 1513 | dest_name = fmt::format("lmem[{}].x", address); | ||
| 1514 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1515 | AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); | ||
| 1516 | ResetTemporaries(); | ||
| 1517 | return {}; | ||
| 1518 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1519 | const std::string temporary = AllocTemporary(); | ||
| 1520 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1521 | Visit(gmem->GetBaseAddress())); | ||
| 1522 | AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1523 | temporary); | ||
| 1524 | ResetTemporaries(); | ||
| 1525 | return {}; | ||
| 1526 | } else { | ||
| 1527 | UNREACHABLE(); | ||
| 1528 | ResetTemporaries(); | ||
| 1529 | return {}; | ||
| 1530 | } | ||
| 1531 | |||
| 1532 | AddLine("MOV.U {}, {};", dest_name, Visit(src)); | ||
| 1533 | ResetTemporaries(); | ||
| 1534 | return {}; | ||
| 1535 | } | ||
| 1536 | |||
| 1537 | std::string ARBDecompiler::Select(Operation operation) { | ||
| 1538 | const std::string temporary = AllocTemporary(); | ||
| 1539 | AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), | ||
| 1540 | Visit(operation[2])); | ||
| 1541 | return temporary; | ||
| 1542 | } | ||
| 1543 | |||
| 1544 | std::string ARBDecompiler::FClamp(Operation operation) { | ||
| 1545 | // 1.0f in hex, replace with std::bit_cast on C++20 | ||
| 1546 | static constexpr u32 POSITIVE_ONE = 0x3f800000; | ||
| 1547 | |||
| 1548 | const std::string temporary = AllocTemporary(); | ||
| 1549 | const Node& value = operation[0]; | ||
| 1550 | const Node& low = operation[1]; | ||
| 1551 | const Node& high = operation[2]; | ||
| 1552 | const auto imm_low = std::get_if<ImmediateNode>(&*low); | ||
| 1553 | const auto imm_high = std::get_if<ImmediateNode>(&*high); | ||
| 1554 | if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { | ||
| 1555 | AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); | ||
| 1556 | } else { | ||
| 1557 | AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); | ||
| 1558 | AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); | ||
| 1559 | } | ||
| 1560 | return temporary; | ||
| 1561 | } | ||
| 1562 | |||
| 1563 | std::string ARBDecompiler::FCastHalf0(Operation operation) { | ||
| 1564 | const std::string temporary = AllocVectorTemporary(); | ||
| 1565 | AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1566 | return fmt::format("{}.x", temporary); | ||
| 1567 | } | ||
| 1568 | |||
| 1569 | std::string ARBDecompiler::FCastHalf1(Operation operation) { | ||
| 1570 | const std::string temporary = AllocVectorTemporary(); | ||
| 1571 | AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); | ||
| 1572 | AddLine("MOV {}.x, {}.y;", temporary, temporary); | ||
| 1573 | return fmt::format("{}.x", temporary); | ||
| 1574 | } | ||
| 1575 | |||
| 1576 | std::string ARBDecompiler::FSqrt(Operation operation) { | ||
| 1577 | const std::string temporary = AllocTemporary(); | ||
| 1578 | AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); | ||
| 1579 | AddLine("RCP.F32 {}, {};", temporary, temporary); | ||
| 1580 | return temporary; | ||
| 1581 | } | ||
| 1582 | |||
| 1583 | std::string ARBDecompiler::FSwizzleAdd(Operation operation) { | ||
| 1584 | const std::string temporary = AllocVectorTemporary(); | ||
| 1585 | if (!device.HasWarpIntrinsics()) { | ||
| 1586 | LOG_ERROR(Render_OpenGL, | ||
| 1587 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 1588 | AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); | ||
| 1589 | return fmt::format("{}.x", temporary); | ||
| 1590 | } | ||
| 1591 | const std::string lut = AllocVectorTemporary(); | ||
| 1592 | AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); | ||
| 1593 | AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); | ||
| 1594 | AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); | ||
| 1595 | AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); | ||
| 1596 | AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); | ||
| 1597 | AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); | ||
| 1598 | AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); | ||
| 1599 | return fmt::format("{}.x", temporary); | ||
| 1600 | } | ||
| 1601 | |||
| 1602 | std::string ARBDecompiler::HAdd2(Operation operation) { | ||
| 1603 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1604 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1605 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1606 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1607 | AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1608 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1609 | return fmt::format("{}.x", tmp1); | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | std::string ARBDecompiler::HMul2(Operation operation) { | ||
| 1613 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1614 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1615 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1616 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1617 | AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1618 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1619 | return fmt::format("{}.x", tmp1); | ||
| 1620 | } | ||
| 1621 | |||
| 1622 | std::string ARBDecompiler::HFma2(Operation operation) { | ||
| 1623 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1624 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1625 | const std::string tmp3 = AllocVectorTemporary(); | ||
| 1626 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1627 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1628 | AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); | ||
| 1629 | AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); | ||
| 1630 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1631 | return fmt::format("{}.x", tmp1); | ||
| 1632 | } | ||
| 1633 | |||
| 1634 | std::string ARBDecompiler::HAbsolute(Operation operation) { | ||
| 1635 | const std::string temporary = AllocVectorTemporary(); | ||
| 1636 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1637 | AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); | ||
| 1638 | return fmt::format("{}.x", temporary); | ||
| 1639 | } | ||
| 1640 | |||
| 1641 | std::string ARBDecompiler::HNegate(Operation operation) { | ||
| 1642 | const std::string temporary = AllocVectorTemporary(); | ||
| 1643 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1644 | AddLine("MOVC.S RC.x, {};", Visit(operation[1])); | ||
| 1645 | AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); | ||
| 1646 | AddLine("MOVC.S RC.x, {};", Visit(operation[2])); | ||
| 1647 | AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); | ||
| 1648 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1649 | return fmt::format("{}.x", temporary); | ||
| 1650 | } | ||
| 1651 | |||
| 1652 | std::string ARBDecompiler::HClamp(Operation operation) { | ||
| 1653 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1654 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1655 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1656 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); | ||
| 1657 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1658 | AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1659 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); | ||
| 1660 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1661 | AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1662 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1663 | return fmt::format("{}.x", tmp1); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | std::string ARBDecompiler::HCastFloat(Operation operation) { | ||
| 1667 | const std::string temporary = AllocVectorTemporary(); | ||
| 1668 | AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); | ||
| 1669 | AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1670 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1671 | return fmt::format("{}.x", temporary); | ||
| 1672 | } | ||
| 1673 | |||
| 1674 | std::string ARBDecompiler::HUnpack(Operation operation) { | ||
| 1675 | const std::string operand = Visit(operation[0]); | ||
| 1676 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1677 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1678 | return operand; | ||
| 1679 | case Tegra::Shader::HalfType::F32: { | ||
| 1680 | const std::string temporary = AllocVectorTemporary(); | ||
| 1681 | AddLine("MOV.U {}.x, {};", temporary, operand); | ||
| 1682 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1683 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1684 | return fmt::format("{}.x", temporary); | ||
| 1685 | } | ||
| 1686 | case Tegra::Shader::HalfType::H0_H0: { | ||
| 1687 | const std::string temporary = AllocVectorTemporary(); | ||
| 1688 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1689 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1690 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1691 | return fmt::format("{}.x", temporary); | ||
| 1692 | } | ||
| 1693 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1694 | const std::string temporary = AllocVectorTemporary(); | ||
| 1695 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1696 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 1697 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1698 | return fmt::format("{}.x", temporary); | ||
| 1699 | } | ||
| 1700 | } | ||
| 1701 | UNREACHABLE(); | ||
| 1702 | return "{0, 0, 0, 0}.x"; | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | std::string ARBDecompiler::HMergeF32(Operation operation) { | ||
| 1706 | const std::string temporary = AllocVectorTemporary(); | ||
| 1707 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1708 | return fmt::format("{}.x", temporary); | ||
| 1709 | } | ||
| 1710 | |||
| 1711 | std::string ARBDecompiler::HMergeH0(Operation operation) { | ||
| 1712 | const std::string temporary = AllocVectorTemporary(); | ||
| 1713 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1714 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1715 | AddLine("MOV.U {}.x, {}.z;", temporary, temporary); | ||
| 1716 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1717 | return fmt::format("{}.x", temporary); | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | std::string ARBDecompiler::HMergeH1(Operation operation) { | ||
| 1721 | const std::string temporary = AllocVectorTemporary(); | ||
| 1722 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1723 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1724 | AddLine("MOV.U {}.y, {}.w;", temporary, temporary); | ||
| 1725 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1726 | return fmt::format("{}.x", temporary); | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | std::string ARBDecompiler::HPack2(Operation operation) { | ||
| 1730 | const std::string temporary = AllocVectorTemporary(); | ||
| 1731 | AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); | ||
| 1732 | AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); | ||
| 1733 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1734 | return fmt::format("{}.x", temporary); | ||
| 1735 | } | ||
| 1736 | |||
| 1737 | std::string ARBDecompiler::LogicalAssign(Operation operation) { | ||
| 1738 | const Node& dest = operation[0]; | ||
| 1739 | const Node& src = operation[1]; | ||
| 1740 | |||
| 1741 | std::string target; | ||
| 1742 | |||
| 1743 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1744 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1745 | |||
| 1746 | const Tegra::Shader::Pred index = pred->GetIndex(); | ||
| 1747 | switch (index) { | ||
| 1748 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1749 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1750 | // Writing to these predicates is a no-op | ||
| 1751 | return {}; | ||
| 1752 | } | ||
| 1753 | target = fmt::format("P{}.x", static_cast<u64>(index)); | ||
| 1754 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1755 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1756 | target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1757 | } else { | ||
| 1758 | UNREACHABLE(); | ||
| 1759 | ResetTemporaries(); | ||
| 1760 | return {}; | ||
| 1761 | } | ||
| 1762 | |||
| 1763 | AddLine("MOV.U {}, {};", target, Visit(src)); | ||
| 1764 | ResetTemporaries(); | ||
| 1765 | return {}; | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | std::string ARBDecompiler::LogicalPick2(Operation operation) { | ||
| 1769 | const std::string temporary = AllocTemporary(); | ||
| 1770 | const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); | ||
| 1771 | AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); | ||
| 1772 | return temporary; | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | std::string ARBDecompiler::LogicalAnd2(Operation operation) { | ||
| 1776 | const std::string temporary = AllocTemporary(); | ||
| 1777 | const std::string op = Visit(operation[0]); | ||
| 1778 | AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); | ||
| 1779 | return temporary; | ||
| 1780 | } | ||
| 1781 | |||
| 1782 | std::string ARBDecompiler::FloatOrdered(Operation operation) { | ||
| 1783 | const std::string temporary = AllocTemporary(); | ||
| 1784 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1785 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1786 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1787 | AddLine("MOV.S {} (NAN.x), 0;", temporary); | ||
| 1788 | AddLine("MOV.S {} (NAN.y), 0;", temporary); | ||
| 1789 | return temporary; | ||
| 1790 | } | ||
| 1791 | |||
| 1792 | std::string ARBDecompiler::FloatUnordered(Operation operation) { | ||
| 1793 | const std::string temporary = AllocTemporary(); | ||
| 1794 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1795 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1796 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1797 | AddLine("MOV.S {} (NAN.x), -1;", temporary); | ||
| 1798 | AddLine("MOV.S {} (NAN.y), -1;", temporary); | ||
| 1799 | return temporary; | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | std::string ARBDecompiler::LogicalAddCarry(Operation operation) { | ||
| 1803 | const std::string temporary = AllocTemporary(); | ||
| 1804 | AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); | ||
| 1805 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1806 | AddLine("IF CF.x;"); | ||
| 1807 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1808 | AddLine("ENDIF;"); | ||
| 1809 | return temporary; | ||
| 1810 | } | ||
| 1811 | |||
| 1812 | std::string ARBDecompiler::Texture(Operation operation) { | ||
| 1813 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1814 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1815 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1816 | |||
| 1817 | std::string_view opcode = "TEX"; | ||
| 1818 | std::string extra; | ||
| 1819 | if (meta.bias) { | ||
| 1820 | ASSERT(!meta.lod); | ||
| 1821 | opcode = "TXB"; | ||
| 1822 | |||
| 1823 | if (swizzle < 4) { | ||
| 1824 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); | ||
| 1825 | } else { | ||
| 1826 | const std::string bias = AllocTemporary(); | ||
| 1827 | AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); | ||
| 1828 | extra = fmt::format(" {},", bias); | ||
| 1829 | } | ||
| 1830 | } | ||
| 1831 | if (meta.lod) { | ||
| 1832 | ASSERT(!meta.bias); | ||
| 1833 | opcode = "TXL"; | ||
| 1834 | |||
| 1835 | if (swizzle < 4) { | ||
| 1836 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1837 | } else { | ||
| 1838 | const std::string lod = AllocTemporary(); | ||
| 1839 | AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); | ||
| 1840 | extra = fmt::format(" {},", lod); | ||
| 1841 | } | ||
| 1842 | } | ||
| 1843 | |||
| 1844 | AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id, | ||
| 1845 | TextureType(meta), BuildAoffi(operation)); | ||
| 1846 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1847 | return fmt::format("{}.x", temporary); | ||
| 1848 | } | ||
| 1849 | |||
| 1850 | std::string ARBDecompiler::TextureGather(Operation operation) { | ||
| 1851 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1852 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1853 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1854 | |||
| 1855 | std::string comp; | ||
| 1856 | if (!meta.sampler.is_shadow) { | ||
| 1857 | const auto& immediate = std::get<ImmediateNode>(*meta.component); | ||
| 1858 | comp = fmt::format(".{}", Swizzle(immediate.GetValue())); | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, | ||
| 1862 | TextureType(meta), BuildAoffi(operation)); | ||
| 1863 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1864 | return fmt::format("{}.x", temporary); | ||
| 1865 | } | ||
| 1866 | |||
| 1867 | std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { | ||
| 1868 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1869 | const std::string temporary = AllocVectorTemporary(); | ||
| 1870 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1871 | |||
| 1872 | ASSERT(!meta.sampler.is_array); | ||
| 1873 | |||
| 1874 | const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; | ||
| 1875 | AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); | ||
| 1876 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1877 | return fmt::format("{}.x", temporary); | ||
| 1878 | } | ||
| 1879 | |||
| 1880 | std::string ARBDecompiler::TextureQueryLod(Operation operation) { | ||
| 1881 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1882 | const std::string temporary = AllocVectorTemporary(); | ||
| 1883 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1884 | |||
| 1885 | ASSERT(!meta.sampler.is_array); | ||
| 1886 | |||
| 1887 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1888 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1889 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1890 | } | ||
| 1891 | AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); | ||
| 1892 | AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); | ||
| 1893 | AddLine("TRUNC.S {}, {};", temporary, temporary); | ||
| 1894 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1895 | return fmt::format("{}.x", temporary); | ||
| 1896 | } | ||
| 1897 | |||
| 1898 | std::string ARBDecompiler::TexelFetch(Operation operation) { | ||
| 1899 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1900 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1901 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1902 | |||
| 1903 | if (!meta.sampler.is_buffer) { | ||
| 1904 | ASSERT(swizzle < 4); | ||
| 1905 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1906 | } | ||
| 1907 | AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta), | ||
| 1908 | BuildAoffi(operation)); | ||
| 1909 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1910 | return fmt::format("{}.x", temporary); | ||
| 1911 | } | ||
| 1912 | |||
| 1913 | std::string ARBDecompiler::TextureGradient(Operation operation) { | ||
| 1914 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1915 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1916 | const std::string ddx = AllocVectorTemporary(); | ||
| 1917 | const std::string ddy = AllocVectorTemporary(); | ||
| 1918 | const std::string coord = BuildCoords(operation).first; | ||
| 1919 | |||
| 1920 | const std::size_t num_components = meta.derivates.size() / 2; | ||
| 1921 | for (std::size_t index = 0; index < num_components; ++index) { | ||
| 1922 | const char swizzle = Swizzle(index); | ||
| 1923 | AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); | ||
| 1924 | AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); | ||
| 1925 | } | ||
| 1926 | |||
| 1927 | const std::string_view result = coord; | ||
| 1928 | AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, | ||
| 1929 | TextureType(meta), BuildAoffi(operation)); | ||
| 1930 | AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); | ||
| 1931 | return fmt::format("{}.x", result); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | std::string ARBDecompiler::ImageLoad(Operation operation) { | ||
| 1935 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1936 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1937 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1938 | const std::string_view type = ImageType(meta.image.type); | ||
| 1939 | |||
| 1940 | const std::string temporary = AllocVectorTemporary(); | ||
| 1941 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1942 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1943 | } | ||
| 1944 | AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); | ||
| 1945 | AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1946 | return fmt::format("{}.x", temporary); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | std::string ARBDecompiler::ImageStore(Operation operation) { | ||
| 1950 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1951 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1952 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 1953 | const std::size_t num_values = meta.values.size(); | ||
| 1954 | const std::string_view type = ImageType(meta.image.type); | ||
| 1955 | |||
| 1956 | const std::string coord = AllocVectorTemporary(); | ||
| 1957 | const std::string value = AllocVectorTemporary(); | ||
| 1958 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 1959 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 1960 | } | ||
| 1961 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 1962 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 1963 | } | ||
| 1964 | AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); | ||
| 1965 | return {}; | ||
| 1966 | } | ||
| 1967 | |||
| 1968 | std::string ARBDecompiler::Branch(Operation operation) { | ||
| 1969 | const auto target = std::get<ImmediateNode>(*operation[0]); | ||
| 1970 | AddLine("MOV.U PC.x, {};", target.GetValue()); | ||
| 1971 | AddLine("CONT;"); | ||
| 1972 | return {}; | ||
| 1973 | } | ||
| 1974 | |||
| 1975 | std::string ARBDecompiler::BranchIndirect(Operation operation) { | ||
| 1976 | AddLine("MOV.U PC.x, {};", Visit(operation[0])); | ||
| 1977 | AddLine("CONT;"); | ||
| 1978 | return {}; | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | std::string ARBDecompiler::PushFlowStack(Operation operation) { | ||
| 1982 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1983 | const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); | ||
| 1984 | const std::string_view stack_name = StackName(stack); | ||
| 1985 | AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); | ||
| 1986 | AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1987 | return {}; | ||
| 1988 | } | ||
| 1989 | |||
| 1990 | std::string ARBDecompiler::PopFlowStack(Operation operation) { | ||
| 1991 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1992 | const std::string_view stack_name = StackName(stack); | ||
| 1993 | AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1994 | AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); | ||
| 1995 | AddLine("CONT;"); | ||
| 1996 | return {}; | ||
| 1997 | } | ||
| 1998 | |||
| 1999 | std::string ARBDecompiler::Exit(Operation) { | ||
| 2000 | Exit(); | ||
| 2001 | return {}; | ||
| 2002 | } | ||
| 2003 | |||
| 2004 | std::string ARBDecompiler::Discard(Operation) { | ||
| 2005 | AddLine("KIL TR;"); | ||
| 2006 | return {}; | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | std::string ARBDecompiler::EmitVertex(Operation) { | ||
| 2010 | AddLine("EMIT;"); | ||
| 2011 | return {}; | ||
| 2012 | } | ||
| 2013 | |||
| 2014 | std::string ARBDecompiler::EndPrimitive(Operation) { | ||
| 2015 | AddLine("ENDPRIM;"); | ||
| 2016 | return {}; | ||
| 2017 | } | ||
| 2018 | |||
| 2019 | std::string ARBDecompiler::InvocationId(Operation) { | ||
| 2020 | return "primitive.invocation"; | ||
| 2021 | } | ||
| 2022 | |||
| 2023 | std::string ARBDecompiler::YNegate(Operation) { | ||
| 2024 | LOG_WARNING(Render_OpenGL, "(STUBBED)"); | ||
| 2025 | const std::string temporary = AllocTemporary(); | ||
| 2026 | AddLine("MOV.F {}, 1;", temporary); | ||
| 2027 | return temporary; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | std::string ARBDecompiler::ThreadId(Operation) { | ||
| 2031 | return fmt::format("{}.threadid", StageInputName(stage)); | ||
| 2032 | } | ||
| 2033 | |||
| 2034 | std::string ARBDecompiler::ShuffleIndexed(Operation operation) { | ||
| 2035 | if (!device.HasWarpIntrinsics()) { | ||
| 2036 | LOG_ERROR(Render_OpenGL, | ||
| 2037 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 2038 | return Visit(operation[0]); | ||
| 2039 | } | ||
| 2040 | const std::string temporary = AllocVectorTemporary(); | ||
| 2041 | AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), | ||
| 2042 | Visit(operation[1])); | ||
| 2043 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 2044 | return fmt::format("{}.x", temporary); | ||
| 2045 | } | ||
| 2046 | |||
| 2047 | std::string ARBDecompiler::Barrier(Operation) { | ||
| 2048 | if (!ir.IsDecompiled()) { | ||
| 2049 | LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled"); | ||
| 2050 | return {}; | ||
| 2051 | } | ||
| 2052 | AddLine("BAR;"); | ||
| 2053 | return {}; | ||
| 2054 | } | ||
| 2055 | |||
| 2056 | std::string ARBDecompiler::MemoryBarrierGroup(Operation) { | ||
| 2057 | AddLine("MEMBAR.CTA;"); | ||
| 2058 | return {}; | ||
| 2059 | } | ||
| 2060 | |||
| 2061 | std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { | ||
| 2062 | AddLine("MEMBAR;"); | ||
| 2063 | return {}; | ||
| 2064 | } | ||
| 2065 | |||
| 2066 | } // Anonymous namespace | ||
| 2067 | |||
| 2068 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 2069 | const VideoCommon::Shader::Registry& registry, | ||
| 2070 | Tegra::Engines::ShaderType stage, std::string_view identifier) { | ||
| 2071 | return ARBDecompiler(device, ir, registry, stage, identifier).Code(); | ||
| 2072 | } | ||
| 2073 | |||
| 2074 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h new file mode 100644 index 000000000..6afc87220 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Tegra::Engines { | ||
| 13 | enum class ShaderType : u32; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | class Registry; | ||
| 19 | } // namespace VideoCommon::Shader | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | class Device; | ||
| 24 | |||
| 25 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 26 | const VideoCommon::Shader::Registry& registry, | ||
| 27 | Tegra::Engines::ShaderType stage, std::string_view identifier); | ||
| 28 | |||
| 29 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 9964ea894..ad0577a4f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -22,13 +22,12 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 22 | 22 | ||
| 23 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | 23 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); |
| 24 | 24 | ||
| 25 | CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) | 25 | Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { |
| 26 | : VideoCommon::BufferBlock{cpu_addr, size} { | ||
| 27 | gl_buffer.Create(); | 26 | gl_buffer.Create(); |
| 28 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | 27 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); |
| 29 | } | 28 | } |
| 30 | 29 | ||
| 31 | CachedBufferBlock::~CachedBufferBlock() = default; | 30 | Buffer::~Buffer() = default; |
| 32 | 31 | ||
| 33 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 32 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 34 | const Device& device, std::size_t stream_size) | 33 | const Device& device, std::size_t stream_size) |
| @@ -48,12 +47,8 @@ OGLBufferCache::~OGLBufferCache() { | |||
| 48 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 47 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); |
| 49 | } | 48 | } |
| 50 | 49 | ||
| 51 | Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 50 | std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 52 | return std::make_shared<CachedBufferBlock>(cpu_addr, size); | 51 | return std::make_shared<Buffer>(cpu_addr, size); |
| 53 | } | ||
| 54 | |||
| 55 | GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { | ||
| 56 | return buffer->GetHandle(); | ||
| 57 | } | 52 | } |
| 58 | 53 | ||
| 59 | GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { | 54 | GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| @@ -62,7 +57,7 @@ GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { | |||
| 62 | 57 | ||
| 63 | void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 58 | void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 64 | const u8* data) { | 59 | const u8* data) { |
| 65 | glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), | 60 | glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), |
| 66 | static_cast<GLsizeiptr>(size), data); | 61 | static_cast<GLsizeiptr>(size), data); |
| 67 | } | 62 | } |
| 68 | 63 | ||
| @@ -70,20 +65,20 @@ void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 70 | u8* data) { | 65 | u8* data) { |
| 71 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | 66 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); |
| 72 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | 67 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); |
| 73 | glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), | 68 | glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), |
| 74 | static_cast<GLsizeiptr>(size), data); | 69 | static_cast<GLsizeiptr>(size), data); |
| 75 | } | 70 | } |
| 76 | 71 | ||
| 77 | void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | 72 | void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 78 | std::size_t dst_offset, std::size_t size) { | 73 | std::size_t dst_offset, std::size_t size) { |
| 79 | glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset), | 74 | glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset), |
| 80 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); | 75 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); |
| 81 | } | 76 | } |
| 82 | 77 | ||
| 83 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, | 78 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, |
| 84 | std::size_t size) { | 79 | std::size_t size) { |
| 85 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); | 80 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); |
| 86 | const GLuint& cbuf = cbufs[cbuf_cursor++]; | 81 | const GLuint cbuf = cbufs[cbuf_cursor++]; |
| 87 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); | 82 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); |
| 88 | return {cbuf, 0}; | 83 | return {cbuf, 0}; |
| 89 | } | 84 | } |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 679b9b1d7..a49aaf9c4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -23,17 +23,12 @@ class Device; | |||
| 23 | class OGLStreamBuffer; | 23 | class OGLStreamBuffer; |
| 24 | class RasterizerOpenGL; | 24 | class RasterizerOpenGL; |
| 25 | 25 | ||
| 26 | class CachedBufferBlock; | 26 | class Buffer : public VideoCommon::BufferBlock { |
| 27 | |||
| 28 | using Buffer = std::shared_ptr<CachedBufferBlock>; | ||
| 29 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | ||
| 30 | |||
| 31 | class CachedBufferBlock : public VideoCommon::BufferBlock { | ||
| 32 | public: | 27 | public: |
| 33 | explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); | 28 | explicit Buffer(VAddr cpu_addr, const std::size_t size); |
| 34 | ~CachedBufferBlock(); | 29 | ~Buffer(); |
| 35 | 30 | ||
| 36 | GLuint GetHandle() const { | 31 | GLuint Handle() const { |
| 37 | return gl_buffer.handle; | 32 | return gl_buffer.handle; |
| 38 | } | 33 | } |
| 39 | 34 | ||
| @@ -41,6 +36,7 @@ private: | |||
| 41 | OGLBuffer gl_buffer; | 36 | OGLBuffer gl_buffer; |
| 42 | }; | 37 | }; |
| 43 | 38 | ||
| 39 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | ||
| 44 | class OGLBufferCache final : public GenericBufferCache { | 40 | class OGLBufferCache final : public GenericBufferCache { |
| 45 | public: | 41 | public: |
| 46 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 42 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -54,9 +50,7 @@ public: | |||
| 54 | } | 50 | } |
| 55 | 51 | ||
| 56 | protected: | 52 | protected: |
| 57 | Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; | 53 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 58 | |||
| 59 | GLuint ToHandle(const Buffer& buffer) override; | ||
| 60 | 54 | ||
| 61 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 55 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 62 | const u8* data) override; | 56 | const u8* data) override; |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 890fc6c63..e245e27ec 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -213,6 +213,7 @@ Device::Device() | |||
| 213 | has_component_indexing_bug = is_amd; | 213 | has_component_indexing_bug = is_amd; |
| 214 | has_precise_bug = TestPreciseBug(); | 214 | has_precise_bug = TestPreciseBug(); |
| 215 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | 215 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; |
| 216 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | ||
| 216 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | 217 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && |
| 217 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && | 218 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && |
| 218 | GLAD_GL_NV_transform_feedback2; | 219 | GLAD_GL_NV_transform_feedback2; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 98cca0254..145347943 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -88,6 +88,10 @@ public: | |||
| 88 | return has_fast_buffer_sub_data; | 88 | return has_fast_buffer_sub_data; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | bool HasNvViewportArray2() const { | ||
| 92 | return has_nv_viewport_array2; | ||
| 93 | } | ||
| 94 | |||
| 91 | bool UseAssemblyShaders() const { | 95 | bool UseAssemblyShaders() const { |
| 92 | return use_assembly_shaders; | 96 | return use_assembly_shaders; |
| 93 | } | 97 | } |
| @@ -111,6 +115,7 @@ private: | |||
| 111 | bool has_component_indexing_bug{}; | 115 | bool has_component_indexing_bug{}; |
| 112 | bool has_precise_bug{}; | 116 | bool has_precise_bug{}; |
| 113 | bool has_fast_buffer_sub_data{}; | 117 | bool has_fast_buffer_sub_data{}; |
| 118 | bool has_nv_viewport_array2{}; | ||
| 114 | bool use_assembly_shaders{}; | 119 | bool use_assembly_shaders{}; |
| 115 | }; | 120 | }; |
| 116 | 121 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c28486b1d..46e780a06 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 26 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| @@ -148,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u | |||
| 148 | auto program = std::make_shared<ProgramHandle>(); | 149 | auto program = std::make_shared<ProgramHandle>(); |
| 149 | 150 | ||
| 150 | if (device.UseAssemblyShaders()) { | 151 | if (device.UseAssemblyShaders()) { |
| 151 | const std::string arb = "Not implemented"; | 152 | const std::string arb = |
| 153 | DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); | ||
| 152 | 154 | ||
| 153 | GLuint& arb_prog = program->assembly_program.handle; | 155 | GLuint& arb_prog = program->assembly_program.handle; |
| 154 | 156 | ||
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 6ec328c53..932a2f69e 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -49,14 +49,6 @@ OGLStreamBuffer::~OGLStreamBuffer() { | |||
| 49 | gl_buffer.Release(); | 49 | gl_buffer.Release(); |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | GLuint OGLStreamBuffer::GetHandle() const { | ||
| 53 | return gl_buffer.handle; | ||
| 54 | } | ||
| 55 | |||
| 56 | GLsizeiptr OGLStreamBuffer::GetSize() const { | ||
| 57 | return buffer_size; | ||
| 58 | } | ||
| 59 | |||
| 60 | std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { | 52 | std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { |
| 61 | ASSERT(size <= buffer_size); | 53 | ASSERT(size <= buffer_size); |
| 62 | ASSERT(alignment <= buffer_size); | 54 | ASSERT(alignment <= buffer_size); |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index f8383cbd4..866da3594 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -17,9 +17,6 @@ public: | |||
| 17 | bool use_persistent = true); | 17 | bool use_persistent = true); |
| 18 | ~OGLStreamBuffer(); | 18 | ~OGLStreamBuffer(); |
| 19 | 19 | ||
| 20 | GLuint GetHandle() const; | ||
| 21 | GLsizeiptr GetSize() const; | ||
| 22 | |||
| 23 | /* | 20 | /* |
| 24 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | 21 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes |
| 25 | * and the optional alignment requirement. | 22 | * and the optional alignment requirement. |
| @@ -32,6 +29,14 @@ public: | |||
| 32 | 29 | ||
| 33 | void Unmap(GLsizeiptr size); | 30 | void Unmap(GLsizeiptr size); |
| 34 | 31 | ||
| 32 | GLuint Handle() const { | ||
| 33 | return gl_buffer.handle; | ||
| 34 | } | ||
| 35 | |||
| 36 | GLsizeiptr Size() const { | ||
| 37 | return buffer_size; | ||
| 38 | } | ||
| 39 | |||
| 35 | private: | 40 | private: |
| 36 | OGLBuffer gl_buffer; | 41 | OGLBuffer gl_buffer; |
| 37 | 42 | ||
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5f33d9e40..1fde38328 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -37,8 +37,8 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch | |||
| 37 | 37 | ||
| 38 | } // Anonymous namespace | 38 | } // Anonymous namespace |
| 39 | 39 | ||
| 40 | CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | 40 | Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, |
| 41 | VAddr cpu_addr, std::size_t size) | 41 | std::size_t size) |
| 42 | : VideoCommon::BufferBlock{cpu_addr, size} { | 42 | : VideoCommon::BufferBlock{cpu_addr, size} { |
| 43 | VkBufferCreateInfo ci; | 43 | VkBufferCreateInfo ci; |
| 44 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; | 44 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| @@ -54,7 +54,7 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me | |||
| 54 | buffer.commit = memory_manager.Commit(buffer.handle, false); | 54 | buffer.commit = memory_manager.Commit(buffer.handle, false); |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | CachedBufferBlock::~CachedBufferBlock() = default; | 57 | Buffer::~Buffer() = default; |
| 58 | 58 | ||
| 59 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 59 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| 60 | const VKDevice& device, VKMemoryManager& memory_manager, | 60 | const VKDevice& device, VKMemoryManager& memory_manager, |
| @@ -67,12 +67,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S | |||
| 67 | 67 | ||
| 68 | VKBufferCache::~VKBufferCache() = default; | 68 | VKBufferCache::~VKBufferCache() = default; |
| 69 | 69 | ||
| 70 | Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 70 | std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 71 | return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); | 71 | return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size); |
| 72 | } | ||
| 73 | |||
| 74 | VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) { | ||
| 75 | return buffer->GetHandle(); | ||
| 76 | } | 72 | } |
| 77 | 73 | ||
| 78 | VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { | 74 | VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { |
| @@ -91,7 +87,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st | |||
| 91 | std::memcpy(staging.commit->Map(size), data, size); | 87 | std::memcpy(staging.commit->Map(size), data, size); |
| 92 | 88 | ||
| 93 | scheduler.RequestOutsideRenderPassOperationContext(); | 89 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 94 | scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, | 90 | scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, |
| 95 | size](vk::CommandBuffer cmdbuf) { | 91 | size](vk::CommandBuffer cmdbuf) { |
| 96 | cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); | 92 | cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); |
| 97 | 93 | ||
| @@ -114,7 +110,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 114 | u8* data) { | 110 | u8* data) { |
| 115 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); | 111 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); |
| 116 | scheduler.RequestOutsideRenderPassOperationContext(); | 112 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 117 | scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, | 113 | scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, |
| 118 | size](vk::CommandBuffer cmdbuf) { | 114 | size](vk::CommandBuffer cmdbuf) { |
| 119 | VkBufferMemoryBarrier barrier; | 115 | VkBufferMemoryBarrier barrier; |
| 120 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 116 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| @@ -141,8 +137,8 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 141 | void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | 137 | void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 142 | std::size_t dst_offset, std::size_t size) { | 138 | std::size_t dst_offset, std::size_t size) { |
| 143 | scheduler.RequestOutsideRenderPassOperationContext(); | 139 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 144 | scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset, | 140 | scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset, |
| 145 | dst_offset, size](vk::CommandBuffer cmdbuf) { | 141 | size](vk::CommandBuffer cmdbuf) { |
| 146 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); | 142 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); |
| 147 | 143 | ||
| 148 | std::array<VkBufferMemoryBarrier, 2> barriers; | 144 | std::array<VkBufferMemoryBarrier, 2> barriers; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 65cb3c8ad..9ebbef835 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -23,13 +23,13 @@ class VKDevice; | |||
| 23 | class VKMemoryManager; | 23 | class VKMemoryManager; |
| 24 | class VKScheduler; | 24 | class VKScheduler; |
| 25 | 25 | ||
| 26 | class CachedBufferBlock final : public VideoCommon::BufferBlock { | 26 | class Buffer final : public VideoCommon::BufferBlock { |
| 27 | public: | 27 | public: |
| 28 | explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | 28 | explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, |
| 29 | VAddr cpu_addr, std::size_t size); | 29 | std::size_t size); |
| 30 | ~CachedBufferBlock(); | 30 | ~Buffer(); |
| 31 | 31 | ||
| 32 | VkBuffer GetHandle() const { | 32 | VkBuffer Handle() const { |
| 33 | return *buffer.handle; | 33 | return *buffer.handle; |
| 34 | } | 34 | } |
| 35 | 35 | ||
| @@ -37,8 +37,6 @@ private: | |||
| 37 | VKBuffer buffer; | 37 | VKBuffer buffer; |
| 38 | }; | 38 | }; |
| 39 | 39 | ||
| 40 | using Buffer = std::shared_ptr<CachedBufferBlock>; | ||
| 41 | |||
| 42 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { | 40 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { |
| 43 | public: | 41 | public: |
| 44 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 42 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| @@ -49,9 +47,7 @@ public: | |||
| 49 | VkBuffer GetEmptyBuffer(std::size_t size) override; | 47 | VkBuffer GetEmptyBuffer(std::size_t size) override; |
| 50 | 48 | ||
| 51 | protected: | 49 | protected: |
| 52 | VkBuffer ToHandle(const Buffer& buffer) override; | 50 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 53 | |||
| 54 | Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; | ||
| 55 | 51 | ||
| 56 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 52 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 57 | const u8* data) override; | 53 | const u8* data) override; |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index dfddf7ad6..c765c60a0 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -35,7 +35,7 @@ public: | |||
| 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 36 | void Unmap(u64 size); | 36 | void Unmap(u64 size); |
| 37 | 37 | ||
| 38 | VkBuffer GetHandle() const { | 38 | VkBuffer Handle() const { |
| 39 | return *buffer; | 39 | return *buffer; |
| 40 | } | 40 | } |
| 41 | 41 | ||
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 7e9073cc3..32c81dc70 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -631,13 +631,11 @@ void Config::ReadRendererValues() { | |||
| 631 | static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); | 631 | static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); |
| 632 | Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); | 632 | Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); |
| 633 | Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); | 633 | Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); |
| 634 | Settings::values.resolution_factor = | ||
| 635 | ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); | ||
| 636 | Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); | 634 | Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); |
| 637 | Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); | 635 | Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); |
| 638 | Settings::values.use_frame_limit = | 636 | Settings::values.use_frame_limit = |
| 639 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); | 637 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); |
| 640 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); | 638 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt(); |
| 641 | Settings::values.use_disk_shader_cache = | 639 | Settings::values.use_disk_shader_cache = |
| 642 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); | 640 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); |
| 643 | const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); | 641 | const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); |
| @@ -722,8 +720,6 @@ void Config::ReadUIValues() { | |||
| 722 | .toString(); | 720 | .toString(); |
| 723 | UISettings::values.enable_discord_presence = | 721 | UISettings::values.enable_discord_presence = |
| 724 | ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); | 722 | ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); |
| 725 | UISettings::values.screenshot_resolution_factor = | ||
| 726 | static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt()); | ||
| 727 | UISettings::values.select_user_on_boot = | 723 | UISettings::values.select_user_on_boot = |
| 728 | ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); | 724 | ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); |
| 729 | 725 | ||
| @@ -1082,8 +1078,6 @@ void Config::SaveRendererValues() { | |||
| 1082 | WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); | 1078 | WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); |
| 1083 | WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); | 1079 | WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); |
| 1084 | WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); | 1080 | WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); |
| 1085 | WriteSetting(QStringLiteral("resolution_factor"), | ||
| 1086 | static_cast<double>(Settings::values.resolution_factor), 1.0); | ||
| 1087 | WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); | 1081 | WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); |
| 1088 | WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); | 1082 | WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); |
| 1089 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); | 1083 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); |
| @@ -1159,8 +1153,6 @@ void Config::SaveUIValues() { | |||
| 1159 | QString::fromUtf8(UISettings::themes[0].second)); | 1153 | QString::fromUtf8(UISettings::themes[0].second)); |
| 1160 | WriteSetting(QStringLiteral("enable_discord_presence"), | 1154 | WriteSetting(QStringLiteral("enable_discord_presence"), |
| 1161 | UISettings::values.enable_discord_presence, true); | 1155 | UISettings::values.enable_discord_presence, true); |
| 1162 | WriteSetting(QStringLiteral("screenshot_resolution_factor"), | ||
| 1163 | UISettings::values.screenshot_resolution_factor, 0); | ||
| 1164 | WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, | 1156 | WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, |
| 1165 | false); | 1157 | false); |
| 1166 | 1158 | ||
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index ea667caef..304625cd7 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -19,47 +19,6 @@ | |||
| 19 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 19 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 20 | #endif | 20 | #endif |
| 21 | 21 | ||
| 22 | namespace { | ||
| 23 | enum class Resolution : int { | ||
| 24 | Auto, | ||
| 25 | Scale1x, | ||
| 26 | Scale2x, | ||
| 27 | Scale3x, | ||
| 28 | Scale4x, | ||
| 29 | }; | ||
| 30 | |||
| 31 | float ToResolutionFactor(Resolution option) { | ||
| 32 | switch (option) { | ||
| 33 | case Resolution::Auto: | ||
| 34 | return 0.f; | ||
| 35 | case Resolution::Scale1x: | ||
| 36 | return 1.f; | ||
| 37 | case Resolution::Scale2x: | ||
| 38 | return 2.f; | ||
| 39 | case Resolution::Scale3x: | ||
| 40 | return 3.f; | ||
| 41 | case Resolution::Scale4x: | ||
| 42 | return 4.f; | ||
| 43 | } | ||
| 44 | return 0.f; | ||
| 45 | } | ||
| 46 | |||
| 47 | Resolution FromResolutionFactor(float factor) { | ||
| 48 | if (factor == 0.f) { | ||
| 49 | return Resolution::Auto; | ||
| 50 | } else if (factor == 1.f) { | ||
| 51 | return Resolution::Scale1x; | ||
| 52 | } else if (factor == 2.f) { | ||
| 53 | return Resolution::Scale2x; | ||
| 54 | } else if (factor == 3.f) { | ||
| 55 | return Resolution::Scale3x; | ||
| 56 | } else if (factor == 4.f) { | ||
| 57 | return Resolution::Scale4x; | ||
| 58 | } | ||
| 59 | return Resolution::Auto; | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | ConfigureGraphics::ConfigureGraphics(QWidget* parent) | 22 | ConfigureGraphics::ConfigureGraphics(QWidget* parent) |
| 64 | : QWidget(parent), ui(new Ui::ConfigureGraphics) { | 23 | : QWidget(parent), ui(new Ui::ConfigureGraphics) { |
| 65 | vulkan_device = Settings::values.vulkan_device; | 24 | vulkan_device = Settings::values.vulkan_device; |
| @@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 99 | 58 | ||
| 100 | ui->api->setEnabled(runtime_lock); | 59 | ui->api->setEnabled(runtime_lock); |
| 101 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); | 60 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); |
| 102 | ui->resolution_factor_combobox->setCurrentIndex( | ||
| 103 | static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); | ||
| 104 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); | 61 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); |
| 105 | ui->use_disk_shader_cache->setEnabled(runtime_lock); | 62 | ui->use_disk_shader_cache->setEnabled(runtime_lock); |
| 106 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); | 63 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); |
| @@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 114 | void ConfigureGraphics::ApplyConfiguration() { | 71 | void ConfigureGraphics::ApplyConfiguration() { |
| 115 | Settings::values.renderer_backend = GetCurrentGraphicsBackend(); | 72 | Settings::values.renderer_backend = GetCurrentGraphicsBackend(); |
| 116 | Settings::values.vulkan_device = vulkan_device; | 73 | Settings::values.vulkan_device = vulkan_device; |
| 117 | Settings::values.resolution_factor = | ||
| 118 | ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); | ||
| 119 | Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); | 74 | Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); |
| 120 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); | 75 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); |
| 121 | Settings::values.use_asynchronous_gpu_emulation = | 76 | Settings::values.use_asynchronous_gpu_emulation = |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index c816d6108..6e75447a5 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -85,46 +85,6 @@ | |||
| 85 | </widget> | 85 | </widget> |
| 86 | </item> | 86 | </item> |
| 87 | <item> | 87 | <item> |
| 88 | <layout class="QHBoxLayout" name="horizontalLayout_2"> | ||
| 89 | <item> | ||
| 90 | <widget class="QLabel" name="label"> | ||
| 91 | <property name="text"> | ||
| 92 | <string>Internal Resolution:</string> | ||
| 93 | </property> | ||
| 94 | </widget> | ||
| 95 | </item> | ||
| 96 | <item> | ||
| 97 | <widget class="QComboBox" name="resolution_factor_combobox"> | ||
| 98 | <item> | ||
| 99 | <property name="text"> | ||
| 100 | <string>Auto (Window Size)</string> | ||
| 101 | </property> | ||
| 102 | </item> | ||
| 103 | <item> | ||
| 104 | <property name="text"> | ||
| 105 | <string>Native (1280x720)</string> | ||
| 106 | </property> | ||
| 107 | </item> | ||
| 108 | <item> | ||
| 109 | <property name="text"> | ||
| 110 | <string>2x Native (2560x1440)</string> | ||
| 111 | </property> | ||
| 112 | </item> | ||
| 113 | <item> | ||
| 114 | <property name="text"> | ||
| 115 | <string>3x Native (3840x2160)</string> | ||
| 116 | </property> | ||
| 117 | </item> | ||
| 118 | <item> | ||
| 119 | <property name="text"> | ||
| 120 | <string>4x Native (5120x2880)</string> | ||
| 121 | </property> | ||
| 122 | </item> | ||
| 123 | </widget> | ||
| 124 | </item> | ||
| 125 | </layout> | ||
| 126 | </item> | ||
| 127 | <item> | ||
| 128 | <layout class="QHBoxLayout" name="horizontalLayout_6"> | 88 | <layout class="QHBoxLayout" name="horizontalLayout_6"> |
| 129 | <item> | 89 | <item> |
| 130 | <widget class="QLabel" name="ar_label"> | 90 | <widget class="QLabel" name="ar_label"> |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 37aadf7f8..be5006ad3 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent) | |||
| 12 | 12 | ||
| 13 | ui->setupUi(this); | 13 | ui->setupUi(this); |
| 14 | 14 | ||
| 15 | // TODO: Remove this after assembly shaders are fully integrated | ||
| 16 | ui->use_assembly_shaders->setVisible(false); | ||
| 17 | |||
| 18 | SetConfiguration(); | 15 | SetConfiguration(); |
| 19 | } | 16 | } |
| 20 | 17 | ||
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 270cccc77..4119d7907 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -689,10 +689,7 @@ void GMainWindow::InitializeHotkeys() { | |||
| 689 | Settings::values.use_frame_limit = !Settings::values.use_frame_limit; | 689 | Settings::values.use_frame_limit = !Settings::values.use_frame_limit; |
| 690 | UpdateStatusBar(); | 690 | UpdateStatusBar(); |
| 691 | }); | 691 | }); |
| 692 | // TODO: Remove this comment/static whenever the next major release of | 692 | constexpr u16 SPEED_LIMIT_STEP = 5; |
| 693 | // MSVC occurs and we make it a requirement (see: | ||
| 694 | // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html) | ||
| 695 | static constexpr u16 SPEED_LIMIT_STEP = 5; | ||
| 696 | connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this), | 693 | connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this), |
| 697 | &QShortcut::activated, this, [&] { | 694 | &QShortcut::activated, this, [&] { |
| 698 | if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { | 695 | if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 7240270f5..659b9f701 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -380,8 +380,6 @@ void Config::ReadValues() { | |||
| 380 | Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); | 380 | Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); |
| 381 | Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); | 381 | Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); |
| 382 | 382 | ||
| 383 | Settings::values.resolution_factor = | ||
| 384 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | ||
| 385 | Settings::values.aspect_ratio = | 383 | Settings::values.aspect_ratio = |
| 386 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | 384 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); |
| 387 | Settings::values.max_anisotropy = | 385 | Settings::values.max_anisotropy = |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 6f53e9659..45c07ed5d 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -117,11 +117,6 @@ use_hw_renderer = | |||
| 117 | # 0: Interpreter (slow), 1 (default): JIT (fast) | 117 | # 0: Interpreter (slow), 1 (default): JIT (fast) |
| 118 | use_shader_jit = | 118 | use_shader_jit = |
| 119 | 119 | ||
| 120 | # Resolution scale factor | ||
| 121 | # 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale | ||
| 122 | # factor for the Switch resolution | ||
| 123 | resolution_factor = | ||
| 124 | |||
| 125 | # Aspect ratio | 120 | # Aspect ratio |
| 126 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | 121 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window |
| 127 | aspect_ratio = | 122 | aspect_ratio = |
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 3be58b15d..1566c2e3f 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -116,8 +116,6 @@ void Config::ReadValues() { | |||
| 116 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); | 116 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); |
| 117 | 117 | ||
| 118 | // Renderer | 118 | // Renderer |
| 119 | Settings::values.resolution_factor = | ||
| 120 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | ||
| 121 | Settings::values.aspect_ratio = | 119 | Settings::values.aspect_ratio = |
| 122 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | 120 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); |
| 123 | Settings::values.max_anisotropy = | 121 | Settings::values.max_anisotropy = |
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index ca203b64d..41bbbbf60 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h | |||
| @@ -21,11 +21,6 @@ use_hw_renderer = | |||
| 21 | # 0: Interpreter (slow), 1 (default): JIT (fast) | 21 | # 0: Interpreter (slow), 1 (default): JIT (fast) |
| 22 | use_shader_jit = | 22 | use_shader_jit = |
| 23 | 23 | ||
| 24 | # Resolution scale factor | ||
| 25 | # 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale | ||
| 26 | # factor for the Switch resolution | ||
| 27 | resolution_factor = | ||
| 28 | |||
| 29 | # Aspect ratio | 24 | # Aspect ratio |
| 30 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | 25 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window |
| 31 | aspect_ratio = | 26 | aspect_ratio = |