diff options
| -rw-r--r-- | src/core/hle/service/nvflinger/nvflinger.cpp | 32 | ||||
| -rw-r--r-- | src/core/hle/service/nvflinger/nvflinger.h | 9 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 8 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 183 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 71 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.cpp | 70 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.h | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 116 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 140 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 92 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 53 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 36 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 38 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 76 |
18 files changed, 788 insertions, 248 deletions
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 6a613aeab..6db2cce41 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | 7 | ||
| 8 | #include "common/alignment.h" | ||
| 9 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 10 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 11 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| @@ -22,7 +21,6 @@ | |||
| 22 | #include "core/hle/service/nvflinger/nvflinger.h" | 21 | #include "core/hle/service/nvflinger/nvflinger.h" |
| 23 | #include "core/perf_stats.h" | 22 | #include "core/perf_stats.h" |
| 24 | #include "video_core/renderer_base.h" | 23 | #include "video_core/renderer_base.h" |
| 25 | #include "video_core/video_core.h" | ||
| 26 | 24 | ||
| 27 | namespace Service::NVFlinger { | 25 | namespace Service::NVFlinger { |
| 28 | 26 | ||
| @@ -30,12 +28,6 @@ constexpr std::size_t SCREEN_REFRESH_RATE = 60; | |||
| 30 | constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); | 28 | constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); |
| 31 | 29 | ||
| 32 | NVFlinger::NVFlinger() { | 30 | NVFlinger::NVFlinger() { |
| 33 | // Add the different displays to the list of displays. | ||
| 34 | displays.emplace_back(0, "Default"); | ||
| 35 | displays.emplace_back(1, "External"); | ||
| 36 | displays.emplace_back(2, "Edid"); | ||
| 37 | displays.emplace_back(3, "Internal"); | ||
| 38 | |||
| 39 | // Schedule the screen composition events | 31 | // Schedule the screen composition events |
| 40 | composition_event = | 32 | composition_event = |
| 41 | CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { | 33 | CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { |
| @@ -55,13 +47,13 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { | |||
| 55 | } | 47 | } |
| 56 | 48 | ||
| 57 | u64 NVFlinger::OpenDisplay(std::string_view name) { | 49 | u64 NVFlinger::OpenDisplay(std::string_view name) { |
| 58 | LOG_WARNING(Service, "Opening display {}", name); | 50 | LOG_DEBUG(Service, "Opening \"{}\" display", name); |
| 59 | 51 | ||
| 60 | // TODO(Subv): Currently we only support the Default display. | 52 | // TODO(Subv): Currently we only support the Default display. |
| 61 | ASSERT(name == "Default"); | 53 | ASSERT(name == "Default"); |
| 62 | 54 | ||
| 63 | auto itr = std::find_if(displays.begin(), displays.end(), | 55 | const auto itr = std::find_if(displays.begin(), displays.end(), |
| 64 | [&](const Display& display) { return display.name == name; }); | 56 | [&](const Display& display) { return display.name == name; }); |
| 65 | 57 | ||
| 66 | ASSERT(itr != displays.end()); | 58 | ASSERT(itr != displays.end()); |
| 67 | 59 | ||
| @@ -73,8 +65,8 @@ u64 NVFlinger::CreateLayer(u64 display_id) { | |||
| 73 | 65 | ||
| 74 | ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment"); | 66 | ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment"); |
| 75 | 67 | ||
| 76 | u64 layer_id = next_layer_id++; | 68 | const u64 layer_id = next_layer_id++; |
| 77 | u32 buffer_queue_id = next_buffer_queue_id++; | 69 | const u32 buffer_queue_id = next_buffer_queue_id++; |
| 78 | auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); | 70 | auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); |
| 79 | display.layers.emplace_back(layer_id, buffer_queue); | 71 | display.layers.emplace_back(layer_id, buffer_queue); |
| 80 | buffer_queues.emplace_back(std::move(buffer_queue)); | 72 | buffer_queues.emplace_back(std::move(buffer_queue)); |
| @@ -91,16 +83,16 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id | |||
| 91 | } | 83 | } |
| 92 | 84 | ||
| 93 | std::shared_ptr<BufferQueue> NVFlinger::GetBufferQueue(u32 id) const { | 85 | std::shared_ptr<BufferQueue> NVFlinger::GetBufferQueue(u32 id) const { |
| 94 | auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), | 86 | const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), |
| 95 | [&](const auto& queue) { return queue->GetId() == id; }); | 87 | [&](const auto& queue) { return queue->GetId() == id; }); |
| 96 | 88 | ||
| 97 | ASSERT(itr != buffer_queues.end()); | 89 | ASSERT(itr != buffer_queues.end()); |
| 98 | return *itr; | 90 | return *itr; |
| 99 | } | 91 | } |
| 100 | 92 | ||
| 101 | Display& NVFlinger::GetDisplay(u64 display_id) { | 93 | Display& NVFlinger::GetDisplay(u64 display_id) { |
| 102 | auto itr = std::find_if(displays.begin(), displays.end(), | 94 | const auto itr = std::find_if(displays.begin(), displays.end(), |
| 103 | [&](const Display& display) { return display.id == display_id; }); | 95 | [&](const Display& display) { return display.id == display_id; }); |
| 104 | 96 | ||
| 105 | ASSERT(itr != displays.end()); | 97 | ASSERT(itr != displays.end()); |
| 106 | return *itr; | 98 | return *itr; |
| @@ -109,8 +101,8 @@ Display& NVFlinger::GetDisplay(u64 display_id) { | |||
| 109 | Layer& NVFlinger::GetLayer(u64 display_id, u64 layer_id) { | 101 | Layer& NVFlinger::GetLayer(u64 display_id, u64 layer_id) { |
| 110 | auto& display = GetDisplay(display_id); | 102 | auto& display = GetDisplay(display_id); |
| 111 | 103 | ||
| 112 | auto itr = std::find_if(display.layers.begin(), display.layers.end(), | 104 | const auto itr = std::find_if(display.layers.begin(), display.layers.end(), |
| 113 | [&](const Layer& layer) { return layer.id == layer_id; }); | 105 | [&](const Layer& layer) { return layer.id == layer_id; }); |
| 114 | 106 | ||
| 115 | ASSERT(itr != display.layers.end()); | 107 | ASSERT(itr != display.layers.end()); |
| 116 | return *itr; | 108 | return *itr; |
| @@ -145,7 +137,7 @@ void NVFlinger::Compose() { | |||
| 145 | continue; | 137 | continue; |
| 146 | } | 138 | } |
| 147 | 139 | ||
| 148 | auto& igbp_buffer = buffer->get().igbp_buffer; | 140 | const auto& igbp_buffer = buffer->get().igbp_buffer; |
| 149 | 141 | ||
| 150 | // Now send the buffer to the GPU for drawing. | 142 | // Now send the buffer to the GPU for drawing. |
| 151 | // TODO(Subv): Support more than just disp0. The display device selection is probably based | 143 | // TODO(Subv): Support more than just disp0. The display device selection is probably based |
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 9abba555b..8f9a0a7f8 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <string_view> | 10 | #include <string_view> |
| @@ -84,7 +85,13 @@ private: | |||
| 84 | 85 | ||
| 85 | std::shared_ptr<Nvidia::Module> nvdrv; | 86 | std::shared_ptr<Nvidia::Module> nvdrv; |
| 86 | 87 | ||
| 87 | std::vector<Display> displays; | 88 | std::array<Display, 5> displays{{ |
| 89 | {0, "Default"}, | ||
| 90 | {1, "External"}, | ||
| 91 | {2, "Edid"}, | ||
| 92 | {3, "Internal"}, | ||
| 93 | {4, "Null"}, | ||
| 94 | }}; | ||
| 88 | std::vector<std::shared_ptr<BufferQueue>> buffer_queues; | 95 | std::vector<std::shared_ptr<BufferQueue>> buffer_queues; |
| 89 | 96 | ||
| 90 | /// Id to use for the next layer that is created, this counter is shared among all displays. | 97 | /// Id to use for the next layer that is created, this counter is shared among all displays. |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 509ca117a..6113e17ff 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -87,6 +87,7 @@ add_library(video_core STATIC | |||
| 87 | shader/decode.cpp | 87 | shader/decode.cpp |
| 88 | shader/shader_ir.cpp | 88 | shader/shader_ir.cpp |
| 89 | shader/shader_ir.h | 89 | shader/shader_ir.h |
| 90 | shader/track.cpp | ||
| 90 | surface.cpp | 91 | surface.cpp |
| 91 | surface.h | 92 | surface.h |
| 92 | textures/astc.cpp | 93 | textures/astc.cpp |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index cdef97bc6..9989825f8 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -208,6 +208,8 @@ enum class UniformType : u64 { | |||
| 208 | SignedShort = 3, | 208 | SignedShort = 3, |
| 209 | Single = 4, | 209 | Single = 4, |
| 210 | Double = 5, | 210 | Double = 5, |
| 211 | Quad = 6, | ||
| 212 | UnsignedQuad = 7, | ||
| 211 | }; | 213 | }; |
| 212 | 214 | ||
| 213 | enum class StoreType : u64 { | 215 | enum class StoreType : u64 { |
| @@ -785,6 +787,12 @@ union Instruction { | |||
| 785 | } st_l; | 787 | } st_l; |
| 786 | 788 | ||
| 787 | union { | 789 | union { |
| 790 | BitField<48, 3, UniformType> type; | ||
| 791 | BitField<46, 2, u64> cache_mode; | ||
| 792 | BitField<20, 24, s64> immediate_offset; | ||
| 793 | } ldg; | ||
| 794 | |||
| 795 | union { | ||
| 788 | BitField<0, 3, u64> pred0; | 796 | BitField<0, 3, u64> pred0; |
| 789 | BitField<3, 3, u64> pred3; | 797 | BitField<3, 3, u64> pred3; |
| 790 | BitField<7, 1, u64> abs_a; | 798 | BitField<7, 1, u64> abs_a; |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 08cf6268f..d3d32a359 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "core/core_timing.h" | ||
| 7 | #include "core/memory.h" | ||
| 6 | #include "video_core/engines/fermi_2d.h" | 8 | #include "video_core/engines/fermi_2d.h" |
| 7 | #include "video_core/engines/kepler_memory.h" | 9 | #include "video_core/engines/kepler_memory.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| @@ -124,9 +126,36 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) { | |||
| 124 | } | 126 | } |
| 125 | } | 127 | } |
| 126 | 128 | ||
| 129 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | ||
| 130 | // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. | ||
| 131 | // So the values you see in docs might be multiplied by 4. | ||
| 127 | enum class BufferMethods { | 132 | enum class BufferMethods { |
| 128 | BindObject = 0, | 133 | BindObject = 0x0, |
| 129 | CountBufferMethods = 0x40, | 134 | Nop = 0x2, |
| 135 | SemaphoreAddressHigh = 0x4, | ||
| 136 | SemaphoreAddressLow = 0x5, | ||
| 137 | SemaphoreSequence = 0x6, | ||
| 138 | SemaphoreTrigger = 0x7, | ||
| 139 | NotifyIntr = 0x8, | ||
| 140 | WrcacheFlush = 0x9, | ||
| 141 | Unk28 = 0xA, | ||
| 142 | Unk2c = 0xB, | ||
| 143 | RefCnt = 0x14, | ||
| 144 | SemaphoreAcquire = 0x1A, | ||
| 145 | SemaphoreRelease = 0x1B, | ||
| 146 | Unk70 = 0x1C, | ||
| 147 | Unk74 = 0x1D, | ||
| 148 | Unk78 = 0x1E, | ||
| 149 | Unk7c = 0x1F, | ||
| 150 | Yield = 0x20, | ||
| 151 | NonPullerMethods = 0x40, | ||
| 152 | }; | ||
| 153 | |||
| 154 | enum class GpuSemaphoreOperation { | ||
| 155 | AcquireEqual = 0x1, | ||
| 156 | WriteLong = 0x2, | ||
| 157 | AcquireGequal = 0x4, | ||
| 158 | AcquireMask = 0x8, | ||
| 130 | }; | 159 | }; |
| 131 | 160 | ||
| 132 | void GPU::CallMethod(const MethodCall& method_call) { | 161 | void GPU::CallMethod(const MethodCall& method_call) { |
| @@ -135,20 +164,78 @@ void GPU::CallMethod(const MethodCall& method_call) { | |||
| 135 | 164 | ||
| 136 | ASSERT(method_call.subchannel < bound_engines.size()); | 165 | ASSERT(method_call.subchannel < bound_engines.size()); |
| 137 | 166 | ||
| 138 | if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) { | 167 | if (ExecuteMethodOnEngine(method_call)) { |
| 139 | // Bind the current subchannel to the desired engine id. | 168 | CallEngineMethod(method_call); |
| 140 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | 169 | } else { |
| 141 | method_call.argument); | 170 | CallPullerMethod(method_call); |
| 142 | bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); | ||
| 143 | return; | ||
| 144 | } | 171 | } |
| 172 | } | ||
| 173 | |||
| 174 | bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) { | ||
| 175 | const auto method = static_cast<BufferMethods>(method_call.method); | ||
| 176 | return method >= BufferMethods::NonPullerMethods; | ||
| 177 | } | ||
| 145 | 178 | ||
| 146 | if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) { | 179 | void GPU::CallPullerMethod(const MethodCall& method_call) { |
| 147 | // TODO(Subv): Research and implement these methods. | 180 | regs.reg_array[method_call.method] = method_call.argument; |
| 148 | LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); | 181 | const auto method = static_cast<BufferMethods>(method_call.method); |
| 149 | return; | 182 | |
| 183 | switch (method) { | ||
| 184 | case BufferMethods::BindObject: { | ||
| 185 | ProcessBindMethod(method_call); | ||
| 186 | break; | ||
| 187 | } | ||
| 188 | case BufferMethods::Nop: | ||
| 189 | case BufferMethods::SemaphoreAddressHigh: | ||
| 190 | case BufferMethods::SemaphoreAddressLow: | ||
| 191 | case BufferMethods::SemaphoreSequence: | ||
| 192 | case BufferMethods::RefCnt: | ||
| 193 | break; | ||
| 194 | case BufferMethods::SemaphoreTrigger: { | ||
| 195 | ProcessSemaphoreTriggerMethod(); | ||
| 196 | break; | ||
| 197 | } | ||
| 198 | case BufferMethods::NotifyIntr: { | ||
| 199 | // TODO(Kmather73): Research and implement this method. | ||
| 200 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | case BufferMethods::WrcacheFlush: { | ||
| 204 | // TODO(Kmather73): Research and implement this method. | ||
| 205 | LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented"); | ||
| 206 | break; | ||
| 207 | } | ||
| 208 | case BufferMethods::Unk28: { | ||
| 209 | // TODO(Kmather73): Research and implement this method. | ||
| 210 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | ||
| 211 | break; | ||
| 212 | } | ||
| 213 | case BufferMethods::Unk2c: { | ||
| 214 | // TODO(Kmather73): Research and implement this method. | ||
| 215 | LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented"); | ||
| 216 | break; | ||
| 217 | } | ||
| 218 | case BufferMethods::SemaphoreAcquire: { | ||
| 219 | ProcessSemaphoreAcquire(); | ||
| 220 | break; | ||
| 150 | } | 221 | } |
| 222 | case BufferMethods::SemaphoreRelease: { | ||
| 223 | ProcessSemaphoreRelease(); | ||
| 224 | break; | ||
| 225 | } | ||
| 226 | case BufferMethods::Yield: { | ||
| 227 | // TODO(Kmather73): Research and implement this method. | ||
| 228 | LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented"); | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | default: | ||
| 232 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", | ||
| 233 | static_cast<u32>(method)); | ||
| 234 | break; | ||
| 235 | } | ||
| 236 | } | ||
| 151 | 237 | ||
| 238 | void GPU::CallEngineMethod(const MethodCall& method_call) { | ||
| 152 | const EngineID engine = bound_engines[method_call.subchannel]; | 239 | const EngineID engine = bound_engines[method_call.subchannel]; |
| 153 | 240 | ||
| 154 | switch (engine) { | 241 | switch (engine) { |
| @@ -172,4 +259,76 @@ void GPU::CallMethod(const MethodCall& method_call) { | |||
| 172 | } | 259 | } |
| 173 | } | 260 | } |
| 174 | 261 | ||
| 262 | void GPU::ProcessBindMethod(const MethodCall& method_call) { | ||
| 263 | // Bind the current subchannel to the desired engine id. | ||
| 264 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | ||
| 265 | method_call.argument); | ||
| 266 | bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); | ||
| 267 | } | ||
| 268 | |||
| 269 | void GPU::ProcessSemaphoreTriggerMethod() { | ||
| 270 | const auto semaphoreOperationMask = 0xF; | ||
| 271 | const auto op = | ||
| 272 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | ||
| 273 | if (op == GpuSemaphoreOperation::WriteLong) { | ||
| 274 | auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 275 | struct Block { | ||
| 276 | u32 sequence; | ||
| 277 | u32 zeros = 0; | ||
| 278 | u64 timestamp; | ||
| 279 | }; | ||
| 280 | |||
| 281 | Block block{}; | ||
| 282 | block.sequence = regs.semaphore_sequence; | ||
| 283 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | ||
| 284 | // CoreTiming | ||
| 285 | block.timestamp = CoreTiming::GetTicks(); | ||
| 286 | Memory::WriteBlock(*address, &block, sizeof(block)); | ||
| 287 | } else { | ||
| 288 | const auto address = | ||
| 289 | memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 290 | const u32 word = Memory::Read32(*address); | ||
| 291 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | ||
| 292 | (op == GpuSemaphoreOperation::AcquireGequal && | ||
| 293 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||
| 294 | (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { | ||
| 295 | // Nothing to do in this case | ||
| 296 | } else { | ||
| 297 | regs.acquire_source = true; | ||
| 298 | regs.acquire_value = regs.semaphore_sequence; | ||
| 299 | if (op == GpuSemaphoreOperation::AcquireEqual) { | ||
| 300 | regs.acquire_active = true; | ||
| 301 | regs.acquire_mode = false; | ||
| 302 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | ||
| 303 | regs.acquire_active = true; | ||
| 304 | regs.acquire_mode = true; | ||
| 305 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | ||
| 306 | // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with | ||
| 307 | // semaphore_sequence, gives a non-0 result | ||
| 308 | LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); | ||
| 309 | } else { | ||
| 310 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); | ||
| 311 | } | ||
| 312 | } | ||
| 313 | } | ||
| 314 | } | ||
| 315 | |||
| 316 | void GPU::ProcessSemaphoreRelease() { | ||
| 317 | const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 318 | Memory::Write32(*address, regs.semaphore_release); | ||
| 319 | } | ||
| 320 | |||
| 321 | void GPU::ProcessSemaphoreAcquire() { | ||
| 322 | const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 323 | const u32 word = Memory::Read32(*address); | ||
| 324 | const auto value = regs.semaphore_acquire; | ||
| 325 | if (word != value) { | ||
| 326 | regs.acquire_active = true; | ||
| 327 | regs.acquire_value = value; | ||
| 328 | // TODO(kemathe73) figure out how to do the acquire_timeout | ||
| 329 | regs.acquire_mode = false; | ||
| 330 | regs.acquire_source = false; | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 175 | } // namespace Tegra | 334 | } // namespace Tegra |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index af5ccd1e9..fb8975811 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -156,6 +156,46 @@ public: | |||
| 156 | /// Returns a const reference to the GPU DMA pusher. | 156 | /// Returns a const reference to the GPU DMA pusher. |
| 157 | const Tegra::DmaPusher& DmaPusher() const; | 157 | const Tegra::DmaPusher& DmaPusher() const; |
| 158 | 158 | ||
| 159 | struct Regs { | ||
| 160 | static constexpr size_t NUM_REGS = 0x100; | ||
| 161 | |||
| 162 | union { | ||
| 163 | struct { | ||
| 164 | INSERT_PADDING_WORDS(0x4); | ||
| 165 | struct { | ||
| 166 | u32 address_high; | ||
| 167 | u32 address_low; | ||
| 168 | |||
| 169 | GPUVAddr SmaphoreAddress() const { | ||
| 170 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 171 | address_low); | ||
| 172 | } | ||
| 173 | } smaphore_address; | ||
| 174 | |||
| 175 | u32 semaphore_sequence; | ||
| 176 | u32 semaphore_trigger; | ||
| 177 | INSERT_PADDING_WORDS(0xC); | ||
| 178 | |||
| 179 | // The pusher and the puller share the reference counter, the pusher only has read | ||
| 180 | // access | ||
| 181 | u32 reference_count; | ||
| 182 | INSERT_PADDING_WORDS(0x5); | ||
| 183 | |||
| 184 | u32 semaphore_acquire; | ||
| 185 | u32 semaphore_release; | ||
| 186 | INSERT_PADDING_WORDS(0xE4); | ||
| 187 | |||
| 188 | // Puller state | ||
| 189 | u32 acquire_mode; | ||
| 190 | u32 acquire_source; | ||
| 191 | u32 acquire_active; | ||
| 192 | u32 acquire_timeout; | ||
| 193 | u32 acquire_value; | ||
| 194 | }; | ||
| 195 | std::array<u32, NUM_REGS> reg_array; | ||
| 196 | }; | ||
| 197 | } regs{}; | ||
| 198 | |||
| 159 | private: | 199 | private: |
| 160 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 200 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 161 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 201 | std::unique_ptr<Tegra::MemoryManager> memory_manager; |
| @@ -173,6 +213,37 @@ private: | |||
| 173 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 213 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 174 | /// Inline memory engine | 214 | /// Inline memory engine |
| 175 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | 215 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; |
| 216 | |||
| 217 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 218 | void ProcessSemaphoreTriggerMethod(); | ||
| 219 | void ProcessSemaphoreRelease(); | ||
| 220 | void ProcessSemaphoreAcquire(); | ||
| 221 | |||
| 222 | // Calls a GPU puller method. | ||
| 223 | void CallPullerMethod(const MethodCall& method_call); | ||
| 224 | // Calls a GPU engine method. | ||
| 225 | void CallEngineMethod(const MethodCall& method_call); | ||
| 226 | // Determines where the method should be executed. | ||
| 227 | bool ExecuteMethodOnEngine(const MethodCall& method_call); | ||
| 176 | }; | 228 | }; |
| 177 | 229 | ||
| 230 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 231 | static_assert(offsetof(GPU::Regs, field_name) == position * 4, \ | ||
| 232 | "Field " #field_name " has invalid position") | ||
| 233 | |||
| 234 | ASSERT_REG_POSITION(smaphore_address, 0x4); | ||
| 235 | ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||
| 236 | ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||
| 237 | ASSERT_REG_POSITION(reference_count, 0x14); | ||
| 238 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||
| 239 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||
| 240 | |||
| 241 | ASSERT_REG_POSITION(acquire_mode, 0x100); | ||
| 242 | ASSERT_REG_POSITION(acquire_source, 0x101); | ||
| 243 | ASSERT_REG_POSITION(acquire_active, 0x102); | ||
| 244 | ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||
| 245 | ASSERT_REG_POSITION(acquire_value, 0x104); | ||
| 246 | |||
| 247 | #undef ASSERT_REG_POSITION | ||
| 248 | |||
| 178 | } // namespace Tegra | 249 | } // namespace Tegra |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 7992b82c4..c7f32feaa 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp | |||
| @@ -4,8 +4,13 @@ | |||
| 4 | 4 | ||
| 5 | #include <glad/glad.h> | 5 | #include <glad/glad.h> |
| 6 | 6 | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/logging/log.h" | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "core/memory.h" | ||
| 7 | #include "video_core/renderer_opengl/gl_global_cache.h" | 11 | #include "video_core/renderer_opengl/gl_global_cache.h" |
| 8 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 9 | #include "video_core/renderer_opengl/utils.h" | 14 | #include "video_core/renderer_opengl/utils.h" |
| 10 | 15 | ||
| 11 | namespace OpenGL { | 16 | namespace OpenGL { |
| @@ -18,7 +23,72 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{ | |||
| 18 | LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); | 23 | LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); |
| 19 | } | 24 | } |
| 20 | 25 | ||
| 26 | void CachedGlobalRegion::Reload(u32 size_) { | ||
| 27 | constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); | ||
| 28 | |||
| 29 | size = size_; | ||
| 30 | if (size > max_size) { | ||
| 31 | size = max_size; | ||
| 32 | LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, | ||
| 33 | max_size); | ||
| 34 | } | ||
| 35 | |||
| 36 | // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer | ||
| 37 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); | ||
| 38 | glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); | ||
| 39 | } | ||
| 40 | |||
| 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { | ||
| 42 | const auto search{reserve.find(addr)}; | ||
| 43 | if (search == reserve.end()) { | ||
| 44 | return {}; | ||
| 45 | } | ||
| 46 | return search->second; | ||
| 47 | } | ||
| 48 | |||
| 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { | ||
| 50 | GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; | ||
| 51 | if (!region) { | ||
| 52 | // No reserved global region available, create a new one and reserve it | ||
| 53 | region = std::make_shared<CachedGlobalRegion>(addr, size); | ||
| 54 | ReserveGlobalRegion(region); | ||
| 55 | } | ||
| 56 | region->Reload(size); | ||
| 57 | return region; | ||
| 58 | } | ||
| 59 | |||
| 60 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) { | ||
| 61 | reserve[region->GetAddr()] = region; | ||
| 62 | } | ||
| 63 | |||
| 21 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | 64 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) |
| 22 | : RasterizerCache{rasterizer} {} | 65 | : RasterizerCache{rasterizer} {} |
| 23 | 66 | ||
| 67 | GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||
| 68 | const GLShader::GlobalMemoryEntry& global_region, | ||
| 69 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | ||
| 70 | |||
| 71 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 72 | const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; | ||
| 73 | const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( | ||
| 74 | cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); | ||
| 75 | ASSERT(cbuf_addr); | ||
| 76 | |||
| 77 | const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); | ||
| 78 | const auto size = Memory::Read32(*cbuf_addr + 8); | ||
| 79 | const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu); | ||
| 80 | ASSERT(actual_addr); | ||
| 81 | |||
| 82 | // Look up global region in the cache based on address | ||
| 83 | GlobalRegion region = TryGet(*actual_addr); | ||
| 84 | |||
| 85 | if (!region) { | ||
| 86 | // No global region found - create a new one | ||
| 87 | region = GetUncachedGlobalRegion(*actual_addr, size); | ||
| 88 | Register(region); | ||
| 89 | } | ||
| 90 | |||
| 91 | return region; | ||
| 92 | } | ||
| 93 | |||
| 24 | } // namespace OpenGL | 94 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 406a735bc..37830bb7c 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h | |||
| @@ -5,9 +5,13 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <unordered_map> | ||
| 9 | |||
| 8 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 9 | 11 | ||
| 12 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/rasterizer_cache.h" | 15 | #include "video_core/rasterizer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | 17 | ||
| @@ -40,6 +44,9 @@ public: | |||
| 40 | return buffer.handle; | 44 | return buffer.handle; |
| 41 | } | 45 | } |
| 42 | 46 | ||
| 47 | /// Reloads the global region from guest memory | ||
| 48 | void Reload(u32 size_); | ||
| 49 | |||
| 43 | // TODO(Rodrigo): When global memory is written (STG), implement flushing | 50 | // TODO(Rodrigo): When global memory is written (STG), implement flushing |
| 44 | void Flush() override { | 51 | void Flush() override { |
| 45 | UNIMPLEMENTED(); | 52 | UNIMPLEMENTED(); |
| @@ -55,6 +62,17 @@ private: | |||
| 55 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { | 62 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { |
| 56 | public: | 63 | public: |
| 57 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); | 64 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); |
| 65 | |||
| 66 | /// Gets the global region referenced by the given entry for the specified shader stage | ||
| 67 | GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, | ||
| 68 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | ||
| 69 | |||
| 70 | private: | ||
| 71 | GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; | ||
| 72 | GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); | ||
| 73 | void ReserveGlobalRegion(const GlobalRegion& region); | ||
| 74 | |||
| 75 | std::unordered_map<VAddr, GlobalRegion> reserve; | ||
| 58 | }; | 76 | }; |
| 59 | 77 | ||
| 60 | } // namespace OpenGL | 78 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 71829fee0..ee313cb2f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -297,10 +297,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 297 | MICROPROFILE_SCOPE(OpenGL_Shader); | 297 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 298 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 298 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 299 | 299 | ||
| 300 | // Next available bindpoints to use when uploading the const buffers and textures to the GLSL | 300 | BaseBindings base_bindings; |
| 301 | // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. | ||
| 302 | u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | ||
| 303 | u32 current_texture_bindpoint = 0; | ||
| 304 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 301 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 305 | 302 | ||
| 306 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 303 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| @@ -324,43 +321,35 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 324 | const GLintptr offset = buffer_cache.UploadHostMemory( | 321 | const GLintptr offset = buffer_cache.UploadHostMemory( |
| 325 | &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); | 322 | &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); |
| 326 | 323 | ||
| 327 | // Bind the buffer | 324 | // Bind the emulation info buffer |
| 328 | glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(), | 325 | glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset, |
| 329 | offset, static_cast<GLsizeiptr>(sizeof(ubo))); | 326 | static_cast<GLsizeiptr>(sizeof(ubo))); |
| 330 | 327 | ||
| 331 | Shader shader{shader_cache.GetStageProgram(program)}; | 328 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 329 | const auto [program_handle, next_bindings] = | ||
| 330 | shader->GetProgramHandle(primitive_mode, base_bindings); | ||
| 332 | 331 | ||
| 333 | switch (program) { | 332 | switch (program) { |
| 334 | case Maxwell::ShaderProgram::VertexA: | 333 | case Maxwell::ShaderProgram::VertexA: |
| 335 | case Maxwell::ShaderProgram::VertexB: { | 334 | case Maxwell::ShaderProgram::VertexB: |
| 336 | shader_program_manager->UseProgrammableVertexShader( | 335 | shader_program_manager->UseProgrammableVertexShader(program_handle); |
| 337 | shader->GetProgramHandle(primitive_mode)); | ||
| 338 | break; | 336 | break; |
| 339 | } | 337 | case Maxwell::ShaderProgram::Geometry: |
| 340 | case Maxwell::ShaderProgram::Geometry: { | 338 | shader_program_manager->UseProgrammableGeometryShader(program_handle); |
| 341 | shader_program_manager->UseProgrammableGeometryShader( | ||
| 342 | shader->GetProgramHandle(primitive_mode)); | ||
| 343 | break; | 339 | break; |
| 344 | } | 340 | case Maxwell::ShaderProgram::Fragment: |
| 345 | case Maxwell::ShaderProgram::Fragment: { | 341 | shader_program_manager->UseProgrammableFragmentShader(program_handle); |
| 346 | shader_program_manager->UseProgrammableFragmentShader( | ||
| 347 | shader->GetProgramHandle(primitive_mode)); | ||
| 348 | break; | 342 | break; |
| 349 | } | ||
| 350 | default: | 343 | default: |
| 351 | LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | 344 | LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, |
| 352 | shader_config.enable.Value(), shader_config.offset); | 345 | shader_config.enable.Value(), shader_config.offset); |
| 353 | UNREACHABLE(); | 346 | UNREACHABLE(); |
| 354 | } | 347 | } |
| 355 | 348 | ||
| 356 | // Configure the const buffers for this shader stage. | 349 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
| 357 | current_constbuffer_bindpoint = | 350 | SetupConstBuffers(stage_enum, shader, program_handle, base_bindings); |
| 358 | SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, | 351 | SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings); |
| 359 | current_constbuffer_bindpoint); | 352 | SetupTextures(stage_enum, shader, program_handle, base_bindings); |
| 360 | |||
| 361 | // Configure the textures for this shader stage. | ||
| 362 | current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, | ||
| 363 | primitive_mode, current_texture_bindpoint); | ||
| 364 | 353 | ||
| 365 | // Workaround for Intel drivers. | 354 | // Workaround for Intel drivers. |
| 366 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 355 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| @@ -375,6 +364,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 375 | // VertexB was combined with VertexA, so we skip the VertexB iteration | 364 | // VertexB was combined with VertexA, so we skip the VertexB iteration |
| 376 | index++; | 365 | index++; |
| 377 | } | 366 | } |
| 367 | |||
| 368 | base_bindings = next_bindings; | ||
| 378 | } | 369 | } |
| 379 | 370 | ||
| 380 | SyncClipEnabled(clip_distances); | 371 | SyncClipEnabled(clip_distances); |
| @@ -924,8 +915,9 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr | |||
| 924 | } | 915 | } |
| 925 | } | 916 | } |
| 926 | 917 | ||
| 927 | u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader, | 918 | void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 928 | GLenum primitive_mode, u32 current_bindpoint) { | 919 | const Shader& shader, GLuint program_handle, |
| 920 | BaseBindings base_bindings) { | ||
| 929 | MICROPROFILE_SCOPE(OpenGL_UBO); | 921 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 930 | const auto& gpu = Core::System::GetInstance().GPU(); | 922 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 931 | const auto& maxwell3d = gpu.Maxwell3D(); | 923 | const auto& maxwell3d = gpu.Maxwell3D(); |
| @@ -973,75 +965,73 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | |||
| 973 | size = Common::AlignUp(size, sizeof(GLvec4)); | 965 | size = Common::AlignUp(size, sizeof(GLvec4)); |
| 974 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); | 966 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); |
| 975 | 967 | ||
| 976 | GLintptr const_buffer_offset = buffer_cache.UploadMemory( | 968 | const GLintptr const_buffer_offset = buffer_cache.UploadMemory( |
| 977 | buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); | 969 | buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); |
| 978 | 970 | ||
| 979 | // Now configure the bindpoint of the buffer inside the shader | ||
| 980 | glUniformBlockBinding(shader->GetProgramHandle(primitive_mode), | ||
| 981 | shader->GetProgramResourceIndex(used_buffer), | ||
| 982 | current_bindpoint + bindpoint); | ||
| 983 | |||
| 984 | // Prepare values for multibind | 971 | // Prepare values for multibind |
| 985 | bind_buffers[bindpoint] = buffer_cache.GetHandle(); | 972 | bind_buffers[bindpoint] = buffer_cache.GetHandle(); |
| 986 | bind_offsets[bindpoint] = const_buffer_offset; | 973 | bind_offsets[bindpoint] = const_buffer_offset; |
| 987 | bind_sizes[bindpoint] = size; | 974 | bind_sizes[bindpoint] = size; |
| 988 | } | 975 | } |
| 989 | 976 | ||
| 990 | glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()), | 977 | // The first binding is reserved for emulation values |
| 978 | const GLuint ubo_base_binding = base_bindings.cbuf + 1; | ||
| 979 | glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()), | ||
| 991 | bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); | 980 | bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); |
| 981 | } | ||
| 982 | |||
| 983 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||
| 984 | const Shader& shader, GLenum primitive_mode, | ||
| 985 | BaseBindings base_bindings) { | ||
| 986 | // TODO(Rodrigo): Use ARB_multi_bind here | ||
| 987 | const auto& entries = shader->GetShaderEntries().global_memory_entries; | ||
| 992 | 988 | ||
| 993 | return current_bindpoint + static_cast<u32>(entries.size()); | 989 | for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) { |
| 990 | const auto& entry = entries[bindpoint]; | ||
| 991 | const u32 current_bindpoint = base_bindings.gmem + bindpoint; | ||
| 992 | const auto& region = global_cache.GetGlobalRegion(entry, stage); | ||
| 993 | |||
| 994 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle()); | ||
| 995 | } | ||
| 994 | } | 996 | } |
| 995 | 997 | ||
| 996 | u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, | 998 | void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, |
| 997 | GLenum primitive_mode, u32 current_unit) { | 999 | GLuint program_handle, BaseBindings base_bindings) { |
| 998 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1000 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 999 | const auto& gpu = Core::System::GetInstance().GPU(); | 1001 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 1000 | const auto& maxwell3d = gpu.Maxwell3D(); | 1002 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 1001 | const auto& entries = shader->GetShaderEntries().samplers; | 1003 | const auto& entries = shader->GetShaderEntries().samplers; |
| 1002 | 1004 | ||
| 1003 | ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), | 1005 | ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), |
| 1004 | "Exceeded the number of active textures."); | 1006 | "Exceeded the number of active textures."); |
| 1005 | 1007 | ||
| 1006 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 1008 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { |
| 1007 | const auto& entry = entries[bindpoint]; | 1009 | const auto& entry = entries[bindpoint]; |
| 1008 | const u32 current_bindpoint = current_unit + bindpoint; | 1010 | const u32 current_bindpoint = base_bindings.sampler + bindpoint; |
| 1009 | 1011 | auto& unit = state.texture_units[current_bindpoint]; | |
| 1010 | // Bind the uniform to the sampler. | ||
| 1011 | |||
| 1012 | glProgramUniform1i(shader->GetProgramHandle(primitive_mode), | ||
| 1013 | shader->GetUniformLocation(entry), current_bindpoint); | ||
| 1014 | 1012 | ||
| 1015 | const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); | 1013 | const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); |
| 1016 | |||
| 1017 | if (!texture.enabled) { | 1014 | if (!texture.enabled) { |
| 1018 | state.texture_units[current_bindpoint].texture = 0; | 1015 | unit.texture = 0; |
| 1019 | continue; | 1016 | continue; |
| 1020 | } | 1017 | } |
| 1021 | 1018 | ||
| 1022 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); | 1019 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); |
| 1020 | |||
| 1023 | Surface surface = res_cache.GetTextureSurface(texture, entry); | 1021 | Surface surface = res_cache.GetTextureSurface(texture, entry); |
| 1024 | if (surface != nullptr) { | 1022 | if (surface != nullptr) { |
| 1025 | const GLuint handle = | 1023 | unit.texture = |
| 1026 | entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; | 1024 | entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; |
| 1027 | const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); | 1025 | unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); |
| 1028 | state.texture_units[current_bindpoint].texture = handle; | 1026 | unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source); |
| 1029 | state.texture_units[current_bindpoint].target = target; | 1027 | unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source); |
| 1030 | state.texture_units[current_bindpoint].swizzle.r = | 1028 | unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source); |
| 1031 | MaxwellToGL::SwizzleSource(texture.tic.x_source); | 1029 | unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source); |
| 1032 | state.texture_units[current_bindpoint].swizzle.g = | ||
| 1033 | MaxwellToGL::SwizzleSource(texture.tic.y_source); | ||
| 1034 | state.texture_units[current_bindpoint].swizzle.b = | ||
| 1035 | MaxwellToGL::SwizzleSource(texture.tic.z_source); | ||
| 1036 | state.texture_units[current_bindpoint].swizzle.a = | ||
| 1037 | MaxwellToGL::SwizzleSource(texture.tic.w_source); | ||
| 1038 | } else { | 1030 | } else { |
| 1039 | // Can occur when texture addr is null or its memory is unmapped/invalid | 1031 | // Can occur when texture addr is null or its memory is unmapped/invalid |
| 1040 | state.texture_units[current_bindpoint].texture = 0; | 1032 | unit.texture = 0; |
| 1041 | } | 1033 | } |
| 1042 | } | 1034 | } |
| 1043 | |||
| 1044 | return current_unit + static_cast<u32>(entries.size()); | ||
| 1045 | } | 1035 | } |
| 1046 | 1036 | ||
| 1047 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { | 1037 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 21c51f874..a103692f9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -127,25 +127,18 @@ private: | |||
| 127 | bool using_depth_fb = true, bool preserve_contents = true, | 127 | bool using_depth_fb = true, bool preserve_contents = true, |
| 128 | std::optional<std::size_t> single_color_target = {}); | 128 | std::optional<std::size_t> single_color_target = {}); |
| 129 | 129 | ||
| 130 | /** | 130 | /// Configures the current constbuffers to use for the draw command. |
| 131 | * Configures the current constbuffers to use for the draw command. | 131 | void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, |
| 132 | * @param stage The shader stage to configure buffers for. | 132 | GLuint program_handle, BaseBindings base_bindings); |
| 133 | * @param shader The shader object that contains the specified stage. | 133 | |
| 134 | * @param current_bindpoint The offset at which to start counting new buffer bindpoints. | 134 | /// Configures the current global memory entries to use for the draw command. |
| 135 | * @returns The next available bindpoint for use in the next shader stage. | 135 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 136 | */ | 136 | const Shader& shader, GLenum primitive_mode, |
| 137 | u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, | 137 | BaseBindings base_bindings); |
| 138 | GLenum primitive_mode, u32 current_bindpoint); | 138 | |
| 139 | 139 | /// Configures the current textures to use for the draw command. | |
| 140 | /** | 140 | void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, |
| 141 | * Configures the current textures to use for the draw command. | 141 | GLuint program_handle, BaseBindings base_bindings); |
| 142 | * @param stage The shader stage to configure textures for. | ||
| 143 | * @param shader The shader object that contains the specified stage. | ||
| 144 | * @param current_unit The offset at which to start counting unused texture units. | ||
| 145 | * @returns The next available bindpoint for use in the next shader stage. | ||
| 146 | */ | ||
| 147 | u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, | ||
| 148 | GLenum primitive_mode, u32 current_unit); | ||
| 149 | 142 | ||
| 150 | /// Syncs the viewport and depth range to match the guest state | 143 | /// Syncs the viewport and depth range to match the guest state |
| 151 | void SyncViewport(OpenGLState& current_state); | 144 | void SyncViewport(OpenGLState& current_state); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b3aca39af..90eda7814 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -34,36 +34,25 @@ static ProgramCode GetShaderCode(VAddr addr) { | |||
| 34 | return program_code; | 34 | return program_code; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | /// Helper function to set shader uniform block bindings for a single shader stage | 37 | /// Gets the shader type from a Maxwell program type |
| 38 | static void SetShaderUniformBlockBinding(GLuint shader, const char* name, | 38 | constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { |
| 39 | Maxwell::ShaderStage binding, std::size_t expected_size) { | 39 | switch (program_type) { |
| 40 | const GLuint ub_index = glGetUniformBlockIndex(shader, name); | 40 | case Maxwell::ShaderProgram::VertexA: |
| 41 | if (ub_index == GL_INVALID_INDEX) { | 41 | case Maxwell::ShaderProgram::VertexB: |
| 42 | return; | 42 | return GL_VERTEX_SHADER; |
| 43 | case Maxwell::ShaderProgram::Geometry: | ||
| 44 | return GL_GEOMETRY_SHADER; | ||
| 45 | case Maxwell::ShaderProgram::Fragment: | ||
| 46 | return GL_FRAGMENT_SHADER; | ||
| 47 | default: | ||
| 48 | return GL_NONE; | ||
| 43 | } | 49 | } |
| 44 | |||
| 45 | GLint ub_size = 0; | ||
| 46 | glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||
| 47 | ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size, | ||
| 48 | "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); | ||
| 49 | glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Sets shader uniform block bindings for an entire shader program | ||
| 53 | static void SetShaderUniformBlockBindings(GLuint shader) { | ||
| 54 | SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex, | ||
| 55 | sizeof(GLShader::MaxwellUniformData)); | ||
| 56 | SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry, | ||
| 57 | sizeof(GLShader::MaxwellUniformData)); | ||
| 58 | SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment, | ||
| 59 | sizeof(GLShader::MaxwellUniformData)); | ||
| 60 | } | 50 | } |
| 61 | 51 | ||
| 62 | CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | 52 | CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) |
| 63 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { | 53 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { |
| 64 | 54 | ||
| 65 | GLShader::ProgramResult program_result; | 55 | GLShader::ProgramResult program_result; |
| 66 | GLenum gl_type{}; | ||
| 67 | 56 | ||
| 68 | switch (program_type) { | 57 | switch (program_type) { |
| 69 | case Maxwell::ShaderProgram::VertexA: | 58 | case Maxwell::ShaderProgram::VertexA: |
| @@ -74,17 +63,14 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | |||
| 74 | case Maxwell::ShaderProgram::VertexB: | 63 | case Maxwell::ShaderProgram::VertexB: |
| 75 | CalculateProperties(); | 64 | CalculateProperties(); |
| 76 | program_result = GLShader::GenerateVertexShader(setup); | 65 | program_result = GLShader::GenerateVertexShader(setup); |
| 77 | gl_type = GL_VERTEX_SHADER; | ||
| 78 | break; | 66 | break; |
| 79 | case Maxwell::ShaderProgram::Geometry: | 67 | case Maxwell::ShaderProgram::Geometry: |
| 80 | CalculateProperties(); | 68 | CalculateProperties(); |
| 81 | program_result = GLShader::GenerateGeometryShader(setup); | 69 | program_result = GLShader::GenerateGeometryShader(setup); |
| 82 | gl_type = GL_GEOMETRY_SHADER; | ||
| 83 | break; | 70 | break; |
| 84 | case Maxwell::ShaderProgram::Fragment: | 71 | case Maxwell::ShaderProgram::Fragment: |
| 85 | CalculateProperties(); | 72 | CalculateProperties(); |
| 86 | program_result = GLShader::GenerateFragmentShader(setup); | 73 | program_result = GLShader::GenerateFragmentShader(setup); |
| 87 | gl_type = GL_FRAGMENT_SHADER; | ||
| 88 | break; | 74 | break; |
| 89 | default: | 75 | default: |
| 90 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 76 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); |
| @@ -92,59 +78,105 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | |||
| 92 | return; | 78 | return; |
| 93 | } | 79 | } |
| 94 | 80 | ||
| 81 | code = program_result.first; | ||
| 95 | entries = program_result.second; | 82 | entries = program_result.second; |
| 96 | shader_length = entries.shader_length; | 83 | shader_length = entries.shader_length; |
| 84 | } | ||
| 97 | 85 | ||
| 98 | if (program_type != Maxwell::ShaderProgram::Geometry) { | 86 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, |
| 99 | OGLShader shader; | 87 | BaseBindings base_bindings) { |
| 100 | shader.Create(program_result.first.c_str(), gl_type); | 88 | GLuint handle{}; |
| 101 | program.Create(true, shader.handle); | 89 | if (program_type == Maxwell::ShaderProgram::Geometry) { |
| 102 | SetShaderUniformBlockBindings(program.handle); | 90 | handle = GetGeometryShader(primitive_mode, base_bindings); |
| 103 | LabelGLObject(GL_PROGRAM, program.handle, addr); | ||
| 104 | } else { | 91 | } else { |
| 105 | // Store shader's code to lazily build it on draw | 92 | const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); |
| 106 | geometry_programs.code = program_result.first; | 93 | auto& program = entry->second; |
| 94 | if (is_cache_miss) { | ||
| 95 | std::string source = AllocateBindings(base_bindings); | ||
| 96 | source += code; | ||
| 97 | |||
| 98 | OGLShader shader; | ||
| 99 | shader.Create(source.c_str(), GetShaderType(program_type)); | ||
| 100 | program.Create(true, shader.handle); | ||
| 101 | LabelGLObject(GL_PROGRAM, program.handle, addr); | ||
| 102 | } | ||
| 103 | |||
| 104 | handle = program.handle; | ||
| 107 | } | 105 | } |
| 106 | |||
| 107 | // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for | ||
| 108 | // emulation values | ||
| 109 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1; | ||
| 110 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | ||
| 111 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | ||
| 112 | |||
| 113 | return {handle, base_bindings}; | ||
| 108 | } | 114 | } |
| 109 | 115 | ||
| 110 | GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { | 116 | std::string CachedShader::AllocateBindings(BaseBindings base_bindings) { |
| 111 | const auto search{resource_cache.find(buffer.GetHash())}; | 117 | std::string code = "#version 430 core\n"; |
| 112 | if (search == resource_cache.end()) { | 118 | code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); |
| 113 | const GLuint index{ | 119 | |
| 114 | glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())}; | 120 | for (const auto& cbuf : entries.const_buffers) { |
| 115 | resource_cache[buffer.GetHash()] = index; | 121 | code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++); |
| 116 | return index; | ||
| 117 | } | 122 | } |
| 118 | 123 | ||
| 119 | return search->second; | 124 | for (const auto& gmem : entries.global_memory_entries) { |
| 120 | } | 125 | code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(), |
| 126 | gmem.GetCbufOffset(), base_bindings.gmem++); | ||
| 127 | } | ||
| 121 | 128 | ||
| 122 | GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) { | 129 | for (const auto& sampler : entries.samplers) { |
| 123 | const auto search{uniform_cache.find(sampler.GetHash())}; | 130 | code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), |
| 124 | if (search == uniform_cache.end()) { | 131 | base_bindings.sampler++); |
| 125 | const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())}; | ||
| 126 | uniform_cache[sampler.GetHash()] = index; | ||
| 127 | return index; | ||
| 128 | } | 132 | } |
| 129 | 133 | ||
| 130 | return search->second; | 134 | return code; |
| 135 | } | ||
| 136 | |||
| 137 | GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { | ||
| 138 | const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); | ||
| 139 | auto& programs = entry->second; | ||
| 140 | |||
| 141 | switch (primitive_mode) { | ||
| 142 | case GL_POINTS: | ||
| 143 | return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); | ||
| 144 | case GL_LINES: | ||
| 145 | case GL_LINE_STRIP: | ||
| 146 | return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines"); | ||
| 147 | case GL_LINES_ADJACENCY: | ||
| 148 | case GL_LINE_STRIP_ADJACENCY: | ||
| 149 | return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4, | ||
| 150 | "ShaderLinesAdjacency"); | ||
| 151 | case GL_TRIANGLES: | ||
| 152 | case GL_TRIANGLE_STRIP: | ||
| 153 | case GL_TRIANGLE_FAN: | ||
| 154 | return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3, | ||
| 155 | "ShaderTriangles"); | ||
| 156 | case GL_TRIANGLES_ADJACENCY: | ||
| 157 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 158 | return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, | ||
| 159 | "triangles_adjacency", 6, "ShaderTrianglesAdjacency"); | ||
| 160 | default: | ||
| 161 | UNREACHABLE_MSG("Unknown primitive mode."); | ||
| 162 | return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); | ||
| 163 | } | ||
| 131 | } | 164 | } |
| 132 | 165 | ||
| 133 | GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, | 166 | GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, |
| 134 | const std::string& glsl_topology, u32 max_vertices, | 167 | const std::string& glsl_topology, u32 max_vertices, |
| 135 | const std::string& debug_name) { | 168 | const std::string& debug_name) { |
| 136 | if (target_program.handle != 0) { | 169 | if (target_program.handle != 0) { |
| 137 | return target_program.handle; | 170 | return target_program.handle; |
| 138 | } | 171 | } |
| 139 | std::string source = "#version 430 core\n"; | 172 | std::string source = AllocateBindings(base_bindings); |
| 140 | source += "layout (" + glsl_topology + ") in;\n"; | 173 | source += "layout (" + glsl_topology + ") in;\n"; |
| 141 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | 174 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; |
| 142 | source += geometry_programs.code; | 175 | source += code; |
| 143 | 176 | ||
| 144 | OGLShader shader; | 177 | OGLShader shader; |
| 145 | shader.Create(source.c_str(), GL_GEOMETRY_SHADER); | 178 | shader.Create(source.c_str(), GL_GEOMETRY_SHADER); |
| 146 | target_program.Create(true, shader.handle); | 179 | target_program.Create(true, shader.handle); |
| 147 | SetShaderUniformBlockBindings(target_program.handle); | ||
| 148 | LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); | 180 | LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); |
| 149 | return target_program.handle; | 181 | return target_program.handle; |
| 150 | }; | 182 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index e0887dd7b..904d15dd0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -7,6 +7,9 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <map> | 8 | #include <map> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <tuple> | ||
| 11 | |||
| 12 | #include <glad/glad.h> | ||
| 10 | 13 | ||
| 11 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| @@ -23,6 +26,16 @@ class RasterizerOpenGL; | |||
| 23 | using Shader = std::shared_ptr<CachedShader>; | 26 | using Shader = std::shared_ptr<CachedShader>; |
| 24 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 27 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 25 | 28 | ||
| 29 | struct BaseBindings { | ||
| 30 | u32 cbuf{}; | ||
| 31 | u32 gmem{}; | ||
| 32 | u32 sampler{}; | ||
| 33 | |||
| 34 | bool operator<(const BaseBindings& rhs) const { | ||
| 35 | return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler); | ||
| 36 | } | ||
| 37 | }; | ||
| 38 | |||
| 26 | class CachedShader final : public RasterizerCacheObject { | 39 | class CachedShader final : public RasterizerCacheObject { |
| 27 | public: | 40 | public: |
| 28 | CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); | 41 | CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); |
| @@ -44,70 +57,45 @@ public: | |||
| 44 | } | 57 | } |
| 45 | 58 | ||
| 46 | /// Gets the GL program handle for the shader | 59 | /// Gets the GL program handle for the shader |
| 47 | GLuint GetProgramHandle(GLenum primitive_mode) { | 60 | std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode, |
| 48 | if (program_type != Maxwell::ShaderProgram::Geometry) { | 61 | BaseBindings base_bindings); |
| 49 | return program.handle; | ||
| 50 | } | ||
| 51 | switch (primitive_mode) { | ||
| 52 | case GL_POINTS: | ||
| 53 | return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints"); | ||
| 54 | case GL_LINES: | ||
| 55 | case GL_LINE_STRIP: | ||
| 56 | return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines"); | ||
| 57 | case GL_LINES_ADJACENCY: | ||
| 58 | case GL_LINE_STRIP_ADJACENCY: | ||
| 59 | return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4, | ||
| 60 | "ShaderLinesAdjacency"); | ||
| 61 | case GL_TRIANGLES: | ||
| 62 | case GL_TRIANGLE_STRIP: | ||
| 63 | case GL_TRIANGLE_FAN: | ||
| 64 | return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3, | ||
| 65 | "ShaderTriangles"); | ||
| 66 | case GL_TRIANGLES_ADJACENCY: | ||
| 67 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 68 | return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency", | ||
| 69 | 6, "ShaderTrianglesAdjacency"); | ||
| 70 | default: | ||
| 71 | UNREACHABLE_MSG("Unknown primitive mode."); | ||
| 72 | return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints"); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | 62 | ||
| 76 | /// Gets the GL program resource location for the specified resource, caching as needed | 63 | private: |
| 77 | GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer); | 64 | // Geometry programs. These are needed because GLSL needs an input topology but it's not |
| 65 | // declared by the hardware. Workaround this issue by generating a different shader per input | ||
| 66 | // topology class. | ||
| 67 | struct GeometryPrograms { | ||
| 68 | OGLProgram points; | ||
| 69 | OGLProgram lines; | ||
| 70 | OGLProgram lines_adjacency; | ||
| 71 | OGLProgram triangles; | ||
| 72 | OGLProgram triangles_adjacency; | ||
| 73 | }; | ||
| 78 | 74 | ||
| 79 | /// Gets the GL uniform location for the specified resource, caching as needed | 75 | std::string AllocateBindings(BaseBindings base_bindings); |
| 80 | GLint GetUniformLocation(const GLShader::SamplerEntry& sampler); | 76 | |
| 77 | GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); | ||
| 81 | 78 | ||
| 82 | private: | ||
| 83 | /// Generates a geometry shader or returns one that already exists. | 79 | /// Generates a geometry shader or returns one that already exists. |
| 84 | GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology, | 80 | GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, |
| 85 | u32 max_vertices, const std::string& debug_name); | 81 | const std::string& glsl_topology, u32 max_vertices, |
| 82 | const std::string& debug_name); | ||
| 86 | 83 | ||
| 87 | void CalculateProperties(); | 84 | void CalculateProperties(); |
| 88 | 85 | ||
| 89 | VAddr addr; | 86 | VAddr addr{}; |
| 90 | std::size_t shader_length; | 87 | std::size_t shader_length{}; |
| 91 | Maxwell::ShaderProgram program_type; | 88 | Maxwell::ShaderProgram program_type{}; |
| 92 | GLShader::ShaderSetup setup; | 89 | GLShader::ShaderSetup setup; |
| 93 | GLShader::ShaderEntries entries; | 90 | GLShader::ShaderEntries entries; |
| 94 | 91 | ||
| 95 | // Non-geometry program. | 92 | std::string code; |
| 96 | OGLProgram program; | ||
| 97 | 93 | ||
| 98 | // Geometry programs. These are needed because GLSL needs an input topology but it's not | 94 | std::map<BaseBindings, OGLProgram> programs; |
| 99 | // declared by the hardware. Workaround this issue by generating a different shader per input | 95 | std::map<BaseBindings, GeometryPrograms> geometry_programs; |
| 100 | // topology class. | ||
| 101 | struct { | ||
| 102 | std::string code; | ||
| 103 | OGLProgram points; | ||
| 104 | OGLProgram lines; | ||
| 105 | OGLProgram lines_adjacency; | ||
| 106 | OGLProgram triangles; | ||
| 107 | OGLProgram triangles_adjacency; | ||
| 108 | } geometry_programs; | ||
| 109 | 96 | ||
| 110 | std::map<u32, GLuint> resource_cache; | 97 | std::map<u32, GLuint> cbuf_resource_cache; |
| 98 | std::map<u32, GLuint> gmem_resource_cache; | ||
| 111 | std::map<u32, GLint> uniform_cache; | 99 | std::map<u32, GLint> uniform_cache; |
| 112 | }; | 100 | }; |
| 113 | 101 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3411cf9e6..004245431 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -34,6 +34,8 @@ using Operation = const OperationNode&; | |||
| 34 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | 34 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; |
| 35 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 35 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 36 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | 36 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); |
| 37 | constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = | ||
| 38 | static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); | ||
| 37 | 39 | ||
| 38 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 40 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 39 | 41 | ||
| @@ -143,6 +145,7 @@ public: | |||
| 143 | DeclareInputAttributes(); | 145 | DeclareInputAttributes(); |
| 144 | DeclareOutputAttributes(); | 146 | DeclareOutputAttributes(); |
| 145 | DeclareConstantBuffers(); | 147 | DeclareConstantBuffers(); |
| 148 | DeclareGlobalMemory(); | ||
| 146 | DeclareSamplers(); | 149 | DeclareSamplers(); |
| 147 | 150 | ||
| 148 | code.AddLine("void execute_" + suffix + "() {"); | 151 | code.AddLine("void execute_" + suffix + "() {"); |
| @@ -190,12 +193,15 @@ public: | |||
| 190 | ShaderEntries GetShaderEntries() const { | 193 | ShaderEntries GetShaderEntries() const { |
| 191 | ShaderEntries entries; | 194 | ShaderEntries entries; |
| 192 | for (const auto& cbuf : ir.GetConstantBuffers()) { | 195 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 193 | ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first); | 196 | entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first), |
| 194 | entries.const_buffers.push_back(desc); | 197 | cbuf.first); |
| 195 | } | 198 | } |
| 196 | for (const auto& sampler : ir.GetSamplers()) { | 199 | for (const auto& sampler : ir.GetSamplers()) { |
| 197 | SamplerEntry desc(sampler, stage, GetSampler(sampler)); | 200 | entries.samplers.emplace_back(sampler, stage, GetSampler(sampler)); |
| 198 | entries.samplers.push_back(desc); | 201 | } |
| 202 | for (const auto& gmem : ir.GetGlobalMemoryBases()) { | ||
| 203 | entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage, | ||
| 204 | GetGlobalMemoryBlock(gmem)); | ||
| 199 | } | 205 | } |
| 200 | entries.clip_distances = ir.GetClipDistances(); | 206 | entries.clip_distances = ir.GetClipDistances(); |
| 201 | entries.shader_length = ir.GetLength(); | 207 | entries.shader_length = ir.GetLength(); |
| @@ -368,13 +374,26 @@ private: | |||
| 368 | void DeclareConstantBuffers() { | 374 | void DeclareConstantBuffers() { |
| 369 | for (const auto& entry : ir.GetConstantBuffers()) { | 375 | for (const auto& entry : ir.GetConstantBuffers()) { |
| 370 | const auto [index, size] = entry; | 376 | const auto [index, size] = entry; |
| 371 | code.AddLine("layout (std140) uniform " + GetConstBufferBlock(index) + " {"); | 377 | code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) + |
| 378 | ") uniform " + GetConstBufferBlock(index) + " {"); | ||
| 372 | code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];"); | 379 | code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];"); |
| 373 | code.AddLine("};"); | 380 | code.AddLine("};"); |
| 374 | code.AddNewLine(); | 381 | code.AddNewLine(); |
| 375 | } | 382 | } |
| 376 | } | 383 | } |
| 377 | 384 | ||
/// Emits one GLSL `buffer` interface block per global memory base reported by the IR.
/// Each block is declared with std430 layout and holds a single float array of
/// MAX_GLOBALMEMORY_ELEMENTS entries.
| 385 | void DeclareGlobalMemory() { | ||
| 386 | for (const auto& entry : ir.GetGlobalMemoryBases()) { | ||
// The binding point is emitted as a symbolic name built from the cbuf index and offset, not as
// a number. NOTE(review): presumably the matching "#define GMEM_BINDING_<index>_<offset>" is
// prepended elsewhere before compilation - confirm against the shader cache.
| 387 | const std::string binding = | ||
| 388 | fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); | ||
| 389 | code.AddLine("layout (std430, binding = " + binding + ") buffer " + | ||
| 390 | GetGlobalMemoryBlock(entry) + " {"); | ||
| 391 | code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); | ||
| 392 | code.AddLine("};"); | ||
| 393 | code.AddNewLine(); | ||
| 394 | } | ||
| 395 | } | ||
| 396 | |||
| 378 | void DeclareSamplers() { | 397 | void DeclareSamplers() { |
| 379 | const auto& samplers = ir.GetSamplers(); | 398 | const auto& samplers = ir.GetSamplers(); |
| 380 | for (const auto& sampler : samplers) { | 399 | for (const auto& sampler : samplers) { |
| @@ -398,7 +417,8 @@ private: | |||
| 398 | if (sampler.IsShadow()) | 417 | if (sampler.IsShadow()) |
| 399 | sampler_type += "Shadow"; | 418 | sampler_type += "Shadow"; |
| 400 | 419 | ||
| 401 | code.AddLine("uniform " + sampler_type + ' ' + GetSampler(sampler) + ';'); | 420 | code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) + |
| 421 | ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';'); | ||
| 402 | } | 422 | } |
| 403 | if (!samplers.empty()) | 423 | if (!samplers.empty()) |
| 404 | code.AddNewLine(); | 424 | code.AddNewLine(); |
| @@ -538,6 +558,12 @@ private: | |||
| 538 | UNREACHABLE_MSG("Unmanaged offset node type"); | 558 | UNREACHABLE_MSG("Unmanaged offset node type"); |
| 539 | } | 559 | } |
| 540 | 560 | ||
| 561 | } else if (const auto gmem = std::get_if<GmemNode>(node)) { | ||
| 562 | const std::string real = Visit(gmem->GetRealAddress()); | ||
| 563 | const std::string base = Visit(gmem->GetBaseAddress()); | ||
| 564 | const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; | ||
| 565 | return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); | ||
| 566 | |||
| 541 | } else if (const auto lmem = std::get_if<LmemNode>(node)) { | 567 | } else if (const auto lmem = std::get_if<LmemNode>(node)) { |
| 542 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 568 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 543 | 569 | ||
| @@ -1471,6 +1497,15 @@ private: | |||
| 1471 | return GetDeclarationWithSuffix(index, "cbuf"); | 1497 | return GetDeclarationWithSuffix(index, "cbuf"); |
| 1472 | } | 1498 | } |
| 1473 | 1499 | ||
/// Returns the GLSL identifier of the float array backing a global memory region, formatted as
/// "gmem_<cbuf_index>_<cbuf_offset>_<suffix>".
| 1500 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { | ||
| 1501 | return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); | ||
| 1502 | } | ||
| 1503 | |||
/// Returns the GLSL interface block name of a global memory region, formatted as
/// "gmem_block_<cbuf_index>_<cbuf_offset>_<suffix>".
| 1504 | std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { | ||
| 1505 | return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, | ||
| 1506 | suffix); | ||
| 1507 | } | ||
| 1508 | |||
| 1474 | std::string GetConstBufferBlock(u32 index) const { | 1509 | std::string GetConstBufferBlock(u32 index) const { |
| 1475 | return GetDeclarationWithSuffix(index, "cbuf_block"); | 1510 | return GetDeclarationWithSuffix(index, "cbuf_block"); |
| 1476 | } | 1511 | } |
| @@ -1505,8 +1540,10 @@ private: | |||
| 1505 | }; | 1540 | }; |
| 1506 | 1541 | ||
| 1507 | std::string GetCommonDeclarations() { | 1542 | std::string GetCommonDeclarations() { |
| 1508 | return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) + | 1543 | const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); |
| 1509 | "\n" | 1544 | const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); |
| 1545 | return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + | ||
| 1546 | "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + | ||
| 1510 | "#define ftoi floatBitsToInt\n" | 1547 | "#define ftoi floatBitsToInt\n" |
| 1511 | "#define ftou floatBitsToUint\n" | 1548 | "#define ftou floatBitsToUint\n" |
| 1512 | "#define itof intBitsToFloat\n" | 1549 | "#define itof intBitsToFloat\n" |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 396a560d8..0856a1361 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -38,10 +38,6 @@ public: | |||
| 38 | return index; | 38 | return index; |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | u32 GetHash() const { | ||
| 42 | return (static_cast<u32>(stage) << 16) | index; | ||
| 43 | } | ||
| 44 | |||
| 45 | private: | 41 | private: |
| 46 | std::string name; | 42 | std::string name; |
| 47 | Maxwell::ShaderStage stage{}; | 43 | Maxwell::ShaderStage stage{}; |
| @@ -62,18 +58,44 @@ public: | |||
| 62 | return stage; | 58 | return stage; |
| 63 | } | 59 | } |
| 64 | 60 | ||
| 65 | u32 GetHash() const { | 61 | private: |
| 66 | return (static_cast<u32>(stage) << 16) | static_cast<u32>(GetIndex()); | 62 | std::string name; |
| 63 | Maxwell::ShaderStage stage{}; | ||
| 64 | }; | ||
| 65 | |||
| 66 | class GlobalMemoryEntry { | ||
| 67 | public: | ||
| 68 | explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage, | ||
| 69 | std::string name) | ||
| 70 | : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {} | ||
| 71 | |||
| 72 | u32 GetCbufIndex() const { | ||
| 73 | return cbuf_index; | ||
| 74 | } | ||
| 75 | |||
| 76 | u32 GetCbufOffset() const { | ||
| 77 | return cbuf_offset; | ||
| 78 | } | ||
| 79 | |||
| 80 | const std::string& GetName() const { | ||
| 81 | return name; | ||
| 82 | } | ||
| 83 | |||
| 84 | Maxwell::ShaderStage GetStage() const { | ||
| 85 | return stage; | ||
| 67 | } | 86 | } |
| 68 | 87 | ||
| 69 | private: | 88 | private: |
| 70 | std::string name; | 89 | u32 cbuf_index{}; |
| 90 | u32 cbuf_offset{}; | ||
| 71 | Maxwell::ShaderStage stage{}; | 91 | Maxwell::ShaderStage stage{}; |
| 92 | std::string name; | ||
| 72 | }; | 93 | }; |
| 73 | 94 | ||
| 74 | struct ShaderEntries { | 95 | struct ShaderEntries { |
| 75 | std::vector<ConstBufferEntry> const_buffers; | 96 | std::vector<ConstBufferEntry> const_buffers; |
| 76 | std::vector<SamplerEntry> samplers; | 97 | std::vector<SamplerEntry> samplers; |
| 98 | std::vector<GlobalMemoryEntry> global_memory_entries; | ||
| 77 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 99 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 78 | std::size_t shader_length{}; | 100 | std::size_t shader_length{}; |
| 79 | }; | 101 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 446d1a93f..04e1db911 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -20,15 +20,14 @@ static constexpr u32 PROGRAM_OFFSET{10}; | |||
| 20 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { |
| 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 22 | 22 | ||
| 23 | std::string out = "#version 430 core\n"; | 23 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 24 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 25 | out += "// Shader Unique Id: VS" + id + "\n\n"; | 24 | out += "// Shader Unique Id: VS" + id + "\n\n"; |
| 26 | out += GetCommonDeclarations(); | 25 | out += GetCommonDeclarations(); |
| 27 | 26 | ||
| 28 | out += R"( | 27 | out += R"( |
| 29 | layout (location = 0) out vec4 position; | 28 | layout (location = 0) out vec4 position; |
| 30 | 29 | ||
| 31 | layout(std140) uniform vs_config { | 30 | layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { |
| 32 | vec4 viewport_flip; | 31 | vec4 viewport_flip; |
| 33 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | 32 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 34 | uvec4 alpha_test; | 33 | uvec4 alpha_test; |
| @@ -78,7 +77,6 @@ void main() { | |||
| 78 | } | 77 | } |
| 79 | 78 | ||
| 80 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | 79 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { |
| 81 | // Version is intentionally skipped in shader generation, it's added by the lazy compilation. | ||
| 82 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 80 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 83 | 81 | ||
| 84 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 82 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| @@ -89,7 +87,7 @@ ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | |||
| 89 | layout (location = 0) in vec4 gs_position[]; | 87 | layout (location = 0) in vec4 gs_position[]; |
| 90 | layout (location = 0) out vec4 position; | 88 | layout (location = 0) out vec4 position; |
| 91 | 89 | ||
| 92 | layout (std140) uniform gs_config { | 90 | layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { |
| 93 | vec4 viewport_flip; | 91 | vec4 viewport_flip; |
| 94 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | 92 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 95 | uvec4 alpha_test; | 93 | uvec4 alpha_test; |
| @@ -112,8 +110,7 @@ void main() { | |||
| 112 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { | 110 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { |
| 113 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 111 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 114 | 112 | ||
| 115 | std::string out = "#version 430 core\n"; | 113 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 116 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 117 | out += "// Shader Unique Id: FS" + id + "\n\n"; | 114 | out += "// Shader Unique Id: FS" + id + "\n\n"; |
| 118 | out += GetCommonDeclarations(); | 115 | out += GetCommonDeclarations(); |
| 119 | 116 | ||
| @@ -129,7 +126,7 @@ layout (location = 7) out vec4 FragColor7; | |||
| 129 | 126 | ||
| 130 | layout (location = 0) in vec4 position; | 127 | layout (location = 0) in vec4 position; |
| 131 | 128 | ||
| 132 | layout (std140) uniform fs_config { | 129 | layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { |
| 133 | vec4 viewport_flip; | 130 | vec4 viewport_flip; |
| 134 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | 131 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 135 | uvec4 alpha_test; | 132 | uvec4 alpha_test; |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ae71672d6..04cb386b7 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | #include <fmt/format.h> | ||
| 7 | 8 | ||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| @@ -119,6 +120,54 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { | |||
| 119 | } | 120 | } |
| 120 | break; | 121 | break; |
| 121 | } | 122 | } |
| 123 | case OpCode::Id::LDG: { | ||
| 124 | const u32 count = [&]() { | ||
| 125 | switch (instr.ldg.type) { | ||
| 126 | case Tegra::Shader::UniformType::Single: | ||
| 127 | return 1; | ||
| 128 | case Tegra::Shader::UniformType::Double: | ||
| 129 | return 2; | ||
| 130 | case Tegra::Shader::UniformType::Quad: | ||
| 131 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 132 | return 4; | ||
| 133 | default: | ||
| 134 | UNIMPLEMENTED_MSG("Unimplemented LDG size!"); | ||
| 135 | return 1; | ||
| 136 | } | ||
| 137 | }(); | ||
| 138 | |||
| 139 | const Node addr_register = GetRegister(instr.gpr8); | ||
| 140 | const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size())); | ||
| 141 | const auto cbuf = std::get_if<CbufNode>(base_address); | ||
| 142 | ASSERT(cbuf != nullptr); | ||
| 143 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); | ||
| 144 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 145 | const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4; | ||
| 146 | |||
| 147 | bb.push_back(Comment( | ||
| 148 | fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 149 | |||
| 150 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | ||
| 151 | used_global_memory_bases.insert(descriptor); | ||
| 152 | |||
| 153 | const Node immediate_offset = | ||
| 154 | Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); | ||
| 155 | const Node base_real_address = | ||
| 156 | Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); | ||
| 157 | |||
| 158 | for (u32 i = 0; i < count; ++i) { | ||
| 159 | const Node it_offset = Immediate(i * 4); | ||
| 160 | const Node real_address = | ||
| 161 | Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); | ||
| 162 | const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); | ||
| 163 | |||
| 164 | SetTemporal(bb, i, gmem); | ||
| 165 | } | ||
| 166 | for (u32 i = 0; i < count; ++i) { | ||
| 167 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 168 | } | ||
| 169 | break; | ||
| 170 | } | ||
| 122 | case OpCode::Id::ST_A: { | 171 | case OpCode::Id::ST_A: { |
| 123 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | 172 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, |
| 124 | "Indirect attribute loads are not supported"); | 173 | "Indirect attribute loads are not supported"); |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ef8f94480..c4ecb2e3c 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -257,6 +257,15 @@ private: | |||
| 257 | bool is_indirect{}; | 257 | bool is_indirect{}; |
| 258 | }; | 258 | }; |
| 259 | 259 | ||
/// Identifies a global memory region by the constant buffer index and offset that its base
/// address was tracked to. Both fields default to zero.
| 260 | struct GlobalMemoryBase { | ||
| 261 | u32 cbuf_index{}; | ||
| 262 | u32 cbuf_offset{}; | ||
| 263 | |||
// Lexicographic ordering (index first, then offset) so the type can be stored in ordered
// containers such as std::set.
| 264 | bool operator<(const GlobalMemoryBase& rhs) const { | ||
| 265 | return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); | ||
| 266 | } | ||
| 267 | }; | ||
| 268 | |||
| 260 | struct MetaArithmetic { | 269 | struct MetaArithmetic { |
| 261 | bool precise{}; | 270 | bool precise{}; |
| 262 | }; | 271 | }; |
| @@ -478,14 +487,26 @@ private: | |||
| 478 | /// Global memory node | 487 | /// Global memory node |
| 479 | class GmemNode final { | 488 | class GmemNode final { |
| 480 | public: | 489 | public: |
| 481 | explicit constexpr GmemNode(Node address) : address{address} {} | 490 | explicit constexpr GmemNode(Node real_address, Node base_address, |
| 491 | const GlobalMemoryBase& descriptor) | ||
| 492 | : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {} | ||
| 482 | 493 | ||
| 483 | Node GetAddress() const { | 494 | Node GetRealAddress() const { |
| 484 | return address; | 495 | return real_address; |
| 496 | } | ||
| 497 | |||
| 498 | Node GetBaseAddress() const { | ||
| 499 | return base_address; | ||
| 500 | } | ||
| 501 | |||
| 502 | const GlobalMemoryBase& GetDescriptor() const { | ||
| 503 | return descriptor; | ||
| 485 | } | 504 | } |
| 486 | 505 | ||
| 487 | private: | 506 | private: |
| 488 | const Node address; | 507 | const Node real_address; |
| 508 | const Node base_address; | ||
| 509 | const GlobalMemoryBase descriptor; | ||
| 489 | }; | 510 | }; |
| 490 | 511 | ||
| 491 | /// Commentary, can be dropped | 512 | /// Commentary, can be dropped |
| @@ -543,6 +564,10 @@ public: | |||
| 543 | return used_clip_distances; | 564 | return used_clip_distances; |
| 544 | } | 565 | } |
| 545 | 566 | ||
| 567 | const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const { | ||
| 568 | return used_global_memory_bases; | ||
| 569 | } | ||
| 570 | |||
| 546 | std::size_t GetLength() const { | 571 | std::size_t GetLength() const { |
| 547 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | 572 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); |
| 548 | } | 573 | } |
| @@ -734,6 +759,10 @@ private: | |||
| 734 | void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | 759 | void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |
| 735 | Node op_c, Node imm_lut, bool sets_cc); | 760 | Node op_c, Node imm_lut, bool sets_cc); |
| 736 | 761 | ||
| 762 | Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor); | ||
| 763 | |||
| 764 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor); | ||
| 765 | |||
| 737 | template <typename... T> | 766 | template <typename... T> |
| 738 | Node Operation(OperationCode code, const T*... operands) { | 767 | Node Operation(OperationCode code, const T*... operands) { |
| 739 | return StoreNode(OperationNode(code, operands...)); | 768 | return StoreNode(OperationNode(code, operands...)); |
| @@ -786,6 +815,7 @@ private: | |||
| 786 | std::map<u32, ConstBuffer> used_cbufs; | 815 | std::map<u32, ConstBuffer> used_cbufs; |
| 787 | std::set<Sampler> used_samplers; | 816 | std::set<Sampler> used_samplers; |
| 788 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | 817 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
| 818 | std::set<GlobalMemoryBase> used_global_memory_bases; | ||
| 789 | 819 | ||
| 790 | Tegra::Shader::Header header; | 820 | Tegra::Shader::Header header; |
| 791 | }; | 821 | }; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp new file mode 100644 index 000000000..d6d29ee9f --- /dev/null +++ b/src/video_core/shader/track.cpp | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <variant> | ||
| 8 | |||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | namespace { | ||
/// Scans `code` backwards from index `cursor` down to index 0 and returns the first node that is
/// an OperationNode whose code equals `operation_code`, together with the index it was found at.
/// Returns a value-initialized pair when no such node exists or when `cursor` is negative.
/// NOTE(review): `cursor` is used to index `code` directly, so callers appear to be responsible
/// for keeping it below code.size() - confirm.
| 14 | std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor, | ||
| 15 | OperationCode operation_code) { | ||
| 16 | for (; cursor >= 0; --cursor) { | ||
| 17 | const Node node = code[cursor]; | ||
| 18 | if (const auto operation = std::get_if<OperationNode>(node)) { | ||
| 19 | if (operation->GetCode() == operation_code) | ||
| 20 | return {node, cursor}; | ||
| 21 | } | ||
| 22 | } | ||
| 23 | return {}; | ||
| 24 | } | ||
| 25 | } // namespace | ||
| 26 | |||
/// Tries to resolve `tracked` to the constant buffer read it derives from, looking backwards
/// through `code` starting at `cursor`. Returns that node, or nullptr when none can be found.
///  - CbufNode: returned as-is if its offset is an ImmediateNode, otherwise nullptr.
///  - GprNode: the zero register yields nullptr; any other register is resolved through
///    TrackRegister and the value found is tracked recursively from the cursor it returned.
///  - OperationNode: each operand is tracked in order with the same cursor and the first
///    non-null result is returned.
///  - Any other node type: nullptr.
| 27 | Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) { | ||
| 28 | if (const auto cbuf = std::get_if<CbufNode>(tracked)) { | ||
| 29 | // Cbuf found, but it has to be immediate | ||
| 30 | return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; | ||
| 31 | } | ||
| 32 | if (const auto gpr = std::get_if<GprNode>(tracked)) { | ||
| 33 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 34 | return nullptr; | ||
| 35 | } | ||
| 36 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same | ||
| 37 | // register that it uses as operand | ||
| 38 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 39 | if (!source) { | ||
| 40 | return nullptr; | ||
| 41 | } | ||
| 42 | return TrackCbuf(source, code, new_cursor); | ||
| 43 | } | ||
| 44 | if (const auto operation = std::get_if<OperationNode>(tracked)) { | ||
| 45 | for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { | ||
| 46 | if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { | ||
| 47 | // Cbuf found in operand | ||
| 48 | return found; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | return nullptr; | ||
| 52 | } | ||
| 53 | return nullptr; | ||
| 54 | } | ||
| 55 | |||
/// Searches `code` backwards from `cursor` for an Assign operation whose first operand is a
/// GprNode with the same index as `tracked`. On success returns the operation's second operand
/// together with the index of that Assign; otherwise returns a value-initialized pair.
| 56 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code, | ||
| 57 | s64 cursor) { | ||
// NOTE(review): after an Assign that targets a different register, the loop only steps `cursor`
// back by one instead of continuing from `new_cursor`, so the same Assign can be found again by
// FindOperation on following iterations until `cursor` passes it. The result is unaffected, but
// the work is repeated.
| 58 | for (; cursor >= 0; --cursor) { | ||
| 59 | const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); | ||
| 60 | if (!found_node) { | ||
| 61 | return {}; | ||
| 62 | } | ||
| 63 | const auto operation = std::get_if<OperationNode>(found_node); | ||
| 64 | ASSERT(operation); | ||
| 65 | |||
// Operand 0 is treated as the assignment target and operand 1 as the value assigned to it.
| 66 | const auto& target = (*operation)[0]; | ||
| 67 | if (const auto gpr_target = std::get_if<GprNode>(target)) { | ||
| 68 | if (gpr_target->GetIndex() == tracked->GetIndex()) { | ||
| 69 | return {(*operation)[1], new_cursor}; | ||
| 70 | } | ||
| 71 | } | ||
| 72 | } | ||
| 73 | return {}; | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace VideoCommon::Shader | ||