diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/gpu.cpp | 183 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 71 |
2 files changed, 242 insertions, 12 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 08cf6268f..d3d32a359 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "core/core_timing.h" | ||
| 7 | #include "core/memory.h" | ||
| 6 | #include "video_core/engines/fermi_2d.h" | 8 | #include "video_core/engines/fermi_2d.h" |
| 7 | #include "video_core/engines/kepler_memory.h" | 9 | #include "video_core/engines/kepler_memory.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| @@ -124,9 +126,36 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) { | |||
| 124 | } | 126 | } |
| 125 | } | 127 | } |
| 126 | 128 | ||
| 129 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | ||
| 130 | // their numbers are written down multiplied by 4 in docs. Here we do not multiply by 4, | ||
| 131 | // so the values you see in docs might be multiplied by 4. | ||
| 127 | enum class BufferMethods { | 132 | enum class BufferMethods { |
| 128 | BindObject = 0, | 133 | BindObject = 0x0, |
| 129 | CountBufferMethods = 0x40, | 134 | Nop = 0x2, |
| 135 | SemaphoreAddressHigh = 0x4, | ||
| 136 | SemaphoreAddressLow = 0x5, | ||
| 137 | SemaphoreSequence = 0x6, | ||
| 138 | SemaphoreTrigger = 0x7, | ||
| 139 | NotifyIntr = 0x8, | ||
| 140 | WrcacheFlush = 0x9, | ||
| 141 | Unk28 = 0xA, | ||
| 142 | Unk2c = 0xB, | ||
| 143 | RefCnt = 0x14, | ||
| 144 | SemaphoreAcquire = 0x1A, | ||
| 145 | SemaphoreRelease = 0x1B, | ||
| 146 | Unk70 = 0x1C, | ||
| 147 | Unk74 = 0x1D, | ||
| 148 | Unk78 = 0x1E, | ||
| 149 | Unk7c = 0x1F, | ||
| 150 | Yield = 0x20, | ||
| 151 | NonPullerMethods = 0x40, | ||
| 152 | }; | ||
| 153 | |||
| 154 | enum class GpuSemaphoreOperation { | ||
| 155 | AcquireEqual = 0x1, | ||
| 156 | WriteLong = 0x2, | ||
| 157 | AcquireGequal = 0x4, | ||
| 158 | AcquireMask = 0x8, | ||
| 130 | }; | 159 | }; |
| 131 | 160 | ||
| 132 | void GPU::CallMethod(const MethodCall& method_call) { | 161 | void GPU::CallMethod(const MethodCall& method_call) { |
| @@ -135,20 +164,78 @@ void GPU::CallMethod(const MethodCall& method_call) { | |||
| 135 | 164 | ||
| 136 | ASSERT(method_call.subchannel < bound_engines.size()); | 165 | ASSERT(method_call.subchannel < bound_engines.size()); |
| 137 | 166 | ||
| 138 | if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) { | 167 | if (ExecuteMethodOnEngine(method_call)) { |
| 139 | // Bind the current subchannel to the desired engine id. | 168 | CallEngineMethod(method_call); |
| 140 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | 169 | } else { |
| 141 | method_call.argument); | 170 | CallPullerMethod(method_call); |
| 142 | bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); | ||
| 143 | return; | ||
| 144 | } | 171 | } |
| 172 | } | ||
| 173 | |||
| 174 | bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) { | ||
| 175 | const auto method = static_cast<BufferMethods>(method_call.method); | ||
| 176 | return method >= BufferMethods::NonPullerMethods; | ||
| 177 | } | ||
| 145 | 178 | ||
| 146 | if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) { | 179 | void GPU::CallPullerMethod(const MethodCall& method_call) { |
| 147 | // TODO(Subv): Research and implement these methods. | 180 | regs.reg_array[method_call.method] = method_call.argument; |
| 148 | LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); | 181 | const auto method = static_cast<BufferMethods>(method_call.method); |
| 149 | return; | 182 | |
| 183 | switch (method) { | ||
| 184 | case BufferMethods::BindObject: { | ||
| 185 | ProcessBindMethod(method_call); | ||
| 186 | break; | ||
| 187 | } | ||
| 188 | case BufferMethods::Nop: | ||
| 189 | case BufferMethods::SemaphoreAddressHigh: | ||
| 190 | case BufferMethods::SemaphoreAddressLow: | ||
| 191 | case BufferMethods::SemaphoreSequence: | ||
| 192 | case BufferMethods::RefCnt: | ||
| 193 | break; | ||
| 194 | case BufferMethods::SemaphoreTrigger: { | ||
| 195 | ProcessSemaphoreTriggerMethod(); | ||
| 196 | break; | ||
| 197 | } | ||
| 198 | case BufferMethods::NotifyIntr: { | ||
| 199 | // TODO(Kmather73): Research and implement this method. | ||
| 200 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | case BufferMethods::WrcacheFlush: { | ||
| 204 | // TODO(Kmather73): Research and implement this method. | ||
| 205 | LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented"); | ||
| 206 | break; | ||
| 207 | } | ||
| 208 | case BufferMethods::Unk28: { | ||
| 209 | // TODO(Kmather73): Research and implement this method. | ||
| 210 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | ||
| 211 | break; | ||
| 212 | } | ||
| 213 | case BufferMethods::Unk2c: { | ||
| 214 | // TODO(Kmather73): Research and implement this method. | ||
| 215 | LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented"); | ||
| 216 | break; | ||
| 217 | } | ||
| 218 | case BufferMethods::SemaphoreAcquire: { | ||
| 219 | ProcessSemaphoreAcquire(); | ||
| 220 | break; | ||
| 150 | } | 221 | } |
| 222 | case BufferMethods::SemaphoreRelease: { | ||
| 223 | ProcessSemaphoreRelease(); | ||
| 224 | break; | ||
| 225 | } | ||
| 226 | case BufferMethods::Yield: { | ||
| 227 | // TODO(Kmather73): Research and implement this method. | ||
| 228 | LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented"); | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | default: | ||
| 232 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", | ||
| 233 | static_cast<u32>(method)); | ||
| 234 | break; | ||
| 235 | } | ||
| 236 | } | ||
| 151 | 237 | ||
| 238 | void GPU::CallEngineMethod(const MethodCall& method_call) { | ||
| 152 | const EngineID engine = bound_engines[method_call.subchannel]; | 239 | const EngineID engine = bound_engines[method_call.subchannel]; |
| 153 | 240 | ||
| 154 | switch (engine) { | 241 | switch (engine) { |
| @@ -172,4 +259,76 @@ void GPU::CallMethod(const MethodCall& method_call) { | |||
| 172 | } | 259 | } |
| 173 | } | 260 | } |
| 174 | 261 | ||
| 262 | void GPU::ProcessBindMethod(const MethodCall& method_call) { | ||
| 263 | // Bind the current subchannel to the desired engine id. | ||
| 264 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | ||
| 265 | method_call.argument); | ||
| 266 | bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); | ||
| 267 | } | ||
| 268 | |||
| 269 | void GPU::ProcessSemaphoreTriggerMethod() { | ||
| 270 | const auto semaphoreOperationMask = 0xF; | ||
| 271 | const auto op = | ||
| 272 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | ||
| 273 | if (op == GpuSemaphoreOperation::WriteLong) { | ||
| 274 | auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 275 | struct Block { | ||
| 276 | u32 sequence; | ||
| 277 | u32 zeros = 0; | ||
| 278 | u64 timestamp; | ||
| 279 | }; | ||
| 280 | |||
| 281 | Block block{}; | ||
| 282 | block.sequence = regs.semaphore_sequence; | ||
| 283 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | ||
| 284 | // CoreTiming | ||
| 285 | block.timestamp = CoreTiming::GetTicks(); | ||
| 286 | Memory::WriteBlock(*address, &block, sizeof(block)); | ||
| 287 | } else { | ||
| 288 | const auto address = | ||
| 289 | memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 290 | const u32 word = Memory::Read32(*address); | ||
| 291 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | ||
| 292 | (op == GpuSemaphoreOperation::AcquireGequal && | ||
| 293 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||
| 294 | (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { | ||
| 295 | // Nothing to do in this case | ||
| 296 | } else { | ||
| 297 | regs.acquire_source = true; | ||
| 298 | regs.acquire_value = regs.semaphore_sequence; | ||
| 299 | if (op == GpuSemaphoreOperation::AcquireEqual) { | ||
| 300 | regs.acquire_active = true; | ||
| 301 | regs.acquire_mode = false; | ||
| 302 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | ||
| 303 | regs.acquire_active = true; | ||
| 304 | regs.acquire_mode = true; | ||
| 305 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | ||
| 306 | // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with | ||
| 307 | // semaphore_sequence, gives a non-0 result | ||
| 308 | LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); | ||
| 309 | } else { | ||
| 310 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); | ||
| 311 | } | ||
| 312 | } | ||
| 313 | } | ||
| 314 | } | ||
| 315 | |||
| 316 | void GPU::ProcessSemaphoreRelease() { | ||
| 317 | const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 318 | Memory::Write32(*address, regs.semaphore_release); | ||
| 319 | } | ||
| 320 | |||
| 321 | void GPU::ProcessSemaphoreAcquire() { | ||
| 322 | const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 323 | const u32 word = Memory::Read32(*address); | ||
| 324 | const auto value = regs.semaphore_acquire; | ||
| 325 | if (word != value) { | ||
| 326 | regs.acquire_active = true; | ||
| 327 | regs.acquire_value = value; | ||
| 328 | // TODO(kemathe73) figure out how to do the acquire_timeout | ||
| 329 | regs.acquire_mode = false; | ||
| 330 | regs.acquire_source = false; | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 175 | } // namespace Tegra | 334 | } // namespace Tegra |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index af5ccd1e9..fb8975811 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -156,6 +156,46 @@ public: | |||
| 156 | /// Returns a const reference to the GPU DMA pusher. | 156 | /// Returns a const reference to the GPU DMA pusher. |
| 157 | const Tegra::DmaPusher& DmaPusher() const; | 157 | const Tegra::DmaPusher& DmaPusher() const; |
| 158 | 158 | ||
| 159 | struct Regs { | ||
| 160 | static constexpr size_t NUM_REGS = 0x100; | ||
| 161 | |||
| 162 | union { | ||
| 163 | struct { | ||
| 164 | INSERT_PADDING_WORDS(0x4); | ||
| 165 | struct { | ||
| 166 | u32 address_high; | ||
| 167 | u32 address_low; | ||
| 168 | |||
| 169 | GPUVAddr SmaphoreAddress() const { | ||
| 170 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 171 | address_low); | ||
| 172 | } | ||
| 173 | } smaphore_address; | ||
| 174 | |||
| 175 | u32 semaphore_sequence; | ||
| 176 | u32 semaphore_trigger; | ||
| 177 | INSERT_PADDING_WORDS(0xC); | ||
| 178 | |||
| 179 | // The pusher and the puller share the reference counter; the pusher only has read | ||
| 180 | // access | ||
| 181 | u32 reference_count; | ||
| 182 | INSERT_PADDING_WORDS(0x5); | ||
| 183 | |||
| 184 | u32 semaphore_acquire; | ||
| 185 | u32 semaphore_release; | ||
| 186 | INSERT_PADDING_WORDS(0xE4); | ||
| 187 | |||
| 188 | // Puller state | ||
| 189 | u32 acquire_mode; | ||
| 190 | u32 acquire_source; | ||
| 191 | u32 acquire_active; | ||
| 192 | u32 acquire_timeout; | ||
| 193 | u32 acquire_value; | ||
| 194 | }; | ||
| 195 | std::array<u32, NUM_REGS> reg_array; | ||
| 196 | }; | ||
| 197 | } regs{}; | ||
| 198 | |||
| 159 | private: | 199 | private: |
| 160 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 200 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 161 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 201 | std::unique_ptr<Tegra::MemoryManager> memory_manager; |
| @@ -173,6 +213,37 @@ private: | |||
| 173 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 213 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 174 | /// Inline memory engine | 214 | /// Inline memory engine |
| 175 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | 215 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; |
| 216 | |||
| 217 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 218 | void ProcessSemaphoreTriggerMethod(); | ||
| 219 | void ProcessSemaphoreRelease(); | ||
| 220 | void ProcessSemaphoreAcquire(); | ||
| 221 | |||
| 222 | // Calls a GPU puller method. | ||
| 223 | void CallPullerMethod(const MethodCall& method_call); | ||
| 224 | // Calls a GPU engine method. | ||
| 225 | void CallEngineMethod(const MethodCall& method_call); | ||
| 226 | // Determines where the method should be executed. | ||
| 227 | bool ExecuteMethodOnEngine(const MethodCall& method_call); | ||
| 176 | }; | 228 | }; |
| 177 | 229 | ||
| 230 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 231 | static_assert(offsetof(GPU::Regs, field_name) == position * 4, \ | ||
| 232 | "Field " #field_name " has invalid position") | ||
| 233 | |||
| 234 | ASSERT_REG_POSITION(smaphore_address, 0x4); | ||
| 235 | ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||
| 236 | ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||
| 237 | ASSERT_REG_POSITION(reference_count, 0x14); | ||
| 238 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||
| 239 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||
| 240 | |||
| 241 | ASSERT_REG_POSITION(acquire_mode, 0x100); | ||
| 242 | ASSERT_REG_POSITION(acquire_source, 0x101); | ||
| 243 | ASSERT_REG_POSITION(acquire_active, 0x102); | ||
| 244 | ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||
| 245 | ASSERT_REG_POSITION(acquire_value, 0x104); | ||
| 246 | |||
| 247 | #undef ASSERT_REG_POSITION | ||
| 248 | |||
| 178 | } // namespace Tegra | 249 | } // namespace Tegra |