diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/citra_qt/debugger/graphics_tracing.cpp | 2 | ||||
| -rw-r--r-- | src/core/hle/kernel/memory.cpp | 1 | ||||
| -rw-r--r-- | src/core/hle/kernel/process.h | 7 | ||||
| -rw-r--r-- | src/core/hle/kernel/thread.cpp | 84 | ||||
| -rw-r--r-- | src/core/hle/kernel/thread.h | 4 | ||||
| -rw-r--r-- | src/core/memory.h | 6 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/pica_state.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 19 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 68 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.cpp | 2 |
12 files changed, 121 insertions, 78 deletions
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp index 1402f8e79..9c80f7ec9 100644 --- a/src/citra_qt/debugger/graphics_tracing.cpp +++ b/src/citra_qt/debugger/graphics_tracing.cpp | |||
| @@ -74,7 +74,7 @@ void GraphicsTracingWidget::StartRecording() { | |||
| 74 | std::array<u32, 4 * 16> default_attributes; | 74 | std::array<u32, 4 * 16> default_attributes; |
| 75 | for (unsigned i = 0; i < 16; ++i) { | 75 | for (unsigned i = 0; i < 16; ++i) { |
| 76 | for (unsigned comp = 0; comp < 3; ++comp) { | 76 | for (unsigned comp = 0; comp < 3; ++comp) { |
| 77 | default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32()); | 77 | default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32()); |
| 78 | } | 78 | } |
| 79 | } | 79 | } |
| 80 | 80 | ||
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp index 862643448..61a741e28 100644 --- a/src/core/hle/kernel/memory.cpp +++ b/src/core/hle/kernel/memory.cpp | |||
| @@ -109,7 +109,6 @@ struct MemoryArea { | |||
| 109 | static MemoryArea memory_areas[] = { | 109 | static MemoryArea memory_areas[] = { |
| 110 | {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory | 110 | {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory |
| 111 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) | 111 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) |
| 112 | {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory | ||
| 113 | }; | 112 | }; |
| 114 | 113 | ||
| 115 | } | 114 | } |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index a06afef2b..d781ef32c 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -142,8 +142,11 @@ public: | |||
| 142 | 142 | ||
| 143 | MemoryRegionInfo* memory_region = nullptr; | 143 | MemoryRegionInfo* memory_region = nullptr; |
| 144 | 144 | ||
| 145 | /// Bitmask of the used TLS slots | 145 | /// The Thread Local Storage area is allocated as processes create threads, |
| 146 | std::bitset<300> used_tls_slots; | 146 | /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part |
| 147 | /// holds the TLS for a specific thread. This vector contains which parts are in use for each page as a bitmask. | ||
| 148 | /// This vector will grow as more pages are allocated for new threads. | ||
| 149 | std::vector<std::bitset<8>> tls_slots; | ||
| 147 | 150 | ||
| 148 | VAddr GetLinearHeapAreaAddress() const; | 151 | VAddr GetLinearHeapAreaAddress() const; |
| 149 | VAddr GetLinearHeapBase() const; | 152 | VAddr GetLinearHeapBase() const; |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 6dc95d0f1..68f026918 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -117,9 +117,10 @@ void Thread::Stop() { | |||
| 117 | } | 117 | } |
| 118 | wait_objects.clear(); | 118 | wait_objects.clear(); |
| 119 | 119 | ||
| 120 | Kernel::g_current_process->used_tls_slots[tls_index] = false; | 120 | // Mark the TLS slot in the thread's page as free. |
| 121 | g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE; | 121 | u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE; |
| 122 | g_current_process->memory_region->used -= Memory::TLS_ENTRY_SIZE; | 122 | u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE; |
| 123 | Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot); | ||
| 123 | 124 | ||
| 124 | HLE::Reschedule(__func__); | 125 | HLE::Reschedule(__func__); |
| 125 | } | 126 | } |
| @@ -366,6 +367,31 @@ static void DebugThreadQueue() { | |||
| 366 | } | 367 | } |
| 367 | } | 368 | } |
| 368 | 369 | ||
| 370 | /** | ||
| 371 | * Finds a free location for the TLS section of a thread. | ||
| 372 | * @param tls_slots The TLS page array of the thread's owner process. | ||
| 373 | * Returns a tuple of (page, slot, alloc_needed) where: | ||
| 374 | * page: The index of the first allocated TLS page that has free slots. | ||
| 375 | * slot: The index of the first free slot in the indicated page. | ||
| 376 | * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full). | ||
| 377 | */ | ||
| 378 | std::tuple<u32, u32, bool> GetFreeThreadLocalSlot(std::vector<std::bitset<8>>& tls_slots) { | ||
| 379 | // Iterate over all the allocated pages, and try to find one where not all slots are used. | ||
| 380 | for (unsigned page = 0; page < tls_slots.size(); ++page) { | ||
| 381 | const auto& page_tls_slots = tls_slots[page]; | ||
| 382 | if (!page_tls_slots.all()) { | ||
| 383 | // We found a page with at least one free slot, find which slot it is | ||
| 384 | for (unsigned slot = 0; slot < page_tls_slots.size(); ++slot) { | ||
| 385 | if (!page_tls_slots.test(slot)) { | ||
| 386 | return std::make_tuple(page, slot, false); | ||
| 387 | } | ||
| 388 | } | ||
| 389 | } | ||
| 390 | } | ||
| 391 | |||
| 392 | return std::make_tuple(0, 0, true); | ||
| 393 | } | ||
| 394 | |||
| 369 | ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, | 395 | ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, |
| 370 | u32 arg, s32 processor_id, VAddr stack_top) { | 396 | u32 arg, s32 processor_id, VAddr stack_top) { |
| 371 | if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { | 397 | if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { |
| @@ -403,22 +429,50 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, | |||
| 403 | thread->name = std::move(name); | 429 | thread->name = std::move(name); |
| 404 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); | 430 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); |
| 405 | thread->owner_process = g_current_process; | 431 | thread->owner_process = g_current_process; |
| 406 | thread->tls_index = -1; | ||
| 407 | thread->waitsynch_waited = false; | 432 | thread->waitsynch_waited = false; |
| 408 | 433 | ||
| 409 | // Find the next available TLS index, and mark it as used | 434 | // Find the next available TLS index, and mark it as used |
| 410 | auto& used_tls_slots = Kernel::g_current_process->used_tls_slots; | 435 | auto& tls_slots = Kernel::g_current_process->tls_slots; |
| 411 | for (unsigned int i = 0; i < used_tls_slots.size(); ++i) { | 436 | bool needs_allocation = true; |
| 412 | if (used_tls_slots[i] == false) { | 437 | u32 available_page; // Which allocated page has free space |
| 413 | thread->tls_index = i; | 438 | u32 available_slot; // Which slot within the page is free |
| 414 | used_tls_slots[i] = true; | 439 | |
| 415 | break; | 440 | std::tie(available_page, available_slot, needs_allocation) = GetFreeThreadLocalSlot(tls_slots); |
| 441 | |||
| 442 | if (needs_allocation) { | ||
| 443 | // There are no already-allocated pages with free slots, lets allocate a new one. | ||
| 444 | // TLS pages are allocated from the BASE region in the linear heap. | ||
| 445 | MemoryRegionInfo* memory_region = GetMemoryRegion(MemoryRegion::BASE); | ||
| 446 | auto& linheap_memory = memory_region->linear_heap_memory; | ||
| 447 | |||
| 448 | if (linheap_memory->size() + Memory::PAGE_SIZE > memory_region->size) { | ||
| 449 | LOG_ERROR(Kernel_SVC, "Not enough space in region to allocate a new TLS page for thread"); | ||
| 450 | return ResultCode(ErrorDescription::OutOfMemory, ErrorModule::Kernel, ErrorSummary::OutOfResource, ErrorLevel::Permanent); | ||
| 416 | } | 451 | } |
| 452 | |||
| 453 | u32 offset = linheap_memory->size(); | ||
| 454 | |||
| 455 | // Allocate some memory from the end of the linear heap for this region. | ||
| 456 | linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0); | ||
| 457 | memory_region->used += Memory::PAGE_SIZE; | ||
| 458 | Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE; | ||
| 459 | |||
| 460 | tls_slots.emplace_back(0); // The page is completely available at the start | ||
| 461 | available_page = tls_slots.size() - 1; | ||
| 462 | available_slot = 0; // Use the first slot in the new page | ||
| 463 | |||
| 464 | auto& vm_manager = Kernel::g_current_process->vm_manager; | ||
| 465 | vm_manager.RefreshMemoryBlockMappings(linheap_memory.get()); | ||
| 466 | |||
| 467 | // Map the page to the current process' address space. | ||
| 468 | // TODO(Subv): Find the correct MemoryState for this region. | ||
| 469 | vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE, | ||
| 470 | linheap_memory, offset, Memory::PAGE_SIZE, MemoryState::Private); | ||
| 417 | } | 471 | } |
| 418 | 472 | ||
| 419 | ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); | 473 | // Mark the slot as used |
| 420 | g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE; | 474 | tls_slots[available_page].set(available_slot); |
| 421 | g_current_process->memory_region->used += Memory::TLS_ENTRY_SIZE; | 475 | thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE; |
| 422 | 476 | ||
| 423 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used | 477 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used |
| 424 | // to initialize the context | 478 | // to initialize the context |
| @@ -509,10 +563,6 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { | |||
| 509 | context.cpu_registers[1] = output; | 563 | context.cpu_registers[1] = output; |
| 510 | } | 564 | } |
| 511 | 565 | ||
| 512 | VAddr Thread::GetTLSAddress() const { | ||
| 513 | return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE; | ||
| 514 | } | ||
| 515 | |||
| 516 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 566 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| 517 | 567 | ||
| 518 | void ThreadingInit() { | 568 | void ThreadingInit() { |
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 97ba57fc5..deab5d5a6 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h | |||
| @@ -127,7 +127,7 @@ public: | |||
| 127 | * Returns the Thread Local Storage address of the current thread | 127 | * Returns the Thread Local Storage address of the current thread |
| 128 | * @returns VAddr of the thread's TLS | 128 | * @returns VAddr of the thread's TLS |
| 129 | */ | 129 | */ |
| 130 | VAddr GetTLSAddress() const; | 130 | VAddr GetTLSAddress() const { return tls_address; } |
| 131 | 131 | ||
| 132 | Core::ThreadContext context; | 132 | Core::ThreadContext context; |
| 133 | 133 | ||
| @@ -144,7 +144,7 @@ public: | |||
| 144 | 144 | ||
| 145 | s32 processor_id; | 145 | s32 processor_id; |
| 146 | 146 | ||
| 147 | s32 tls_index; ///< Index of the Thread Local Storage of the thread | 147 | VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread |
| 148 | 148 | ||
| 149 | bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait | 149 | bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait |
| 150 | 150 | ||
diff --git a/src/core/memory.h b/src/core/memory.h index 9caa3c3f5..126d60471 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -100,15 +100,9 @@ enum : VAddr { | |||
| 100 | SHARED_PAGE_SIZE = 0x00001000, | 100 | SHARED_PAGE_SIZE = 0x00001000, |
| 101 | SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, | 101 | SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, |
| 102 | 102 | ||
| 103 | // TODO(yuriks): The size of this area is dynamic, the kernel grows | ||
| 104 | // it as more and more threads are created. For now we'll just use a | ||
| 105 | // hardcoded value. | ||
| 106 | /// Area where TLS (Thread-Local Storage) buffers are allocated. | 103 | /// Area where TLS (Thread-Local Storage) buffers are allocated. |
| 107 | TLS_AREA_VADDR = 0x1FF82000, | 104 | TLS_AREA_VADDR = 0x1FF82000, |
| 108 | TLS_ENTRY_SIZE = 0x200, | 105 | TLS_ENTRY_SIZE = 0x200, |
| 109 | TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size | ||
| 110 | TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE, | ||
| 111 | |||
| 112 | 106 | ||
| 113 | /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. | 107 | /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. |
| 114 | NEW_LINEAR_HEAP_VADDR = 0x30000000, | 108 | NEW_LINEAR_HEAP_VADDR = 0x30000000, |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index e7dc5ddac..ad0da796e 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -128,7 +128,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 128 | 128 | ||
| 129 | // TODO: Verify that this actually modifies the register! | 129 | // TODO: Verify that this actually modifies the register! |
| 130 | if (setup.index < 15) { | 130 | if (setup.index < 15) { |
| 131 | g_state.vs.default_attributes[setup.index] = attribute; | 131 | g_state.vs_default_attributes[setup.index] = attribute; |
| 132 | setup.index++; | 132 | setup.index++; |
| 133 | } else { | 133 | } else { |
| 134 | // Put each attribute into an immediate input buffer. | 134 | // Put each attribute into an immediate input buffer. |
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 1059c6ae4..495174c25 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h | |||
| @@ -25,6 +25,8 @@ struct State { | |||
| 25 | Shader::ShaderSetup vs; | 25 | Shader::ShaderSetup vs; |
| 26 | Shader::ShaderSetup gs; | 26 | Shader::ShaderSetup gs; |
| 27 | 27 | ||
| 28 | std::array<Math::Vec4<float24>, 16> vs_default_attributes; | ||
| 29 | |||
| 28 | struct { | 30 | struct { |
| 29 | union LutEntry { | 31 | union LutEntry { |
| 30 | // Used for raw access | 32 | // Used for raw access |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 449fc703f..e93a9d92a 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -67,7 +67,6 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, | |||
| 67 | 67 | ||
| 68 | MICROPROFILE_SCOPE(GPU_Shader); | 68 | MICROPROFILE_SCOPE(GPU_Shader); |
| 69 | 69 | ||
| 70 | state.program_counter = config.main_offset; | ||
| 71 | state.debug.max_offset = 0; | 70 | state.debug.max_offset = 0; |
| 72 | state.debug.max_opdesc_id = 0; | 71 | state.debug.max_opdesc_id = 0; |
| 73 | 72 | ||
| @@ -143,7 +142,6 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, | |||
| 143 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { | 142 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { |
| 144 | UnitState<true> state; | 143 | UnitState<true> state; |
| 145 | 144 | ||
| 146 | state.program_counter = config.main_offset; | ||
| 147 | state.debug.max_offset = 0; | 145 | state.debug.max_offset = 0; |
| 148 | state.debug.max_opdesc_id = 0; | 146 | state.debug.max_opdesc_id = 0; |
| 149 | 147 | ||
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 7f417675a..983e4a967 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -272,29 +272,12 @@ struct UnitState { | |||
| 272 | } registers; | 272 | } registers; |
| 273 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); | 273 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); |
| 274 | 274 | ||
| 275 | u32 program_counter; | ||
| 276 | bool conditional_code[2]; | 275 | bool conditional_code[2]; |
| 277 | 276 | ||
| 278 | // Two Address registers and one loop counter | 277 | // Two Address registers and one loop counter |
| 279 | // TODO: How many bits do these actually have? | 278 | // TODO: How many bits do these actually have? |
| 280 | s32 address_registers[3]; | 279 | s32 address_registers[3]; |
| 281 | 280 | ||
| 282 | enum { | ||
| 283 | INVALID_ADDRESS = 0xFFFFFFFF | ||
| 284 | }; | ||
| 285 | |||
| 286 | struct CallStackElement { | ||
| 287 | u32 final_address; // Address upon which we jump to return_address | ||
| 288 | u32 return_address; // Where to jump when leaving scope | ||
| 289 | u8 repeat_counter; // How often to repeat until this call stack element is removed | ||
| 290 | u8 loop_increment; // Which value to add to the loop counter after an iteration | ||
| 291 | // TODO: Should this be a signed value? Does it even matter? | ||
| 292 | u32 loop_address; // The address where we'll return to after each loop iteration | ||
| 293 | }; | ||
| 294 | |||
| 295 | // TODO: Is there a maximal size for this? | ||
| 296 | boost::container::static_vector<CallStackElement, 16> call_stack; | ||
| 297 | |||
| 298 | DebugData<Debug> debug; | 281 | DebugData<Debug> debug; |
| 299 | 282 | ||
| 300 | static size_t InputOffset(const SourceRegister& reg) { | 283 | static size_t InputOffset(const SourceRegister& reg) { |
| @@ -340,8 +323,6 @@ struct ShaderSetup { | |||
| 340 | std::array<Math::Vec4<u8>, 4> i; | 323 | std::array<Math::Vec4<u8>, 4> i; |
| 341 | } uniforms; | 324 | } uniforms; |
| 342 | 325 | ||
| 343 | Math::Vec4<float24> default_attributes[16]; | ||
| 344 | |||
| 345 | std::array<u32, 1024> program_code; | 326 | std::array<u32, 1024> program_code; |
| 346 | std::array<u32, 1024> swizzle_data; | 327 | std::array<u32, 1024> swizzle_data; |
| 347 | 328 | ||
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 7710f7fbc..3a827d11f 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -29,8 +29,24 @@ namespace Pica { | |||
| 29 | 29 | ||
| 30 | namespace Shader { | 30 | namespace Shader { |
| 31 | 31 | ||
| 32 | constexpr u32 INVALID_ADDRESS = 0xFFFFFFFF; | ||
| 33 | |||
| 34 | struct CallStackElement { | ||
| 35 | u32 final_address; // Address upon which we jump to return_address | ||
| 36 | u32 return_address; // Where to jump when leaving scope | ||
| 37 | u8 repeat_counter; // How often to repeat until this call stack element is removed | ||
| 38 | u8 loop_increment; // Which value to add to the loop counter after an iteration | ||
| 39 | // TODO: Should this be a signed value? Does it even matter? | ||
| 40 | u32 loop_address; // The address where we'll return to after each loop iteration | ||
| 41 | }; | ||
| 42 | |||
| 32 | template<bool Debug> | 43 | template<bool Debug> |
| 33 | void RunInterpreter(UnitState<Debug>& state) { | 44 | void RunInterpreter(UnitState<Debug>& state) { |
| 45 | // TODO: Is there a maximal size for this? | ||
| 46 | boost::container::static_vector<CallStackElement, 16> call_stack; | ||
| 47 | |||
| 48 | u32 program_counter = g_state.regs.vs.main_offset; | ||
| 49 | |||
| 34 | const auto& uniforms = g_state.vs.uniforms; | 50 | const auto& uniforms = g_state.vs.uniforms; |
| 35 | const auto& swizzle_data = g_state.vs.swizzle_data; | 51 | const auto& swizzle_data = g_state.vs.swizzle_data; |
| 36 | const auto& program_code = g_state.vs.program_code; | 52 | const auto& program_code = g_state.vs.program_code; |
| @@ -41,16 +57,16 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 41 | unsigned iteration = 0; | 57 | unsigned iteration = 0; |
| 42 | bool exit_loop = false; | 58 | bool exit_loop = false; |
| 43 | while (!exit_loop) { | 59 | while (!exit_loop) { |
| 44 | if (!state.call_stack.empty()) { | 60 | if (!call_stack.empty()) { |
| 45 | auto& top = state.call_stack.back(); | 61 | auto& top = call_stack.back(); |
| 46 | if (state.program_counter == top.final_address) { | 62 | if (program_counter == top.final_address) { |
| 47 | state.address_registers[2] += top.loop_increment; | 63 | state.address_registers[2] += top.loop_increment; |
| 48 | 64 | ||
| 49 | if (top.repeat_counter-- == 0) { | 65 | if (top.repeat_counter-- == 0) { |
| 50 | state.program_counter = top.return_address; | 66 | program_counter = top.return_address; |
| 51 | state.call_stack.pop_back(); | 67 | call_stack.pop_back(); |
| 52 | } else { | 68 | } else { |
| 53 | state.program_counter = top.loop_address; | 69 | program_counter = top.loop_address; |
| 54 | } | 70 | } |
| 55 | 71 | ||
| 56 | // TODO: Is "trying again" accurate to hardware? | 72 | // TODO: Is "trying again" accurate to hardware? |
| @@ -58,20 +74,20 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 58 | } | 74 | } |
| 59 | } | 75 | } |
| 60 | 76 | ||
| 61 | const Instruction instr = { program_code[state.program_counter] }; | 77 | const Instruction instr = { program_code[program_counter] }; |
| 62 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | 78 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; |
| 63 | 79 | ||
| 64 | static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, | 80 | static auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, |
| 65 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 81 | u32 return_offset, u8 repeat_count, u8 loop_increment) { |
| 66 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 82 | program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 67 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); | 83 | ASSERT(call_stack.size() < call_stack.capacity()); |
| 68 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 84 | call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); |
| 69 | }; | 85 | }; |
| 70 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); | 86 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); |
| 71 | if (iteration > 0) | 87 | if (iteration > 0) |
| 72 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); | 88 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter); |
| 73 | 89 | ||
| 74 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); | 90 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter); |
| 75 | 91 | ||
| 76 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | 92 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| 77 | switch (source_reg.GetRegisterType()) { | 93 | switch (source_reg.GetRegisterType()) { |
| @@ -519,7 +535,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 519 | case OpCode::Id::JMPC: | 535 | case OpCode::Id::JMPC: |
| 520 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 536 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); |
| 521 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 537 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 522 | state.program_counter = instr.flow_control.dest_offset - 1; | 538 | program_counter = instr.flow_control.dest_offset - 1; |
| 523 | } | 539 | } |
| 524 | break; | 540 | break; |
| 525 | 541 | ||
| @@ -527,7 +543,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 527 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 543 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); |
| 528 | 544 | ||
| 529 | if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { | 545 | if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { |
| 530 | state.program_counter = instr.flow_control.dest_offset - 1; | 546 | program_counter = instr.flow_control.dest_offset - 1; |
| 531 | } | 547 | } |
| 532 | break; | 548 | break; |
| 533 | 549 | ||
| @@ -535,7 +551,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 535 | call(state, | 551 | call(state, |
| 536 | instr.flow_control.dest_offset, | 552 | instr.flow_control.dest_offset, |
| 537 | instr.flow_control.num_instructions, | 553 | instr.flow_control.num_instructions, |
| 538 | state.program_counter + 1, 0, 0); | 554 | program_counter + 1, 0, 0); |
| 539 | break; | 555 | break; |
| 540 | 556 | ||
| 541 | case OpCode::Id::CALLU: | 557 | case OpCode::Id::CALLU: |
| @@ -544,7 +560,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 544 | call(state, | 560 | call(state, |
| 545 | instr.flow_control.dest_offset, | 561 | instr.flow_control.dest_offset, |
| 546 | instr.flow_control.num_instructions, | 562 | instr.flow_control.num_instructions, |
| 547 | state.program_counter + 1, 0, 0); | 563 | program_counter + 1, 0, 0); |
| 548 | } | 564 | } |
| 549 | break; | 565 | break; |
| 550 | 566 | ||
| @@ -554,7 +570,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 554 | call(state, | 570 | call(state, |
| 555 | instr.flow_control.dest_offset, | 571 | instr.flow_control.dest_offset, |
| 556 | instr.flow_control.num_instructions, | 572 | instr.flow_control.num_instructions, |
| 557 | state.program_counter + 1, 0, 0); | 573 | program_counter + 1, 0, 0); |
| 558 | } | 574 | } |
| 559 | break; | 575 | break; |
| 560 | 576 | ||
| @@ -565,8 +581,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 565 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 581 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); |
| 566 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 582 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 567 | call(state, | 583 | call(state, |
| 568 | state.program_counter + 1, | 584 | program_counter + 1, |
| 569 | instr.flow_control.dest_offset - state.program_counter - 1, | 585 | instr.flow_control.dest_offset - program_counter - 1, |
| 570 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 586 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 571 | } else { | 587 | } else { |
| 572 | call(state, | 588 | call(state, |
| @@ -584,8 +600,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 584 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 600 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); |
| 585 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 601 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 586 | call(state, | 602 | call(state, |
| 587 | state.program_counter + 1, | 603 | program_counter + 1, |
| 588 | instr.flow_control.dest_offset - state.program_counter - 1, | 604 | instr.flow_control.dest_offset - program_counter - 1, |
| 589 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 605 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 590 | } else { | 606 | } else { |
| 591 | call(state, | 607 | call(state, |
| @@ -607,8 +623,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 607 | 623 | ||
| 608 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | 624 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); |
| 609 | call(state, | 625 | call(state, |
| 610 | state.program_counter + 1, | 626 | program_counter + 1, |
| 611 | instr.flow_control.dest_offset - state.program_counter + 1, | 627 | instr.flow_control.dest_offset - program_counter + 1, |
| 612 | instr.flow_control.dest_offset + 1, | 628 | instr.flow_control.dest_offset + 1, |
| 613 | loop_param.x, | 629 | loop_param.x, |
| 614 | loop_param.z); | 630 | loop_param.z); |
| @@ -625,7 +641,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 625 | } | 641 | } |
| 626 | } | 642 | } |
| 627 | 643 | ||
| 628 | ++state.program_counter; | 644 | ++program_counter; |
| 629 | ++iteration; | 645 | ++iteration; |
| 630 | } | 646 | } |
| 631 | } | 647 | } |
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 21ae52949..83896814f 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp | |||
| @@ -124,7 +124,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I | |||
| 124 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | 124 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); |
| 125 | } else if (vertex_attribute_is_default[i]) { | 125 | } else if (vertex_attribute_is_default[i]) { |
| 126 | // Load the default attribute if we're configured to do so | 126 | // Load the default attribute if we're configured to do so |
| 127 | input.attr[i] = g_state.vs.default_attributes[i]; | 127 | input.attr[i] = g_state.vs_default_attributes[i]; |
| 128 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | 128 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", |
| 129 | i, vertex, index, | 129 | i, vertex, index, |
| 130 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | 130 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), |