Merge pull request #1695 from Subv/tls_alloc

Kernel/Threads: Dynamically allocate the TLS region for threads.
author: bunnei 2016-05-12 21:51:35 -0400
committer: bunnei 2016-05-12 21:51:35 -0400
commit: 0d8bd3ba369a4a6264ba99a66dbc17e1e14e1440 (patch)
tree: c7341fcf893002e87fbe18b0a04b221aff46bf13 /src
parent: Merge pull request #1783 from JayFoxRox/cleanup-shadersetup (diff)
parent: Kernel/Threads: Dynamically allocate the TLS region for threads in the BASE r... (diff)
download: yuzu-0d8bd3ba369a4a6264ba99a66dbc17e1e14e1440.tar.gz
yuzu-0d8bd3ba369a4a6264ba99a66dbc17e1e14e1440.tar.xz
yuzu-0d8bd3ba369a4a6264ba99a66dbc17e1e14e1440.zip
5 files changed, 74 insertions, 28 deletions
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
index 862643448..61a741e28 100644
--- a/src/core/hle/kernel/memory.cpp
+++ b/src/core/hle/kernel/memory.cpp
@@ -109,7 +109,6 @@ struct MemoryArea {
 static MemoryArea memory_areas[] = {
    {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE,     "Shared Memory"}, // Shared memory
    {VRAM_VADDR,          VRAM_SIZE,              "VRAM"},          // Video memory (VRAM)
-    {TLS_AREA_VADDR,      TLS_AREA_SIZE,          "TLS Area"},      // TLS memory
 };
 }
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index a06afef2b..d781ef32c 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -142,8 +142,11 @@ public:
    MemoryRegionInfo* memory_region = nullptr;
-    /// Bitmask of the used TLS slots
+    /// The Thread Local Storage area is allocated as processes create threads;
-    std::bitset<300> used_tls_slots;
+    /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
+    /// holds the TLS for a specific thread. This vector stores one bitmask per page, recording which parts of that page are in use.
+    /// This vector will grow as more pages are allocated for new threads.
+    std::vector<std::bitset<8>> tls_slots;
    VAddr GetLinearHeapAreaAddress() const;
    VAddr GetLinearHeapBase() const;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 6dc95d0f1..68f026918 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -117,9 +117,10 @@ void Thread::Stop() {
    }
    wait_objects.clear();
-    Kernel::g_current_process->used_tls_slots[tls_index] = false;
+    // Mark the TLS slot in the thread's page as free.
-    g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE;
+    u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE;
-    g_current_process->memory_region->used -= Memory::TLS_ENTRY_SIZE;
+    u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
+    Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot);
    HLE::Reschedule(__func__);
 }
@@ -366,6 +367,31 @@ static void DebugThreadQueue() {
    }
 }
+/**
+ * Finds a free location for the TLS section of a thread.
+ * Pages are scanned in index order and, within a page, slots are scanned from index 0 upwards,
+ * so the result is the lowest-numbered free slot of the lowest-numbered page that is not full.
+ * This function only reads the bitmasks; it does not mark the returned slot as used.
+ * @param tls_slots The TLS page array of the thread's owner process.
+ * @returns a tuple of (page, slot, alloc_needed) where:
+ * page: The index of the first allocated TLS page that has free slots.
+ * slot: The index of the first free slot in the indicated page.
+ * alloc_needed: Whether a new TLS page must be allocated (all pages are full, or none exist yet).
+ *               When this is true, page and slot are both 0 and carry no meaning.
+ */
+std::tuple<u32, u32, bool> GetFreeThreadLocalSlot(std::vector<std::bitset<8>>& tls_slots) {
+    // Walk every allocated page looking for the first one that still has a clear bit.
+    for (unsigned page = 0; page < tls_slots.size(); ++page) {
+        const auto& page_tls_slots = tls_slots[page];
+        if (!page_tls_slots.all()) {
+            // This page has at least one free slot; scan for the lowest-numbered one.
+            for (unsigned slot = 0; slot < page_tls_slots.size(); ++slot) {
+                if (!page_tls_slots.test(slot)) {
+                    return std::make_tuple(page, slot, false);
+                }
+            }
+        }
+    }
+    return std::make_tuple(0, 0, true); // No free slot anywhere: (0, 0) are placeholders only.
+}
 ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority,
        u32 arg, s32 processor_id, VAddr stack_top) {
    if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) {
@@ -403,22 +429,50 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
    thread->name = std::move(name);
    thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
    thread->owner_process = g_current_process;
-    thread->tls_index = -1;
    thread->waitsynch_waited = false;
    // Find the next available TLS index, and mark it as used
-    auto& used_tls_slots = Kernel::g_current_process->used_tls_slots;
+    auto& tls_slots = Kernel::g_current_process->tls_slots;
-    for (unsigned int i = 0; i < used_tls_slots.size(); ++i) {
+    bool needs_allocation = true;
-        if (used_tls_slots[i] == false) {
+    u32 available_page; // Which allocated page has free space
-            thread->tls_index = i;
+    u32 available_slot; // Which slot within the page is free
-            used_tls_slots[i] = true;
-            break;
+    std::tie(available_page, available_slot, needs_allocation) = GetFreeThreadLocalSlot(tls_slots);
+    if (needs_allocation) {
+        // There are no already-allocated pages with free slots, let's allocate a new one.
+        // TLS pages are allocated from the BASE region in the linear heap.
+        MemoryRegionInfo* memory_region = GetMemoryRegion(MemoryRegion::BASE);
+        auto& linheap_memory = memory_region->linear_heap_memory;
+        if (linheap_memory->size() + Memory::PAGE_SIZE > memory_region->size) {
+            LOG_ERROR(Kernel_SVC, "Not enough space in region to allocate a new TLS page for thread");
+            return ResultCode(ErrorDescription::OutOfMemory, ErrorModule::Kernel, ErrorSummary::OutOfResource, ErrorLevel::Permanent);
        }
+        u32 offset = linheap_memory->size();
+        // Allocate some memory from the end of the linear heap for this region.
+        linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0);
+        memory_region->used += Memory::PAGE_SIZE;
+        Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE;
+        tls_slots.emplace_back(0); // The page is completely available at the start
+        available_page = tls_slots.size() - 1;
+        available_slot = 0; // Use the first slot in the new page
+        auto& vm_manager = Kernel::g_current_process->vm_manager;
+        vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
+        // Map the page to the current process' address space.
+        // TODO(Subv): Find the correct MemoryState for this region.
+        vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE,
+                                  linheap_memory, offset, Memory::PAGE_SIZE, MemoryState::Private);
    }
-    ASSERT_MSG(thread->tls_index != -1, "Out of TLS space");
+    // Mark the slot as used
-    g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE;
+    tls_slots[available_page].set(available_slot);
-    g_current_process->memory_region->used += Memory::TLS_ENTRY_SIZE;
+    thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
    // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
    // to initialize the context
@@ -509,10 +563,6 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
    context.cpu_registers[1] = output;
 }
-VAddr Thread::GetTLSAddress() const {
-    return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;
-}
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 97ba57fc5..deab5d5a6 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -127,7 +127,7 @@ public:
     * Returns the Thread Local Storage address of the current thread
     * @returns VAddr of the thread's TLS
     */
-    VAddr GetTLSAddress() const;
+    VAddr GetTLSAddress() const { return tls_address; }
    Core::ThreadContext context;
@@ -144,7 +144,7 @@ public:
    s32 processor_id;
-    s32 tls_index; ///< Index of the Thread Local Storage of the thread
+    VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread
    bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait
diff --git a/src/core/memory.h b/src/core/memory.h
index 9caa3c3f5..126d60471 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -100,15 +100,9 @@ enum : VAddr {
    SHARED_PAGE_SIZE      = 0x00001000,
    SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
-    // TODO(yuriks): The size of this area is dynamic, the kernel grows
-    // it as more and more threads are created. For now we'll just use a
-    // hardcoded value.
    /// Area where TLS (Thread-Local Storage) buffers are allocated.
    TLS_AREA_VADDR     = 0x1FF82000,
    TLS_ENTRY_SIZE     = 0x200,
-    TLS_AREA_SIZE      = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size
-    TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
    /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
    NEW_LINEAR_HEAP_VADDR     = 0x30000000,
author	bunnei	2016-05-12 21:51:35 -0400
committer	bunnei	2016-05-12 21:51:35 -0400
commit	0d8bd3ba369a4a6264ba99a66dbc17e1e14e1440 (patch)
tree	c7341fcf893002e87fbe18b0a04b221aff46bf13 /src
parent	Merge pull request #1783 from JayFoxRox/cleanup-shadersetup (diff)
parent	Kernel/Threads: Dynamically allocate the TLS region for threads in the BASE r... (diff)
download	yuzu-0d8bd3ba369a4a6264ba99a66dbc17e1e14e1440.tar.gz yuzu-0d8bd3ba369a4a6264ba99a66dbc17e1e14e1440.tar.xz yuzu-0d8bd3ba369a4a6264ba99a66dbc17e1e14e1440.zip

diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp index 862643448..61a741e28 100644 --- a/src/core/hle/kernel/memory.cpp +++ b/src/core/hle/kernel/memory.cpp
@@ -109,7 +109,6 @@ struct MemoryArea {
109	static MemoryArea memory_areas[] = {	109	static MemoryArea memory_areas[] = {
110	{SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory	110	{SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
111	{VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)	111	{VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
112	{TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
113	};	112	};
114		113
115	}	114	}


diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index a06afef2b..d781ef32c 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h
@@ -142,8 +142,11 @@ public:
142		142
143	MemoryRegionInfo* memory_region = nullptr;	143	MemoryRegionInfo* memory_region = nullptr;
144		144
145	/// Bitmask of the used TLS slots	145	/// The Thread Local Storage area is allocated as processes create threads,
146	std::bitset<300> used_tls_slots;	146	/// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
		147	/// holds the TLS for a specific thread. This vector contains which parts are in use for each page as a bitmask.
		148	/// This vector will grow as more pages are allocated for new threads.
		149	std::vector<std::bitset<8>> tls_slots;
147		150
148	VAddr GetLinearHeapAreaAddress() const;	151	VAddr GetLinearHeapAreaAddress() const;
149	VAddr GetLinearHeapBase() const;	152	VAddr GetLinearHeapBase() const;


diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 6dc95d0f1..68f026918 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp
@@ -117,9 +117,10 @@ void Thread::Stop() {
117	}	117	}
118	wait_objects.clear();	118	wait_objects.clear();
119		119
120	Kernel::g_current_process->used_tls_slots[tls_index] = false;	120	// Mark the TLS slot in the thread's page as free.
121	g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE;	121	u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE;
122	g_current_process->memory_region->used -= Memory::TLS_ENTRY_SIZE;	122	u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
		123	Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot);
123		124
124	HLE::Reschedule(__func__);	125	HLE::Reschedule(__func__);
125	}	126	}
@@ -366,6 +367,31 @@ static void DebugThreadQueue() {
366	}	367	}
367	}	368	}
368		369
		370	/**
		371	* Finds a free location for the TLS section of a thread.
		372	* @param tls_slots The TLS page array of the thread's owner process.
		373	* Returns a tuple of (page, slot, alloc_needed) where:
		374	* page: The index of the first allocated TLS page that has free slots.
		375	* slot: The index of the first free slot in the indicated page.
		376	* alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full).
		377	*/
		378	std::tuple<u32, u32, bool> GetFreeThreadLocalSlot(std::vector<std::bitset<8>>& tls_slots) {
		379	// Iterate over all the allocated pages, and try to find one where not all slots are used.
		380	for (unsigned page = 0; page < tls_slots.size(); ++page) {
		381	const auto& page_tls_slots = tls_slots[page];
		382	if (!page_tls_slots.all()) {
		383	// We found a page with at least one free slot, find which slot it is
		384	for (unsigned slot = 0; slot < page_tls_slots.size(); ++slot) {
		385	if (!page_tls_slots.test(slot)) {
		386	return std::make_tuple(page, slot, false);
		387	}
		388	}
		389	}
		390	}
		391
		392	return std::make_tuple(0, 0, true);
		393	}
		394
369	ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority,	395	ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority,
370	u32 arg, s32 processor_id, VAddr stack_top) {	396	u32 arg, s32 processor_id, VAddr stack_top) {
371	if (priority < THREADPRIO_HIGHEST \|\| priority > THREADPRIO_LOWEST) {	397	if (priority < THREADPRIO_HIGHEST \|\| priority > THREADPRIO_LOWEST) {
@@ -403,22 +429,50 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
403	thread->name = std::move(name);	429	thread->name = std::move(name);
404	thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();	430	thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
405	thread->owner_process = g_current_process;	431	thread->owner_process = g_current_process;
406	thread->tls_index = -1;
407	thread->waitsynch_waited = false;	432	thread->waitsynch_waited = false;
408		433
409	// Find the next available TLS index, and mark it as used	434	// Find the next available TLS index, and mark it as used
410	auto& used_tls_slots = Kernel::g_current_process->used_tls_slots;	435	auto& tls_slots = Kernel::g_current_process->tls_slots;
411	for (unsigned int i = 0; i < used_tls_slots.size(); ++i) {	436	bool needs_allocation = true;
412	if (used_tls_slots[i] == false) {	437	u32 available_page; // Which allocated page has free space
413	thread->tls_index = i;	438	u32 available_slot; // Which slot within the page is free
414	used_tls_slots[i] = true;	439
415	break;	440	std::tie(available_page, available_slot, needs_allocation) = GetFreeThreadLocalSlot(tls_slots);
		441
		442	if (needs_allocation) {
		443	// There are no already-allocated pages with free slots, let's allocate a new one.
		444	// TLS pages are allocated from the BASE region in the linear heap.
		445	MemoryRegionInfo* memory_region = GetMemoryRegion(MemoryRegion::BASE);
		446	auto& linheap_memory = memory_region->linear_heap_memory;
		447
		448	if (linheap_memory->size() + Memory::PAGE_SIZE > memory_region->size) {
		449	LOG_ERROR(Kernel_SVC, "Not enough space in region to allocate a new TLS page for thread");
		450	return ResultCode(ErrorDescription::OutOfMemory, ErrorModule::Kernel, ErrorSummary::OutOfResource, ErrorLevel::Permanent);
416	}	451	}
		452
		453	u32 offset = linheap_memory->size();
		454
		455	// Allocate some memory from the end of the linear heap for this region.
		456	linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0);
		457	memory_region->used += Memory::PAGE_SIZE;
		458	Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE;
		459
		460	tls_slots.emplace_back(0); // The page is completely available at the start
		461	available_page = tls_slots.size() - 1;
		462	available_slot = 0; // Use the first slot in the new page
		463
		464	auto& vm_manager = Kernel::g_current_process->vm_manager;
		465	vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
		466
		467	// Map the page to the current process' address space.
		468	// TODO(Subv): Find the correct MemoryState for this region.
		469	vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE,
		470	linheap_memory, offset, Memory::PAGE_SIZE, MemoryState::Private);
417	}	471	}
418		472
419	ASSERT_MSG(thread->tls_index != -1, "Out of TLS space");	473	// Mark the slot as used
420	g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE;	474	tls_slots[available_page].set(available_slot);
421	g_current_process->memory_region->used += Memory::TLS_ENTRY_SIZE;	475	thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
422		476
423	// TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used	477	// TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
424	// to initialize the context	478	// to initialize the context
@@ -509,10 +563,6 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
509	context.cpu_registers[1] = output;	563	context.cpu_registers[1] = output;
510	}	564	}
511		565
512	VAddr Thread::GetTLSAddress() const {
513	return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;
514	}
515
516	////////////////////////////////////////////////////////////////////////////////////////////////////	566	////////////////////////////////////////////////////////////////////////////////////////////////////
517		567
518	void ThreadingInit() {	568	void ThreadingInit() {


diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 97ba57fc5..deab5d5a6 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h
@@ -127,7 +127,7 @@ public:
127	* Returns the Thread Local Storage address of the current thread	127	* Returns the Thread Local Storage address of the current thread
128	* @returns VAddr of the thread's TLS	128	* @returns VAddr of the thread's TLS
129	*/	129	*/
130	VAddr GetTLSAddress() const;	130	VAddr GetTLSAddress() const { return tls_address; }
131		131
132	Core::ThreadContext context;	132	Core::ThreadContext context;
133		133
@@ -144,7 +144,7 @@ public:
144		144
145	s32 processor_id;	145	s32 processor_id;
146		146
147	s32 tls_index; ///< Index of the Thread Local Storage of the thread	147	VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread
148		148
149	bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait	149	bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait
150		150


diff --git a/src/core/memory.h b/src/core/memory.h index 9caa3c3f5..126d60471 100644 --- a/src/core/memory.h +++ b/src/core/memory.h
@@ -100,15 +100,9 @@ enum : VAddr {
100	SHARED_PAGE_SIZE = 0x00001000,	100	SHARED_PAGE_SIZE = 0x00001000,
101	SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,	101	SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
102		102
103	// TODO(yuriks): The size of this area is dynamic, the kernel grows
104	// it as more and more threads are created. For now we'll just use a
105	// hardcoded value.
106	/// Area where TLS (Thread-Local Storage) buffers are allocated.	103	/// Area where TLS (Thread-Local Storage) buffers are allocated.
107	TLS_AREA_VADDR = 0x1FF82000,	104	TLS_AREA_VADDR = 0x1FF82000,
108	TLS_ENTRY_SIZE = 0x200,	105	TLS_ENTRY_SIZE = 0x200,
109	TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size
110	TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
111
112		106
113	/// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.	107	/// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
114	NEW_LINEAR_HEAP_VADDR = 0x30000000,	108	NEW_LINEAR_HEAP_VADDR = 0x30000000,