summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/CMakeLists.txt2
-rw-r--r--src/audio_core/hle/dsp.cpp16
-rw-r--r--src/audio_core/time_stretch.cpp144
-rw-r--r--src/audio_core/time_stretch.h57
-rw-r--r--src/citra_qt/debugger/graphics_tracing.cpp2
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/hle/applets/applet.h1
-rw-r--r--src/core/hle/applets/mii_selector.cpp9
-rw-r--r--src/core/hle/applets/swkbd.cpp8
-rw-r--r--src/core/hle/function_wrappers.h3
-rw-r--r--src/core/hle/kernel/memory.cpp5
-rw-r--r--src/core/hle/kernel/process.cpp2
-rw-r--r--src/core/hle/kernel/process.h7
-rw-r--r--src/core/hle/kernel/shared_memory.cpp177
-rw-r--r--src/core/hle/kernel/shared_memory.h48
-rw-r--r--src/core/hle/kernel/thread.cpp84
-rw-r--r--src/core/hle/kernel/thread.h4
-rw-r--r--src/core/hle/result.h1
-rw-r--r--src/core/hle/service/apt/apt.cpp58
-rw-r--r--src/core/hle/service/apt/bcfnt/bcfnt.cpp71
-rw-r--r--src/core/hle/service/apt/bcfnt/bcfnt.h87
-rw-r--r--src/core/hle/service/csnd_snd.cpp13
-rw-r--r--src/core/hle/service/dsp_dsp.cpp4
-rw-r--r--src/core/hle/service/gsp_gpu.cpp5
-rw-r--r--src/core/hle/service/hid/hid.cpp5
-rw-r--r--src/core/hle/service/ir/ir.cpp5
-rw-r--r--src/core/hle/svc.cpp49
-rw-r--r--src/core/memory.h6
-rw-r--r--src/video_core/command_processor.cpp2
-rw-r--r--src/video_core/pica_state.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h11
-rw-r--r--src/video_core/shader/shader.cpp11
-rw-r--r--src/video_core/shader/shader.h42
-rw-r--r--src/video_core/shader/shader_interpreter.cpp74
-rw-r--r--src/video_core/shader/shader_interpreter.h2
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp32
-rw-r--r--src/video_core/shader/shader_jit_x64.h6
-rw-r--r--src/video_core/vertex_loader.cpp2
39 files changed, 834 insertions, 249 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 13b5e400e..eba0a5697 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -7,6 +7,7 @@ set(SRCS
7 hle/source.cpp 7 hle/source.cpp
8 interpolate.cpp 8 interpolate.cpp
9 sink_details.cpp 9 sink_details.cpp
10 time_stretch.cpp
10 ) 11 )
11 12
12set(HEADERS 13set(HEADERS
@@ -21,6 +22,7 @@ set(HEADERS
21 null_sink.h 22 null_sink.h
22 sink.h 23 sink.h
23 sink_details.h 24 sink_details.h
25 time_stretch.h
24 ) 26 )
25 27
26include_directories(../../externals/soundtouch/include) 28include_directories(../../externals/soundtouch/include)
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
index 0cdbdb06a..5113ad8ca 100644
--- a/src/audio_core/hle/dsp.cpp
+++ b/src/audio_core/hle/dsp.cpp
@@ -9,6 +9,7 @@
9#include "audio_core/hle/pipe.h" 9#include "audio_core/hle/pipe.h"
10#include "audio_core/hle/source.h" 10#include "audio_core/hle/source.h"
11#include "audio_core/sink.h" 11#include "audio_core/sink.h"
12#include "audio_core/time_stretch.h"
12 13
13namespace DSP { 14namespace DSP {
14namespace HLE { 15namespace HLE {
@@ -48,15 +49,29 @@ static std::array<Source, num_sources> sources = {
48}; 49};
49 50
50static std::unique_ptr<AudioCore::Sink> sink; 51static std::unique_ptr<AudioCore::Sink> sink;
52static AudioCore::TimeStretcher time_stretcher;
51 53
52void Init() { 54void Init() {
53 DSP::HLE::ResetPipes(); 55 DSP::HLE::ResetPipes();
56
54 for (auto& source : sources) { 57 for (auto& source : sources) {
55 source.Reset(); 58 source.Reset();
56 } 59 }
60
61 time_stretcher.Reset();
62 if (sink) {
63 time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate());
64 }
57} 65}
58 66
59void Shutdown() { 67void Shutdown() {
68 time_stretcher.Flush();
69 while (true) {
70 std::vector<s16> residual_audio = time_stretcher.Process(sink->SamplesInQueue());
71 if (residual_audio.empty())
72 break;
73 sink->EnqueueSamples(residual_audio);
74 }
60} 75}
61 76
62bool Tick() { 77bool Tick() {
@@ -77,6 +92,7 @@ bool Tick() {
77 92
78void SetSink(std::unique_ptr<AudioCore::Sink> sink_) { 93void SetSink(std::unique_ptr<AudioCore::Sink> sink_) {
79 sink = std::move(sink_); 94 sink = std::move(sink_);
95 time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate());
80} 96}
81 97
82} // namespace HLE 98} // namespace HLE
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
new file mode 100644
index 000000000..ea38f40d0
--- /dev/null
+++ b/src/audio_core/time_stretch.cpp
@@ -0,0 +1,144 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <chrono>
6#include <cmath>
7#include <vector>
8
9#include <SoundTouch.h>
10
11#include "audio_core/audio_core.h"
12#include "audio_core/time_stretch.h"
13
14#include "common/common_types.h"
15#include "common/logging/log.h"
16#include "common/math_util.h"
17
18using steady_clock = std::chrono::steady_clock;
19
20namespace AudioCore {
21
22constexpr double MIN_RATIO = 0.1;
23constexpr double MAX_RATIO = 100.0;
24
25static double ClampRatio(double ratio) {
26 return MathUtil::Clamp(ratio, MIN_RATIO, MAX_RATIO);
27}
28
// Downstream-delay thresholds used by CorrectForUnderAndOverflow (seconds).
constexpr double MIN_DELAY_TIME{0.05};
constexpr double MAX_DELAY_TIME{0.25};
// Once this many samples are queued downstream, Process discards its output.
constexpr size_t DROP_FRAMES_SAMPLE_DELAY{16000};

// Weight given to the newest ratio when Process updates the smoothed ratio.
constexpr double SMOOTHING_FACTOR{0.007};
34
35struct TimeStretcher::Impl {
36 soundtouch::SoundTouch soundtouch;
37
38 steady_clock::time_point frame_timer = steady_clock::now();
39 size_t samples_queued = 0;
40
41 double smoothed_ratio = 1.0;
42
43 double sample_rate = static_cast<double>(native_sample_rate);
44};
45
46std::vector<s16> TimeStretcher::Process(size_t samples_in_queue) {
47 // This is a very simple algorithm without any fancy control theory. It works and is stable.
48
49 double ratio = CalculateCurrentRatio();
50 ratio = CorrectForUnderAndOverflow(ratio, samples_in_queue);
51 impl->smoothed_ratio = (1.0 - SMOOTHING_FACTOR) * impl->smoothed_ratio + SMOOTHING_FACTOR * ratio;
52 impl->smoothed_ratio = ClampRatio(impl->smoothed_ratio);
53
54 // SoundTouch's tempo definition the inverse of our ratio definition.
55 impl->soundtouch.setTempo(1.0 / impl->smoothed_ratio);
56
57 std::vector<s16> samples = GetSamples();
58 if (samples_in_queue >= DROP_FRAMES_SAMPLE_DELAY) {
59 samples.clear();
60 LOG_DEBUG(Audio, "Dropping frames!");
61 }
62 return samples;
63}
64
65TimeStretcher::TimeStretcher() : impl(std::make_unique<Impl>()) {
66 impl->soundtouch.setPitch(1.0);
67 impl->soundtouch.setChannels(2);
68 impl->soundtouch.setSampleRate(native_sample_rate);
69 Reset();
70}
71
72TimeStretcher::~TimeStretcher() {
73 impl->soundtouch.clear();
74}
75
76void TimeStretcher::SetOutputSampleRate(unsigned int sample_rate) {
77 impl->sample_rate = static_cast<double>(sample_rate);
78 impl->soundtouch.setRate(static_cast<double>(native_sample_rate) / impl->sample_rate);
79}
80
81void TimeStretcher::AddSamples(const s16* buffer, size_t num_samples) {
82 impl->soundtouch.putSamples(buffer, static_cast<uint>(num_samples));
83 impl->samples_queued += num_samples;
84}
85
86void TimeStretcher::Flush() {
87 impl->soundtouch.flush();
88}
89
90void TimeStretcher::Reset() {
91 impl->soundtouch.setTempo(1.0);
92 impl->soundtouch.clear();
93 impl->smoothed_ratio = 1.0;
94 impl->frame_timer = steady_clock::now();
95 impl->samples_queued = 0;
96 SetOutputSampleRate(native_sample_rate);
97}
98
99double TimeStretcher::CalculateCurrentRatio() {
100 const steady_clock::time_point now = steady_clock::now();
101 const std::chrono::duration<double> duration = now - impl->frame_timer;
102
103 const double expected_time = static_cast<double>(impl->samples_queued) / static_cast<double>(native_sample_rate);
104 const double actual_time = duration.count();
105
106 double ratio;
107 if (expected_time != 0) {
108 ratio = ClampRatio(actual_time / expected_time);
109 } else {
110 ratio = impl->smoothed_ratio;
111 }
112
113 impl->frame_timer = now;
114 impl->samples_queued = 0;
115
116 return ratio;
117}
118
119double TimeStretcher::CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const {
120 const size_t min_sample_delay = static_cast<size_t>(MIN_DELAY_TIME * impl->sample_rate);
121 const size_t max_sample_delay = static_cast<size_t>(MAX_DELAY_TIME * impl->sample_rate);
122
123 if (sample_delay < min_sample_delay) {
124 // Make the ratio bigger.
125 ratio = ratio > 1.0 ? ratio * ratio : sqrt(ratio);
126 } else if (sample_delay > max_sample_delay) {
127 // Make the ratio smaller.
128 ratio = ratio > 1.0 ? sqrt(ratio) : ratio * ratio;
129 }
130
131 return ClampRatio(ratio);
132}
133
134std::vector<s16> TimeStretcher::GetSamples() {
135 uint available = impl->soundtouch.numSamples();
136
137 std::vector<s16> output(static_cast<size_t>(available) * 2);
138
139 impl->soundtouch.receiveSamples(output.data(), available);
140
141 return output;
142}
143
144} // namespace AudioCore
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
new file mode 100644
index 000000000..1fde3f72a
--- /dev/null
+++ b/src/audio_core/time_stretch.h
@@ -0,0 +1,57 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstddef>
6#include <memory>
7#include <vector>
8
9#include "common/common_types.h"
10
11namespace AudioCore {
12
13class TimeStretcher final {
14public:
15 TimeStretcher();
16 ~TimeStretcher();
17
18 /**
19 * Set sample rate for the samples that Process returns.
20 * @param sample_rate The sample rate.
21 */
22 void SetOutputSampleRate(unsigned int sample_rate);
23
24 /**
25 * Add samples to be processed.
26 * @param sample_buffer Buffer of samples in interleaved stereo PCM16 format.
27 * @param num_sample Number of samples.
28 */
29 void AddSamples(const s16* sample_buffer, size_t num_samples);
30
31 /// Flush audio remaining in internal buffers.
32 void Flush();
33
34 /// Resets internal state and clears buffers.
35 void Reset();
36
37 /**
38 * Does audio stretching and produces the time-stretched samples.
39 * Timer calculations use sample_delay to determine how much of a margin we have.
40 * @param sample_delay How many samples are buffered downstream of this module and haven't been played yet.
41 * @return Samples to play in interleaved stereo PCM16 format.
42 */
43 std::vector<s16> Process(size_t sample_delay);
44
45private:
46 struct Impl;
47 std::unique_ptr<Impl> impl;
48
49 /// INTERNAL: ratio = wallclock time / emulated time
50 double CalculateCurrentRatio();
51 /// INTERNAL: If we have too many or too few samples downstream, nudge ratio in the appropriate direction.
52 double CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const;
53 /// INTERNAL: Gets the time-stretched samples from SoundTouch.
54 std::vector<s16> GetSamples();
55};
56
57} // namespace AudioCore
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp
index 1402f8e79..9c80f7ec9 100644
--- a/src/citra_qt/debugger/graphics_tracing.cpp
+++ b/src/citra_qt/debugger/graphics_tracing.cpp
@@ -74,7 +74,7 @@ void GraphicsTracingWidget::StartRecording() {
74 std::array<u32, 4 * 16> default_attributes; 74 std::array<u32, 4 * 16> default_attributes;
75 for (unsigned i = 0; i < 16; ++i) { 75 for (unsigned i = 0; i < 16; ++i) {
76 for (unsigned comp = 0; comp < 3; ++comp) { 76 for (unsigned comp = 0; comp < 3; ++comp) {
77 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32()); 77 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32());
78 } 78 }
79 } 79 }
80 80
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index a8d891689..f6a7566bf 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -52,6 +52,7 @@ set(SRCS
52 hle/service/apt/apt_a.cpp 52 hle/service/apt/apt_a.cpp
53 hle/service/apt/apt_s.cpp 53 hle/service/apt/apt_s.cpp
54 hle/service/apt/apt_u.cpp 54 hle/service/apt/apt_u.cpp
55 hle/service/apt/bcfnt/bcfnt.cpp
55 hle/service/boss/boss.cpp 56 hle/service/boss/boss.cpp
56 hle/service/boss/boss_p.cpp 57 hle/service/boss/boss_p.cpp
57 hle/service/boss/boss_u.cpp 58 hle/service/boss/boss_u.cpp
@@ -185,6 +186,7 @@ set(HEADERS
185 hle/service/apt/apt_a.h 186 hle/service/apt/apt_a.h
186 hle/service/apt/apt_s.h 187 hle/service/apt/apt_s.h
187 hle/service/apt/apt_u.h 188 hle/service/apt/apt_u.h
189 hle/service/apt/bcfnt/bcfnt.h
188 hle/service/boss/boss.h 190 hle/service/boss/boss.h
189 hle/service/boss/boss_p.h 191 hle/service/boss/boss_p.h
190 hle/service/boss/boss_u.h 192 hle/service/boss/boss_u.h
diff --git a/src/core/hle/applets/applet.h b/src/core/hle/applets/applet.h
index af442f81d..754c6f7db 100644
--- a/src/core/hle/applets/applet.h
+++ b/src/core/hle/applets/applet.h
@@ -65,6 +65,7 @@ protected:
65 virtual ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) = 0; 65 virtual ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) = 0;
66 66
67 Service::APT::AppletId id; ///< Id of this Applet 67 Service::APT::AppletId id; ///< Id of this Applet
68 std::shared_ptr<std::vector<u8>> heap_memory; ///< Heap memory for this Applet
68}; 69};
69 70
70/// Returns whether a library applet is currently running 71/// Returns whether a library applet is currently running
diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp
index b4456ca90..bf39eca22 100644
--- a/src/core/hle/applets/mii_selector.cpp
+++ b/src/core/hle/applets/mii_selector.cpp
@@ -35,9 +35,14 @@ ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& p
35 ASSERT(sizeof(capture_info) == parameter.buffer_size); 35 ASSERT(sizeof(capture_info) == parameter.buffer_size);
36 36
37 memcpy(&capture_info, parameter.data, sizeof(capture_info)); 37 memcpy(&capture_info, parameter.data, sizeof(capture_info));
38
38 using Kernel::MemoryPermission; 39 using Kernel::MemoryPermission;
39 framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, 40 // Allocate a heap block of the required size for this applet.
40 MemoryPermission::ReadWrite, "MiiSelector Memory"); 41 heap_memory = std::make_shared<std::vector<u8>>(capture_info.size);
42 // Create a SharedMemory that directly points to this heap block.
43 framebuffer_memory = Kernel::SharedMemory::CreateForApplet(heap_memory, 0, heap_memory->size(),
44 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
45 "MiiSelector Memory");
41 46
42 // Send the response message with the newly created SharedMemory 47 // Send the response message with the newly created SharedMemory
43 Service::APT::MessageParameter result; 48 Service::APT::MessageParameter result;
diff --git a/src/core/hle/applets/swkbd.cpp b/src/core/hle/applets/swkbd.cpp
index 87238aa1c..90c6adc65 100644
--- a/src/core/hle/applets/swkbd.cpp
+++ b/src/core/hle/applets/swkbd.cpp
@@ -40,8 +40,12 @@ ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter con
40 memcpy(&capture_info, parameter.data, sizeof(capture_info)); 40 memcpy(&capture_info, parameter.data, sizeof(capture_info));
41 41
42 using Kernel::MemoryPermission; 42 using Kernel::MemoryPermission;
43 framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, 43 // Allocate a heap block of the required size for this applet.
44 MemoryPermission::ReadWrite, "SoftwareKeyboard Memory"); 44 heap_memory = std::make_shared<std::vector<u8>>(capture_info.size);
45 // Create a SharedMemory that directly points to this heap block.
46 framebuffer_memory = Kernel::SharedMemory::CreateForApplet(heap_memory, 0, heap_memory->size(),
47 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
48 "SoftwareKeyboard Memory");
45 49
46 // Send the response message with the newly created SharedMemory 50 // Send the response message with the newly created SharedMemory
47 Service::APT::MessageParameter result; 51 Service::APT::MessageParameter result;
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h
index 4d718b681..bf7f875b6 100644
--- a/src/core/hle/function_wrappers.h
+++ b/src/core/hle/function_wrappers.h
@@ -170,7 +170,8 @@ template<ResultCode func(s64*, u32, s32)> void Wrap() {
170 170
171template<ResultCode func(u32*, u32, u32, u32, u32)> void Wrap() { 171template<ResultCode func(u32*, u32, u32, u32, u32)> void Wrap() {
172 u32 param_1 = 0; 172 u32 param_1 = 0;
173 u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3), PARAM(4)).raw; 173 // The last parameter is passed in R0 instead of R4
174 u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3), PARAM(0)).raw;
174 Core::g_app_core->SetReg(1, param_1); 175 Core::g_app_core->SetReg(1, param_1);
175 FuncReturn(retval); 176 FuncReturn(retval);
176} 177}
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
index 862643448..17ae87aef 100644
--- a/src/core/hle/kernel/memory.cpp
+++ b/src/core/hle/kernel/memory.cpp
@@ -55,6 +55,9 @@ void MemoryInit(u32 mem_type) {
55 memory_regions[i].size = memory_region_sizes[mem_type][i]; 55 memory_regions[i].size = memory_region_sizes[mem_type][i];
56 memory_regions[i].used = 0; 56 memory_regions[i].used = 0;
57 memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>(); 57 memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>();
58 // Reserve enough space for this region of FCRAM.
59 // We do not want this block of memory to be relocated when allocating from it.
60 memory_regions[i].linear_heap_memory->reserve(memory_regions[i].size);
58 61
59 base += memory_regions[i].size; 62 base += memory_regions[i].size;
60 } 63 }
@@ -107,9 +110,7 @@ struct MemoryArea {
107 110
108// We don't declare the IO regions in here since its handled by other means. 111// We don't declare the IO regions in here since its handled by other means.
109static MemoryArea memory_areas[] = { 112static MemoryArea memory_areas[] = {
110 {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
111 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) 113 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
112 {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
113}; 114};
114 115
115} 116}
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 0546f6e16..69302cc82 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -209,7 +209,7 @@ ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission p
209 return ERR_INVALID_ADDRESS; 209 return ERR_INVALID_ADDRESS;
210 } 210 }
211 211
212 // Expansion of the linear heap is only allowed if you do an allocation immediatelly at its 212 // Expansion of the linear heap is only allowed if you do an allocation immediately at its
213 // end. It's possible to free gaps in the middle of the heap and then reallocate them later, 213 // end. It's possible to free gaps in the middle of the heap and then reallocate them later,
214 // but expansions are only allowed at the end. 214 // but expansions are only allowed at the end.
215 if (target == heap_end) { 215 if (target == heap_end) {
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index a06afef2b..d781ef32c 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -142,8 +142,11 @@ public:
142 142
143 MemoryRegionInfo* memory_region = nullptr; 143 MemoryRegionInfo* memory_region = nullptr;
144 144
145 /// Bitmask of the used TLS slots 145 /// The Thread Local Storage area is allocated as processes create threads,
146 std::bitset<300> used_tls_slots; 146 /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
147 /// holds the TLS for a specific thread. This vector contains which parts are in use for each page as a bitmask.
148 /// This vector will grow as more pages are allocated for new threads.
149 std::vector<std::bitset<8>> tls_slots;
147 150
148 VAddr GetLinearHeapAreaAddress() const; 151 VAddr GetLinearHeapAreaAddress() const;
149 VAddr GetLinearHeapBase() const; 152 VAddr GetLinearHeapBase() const;
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index d90f0f00f..6a22c8986 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -7,6 +7,7 @@
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8 8
9#include "core/memory.h" 9#include "core/memory.h"
10#include "core/hle/kernel/memory.h"
10#include "core/hle/kernel/shared_memory.h" 11#include "core/hle/kernel/shared_memory.h"
11 12
12namespace Kernel { 13namespace Kernel {
@@ -14,93 +15,157 @@ namespace Kernel {
14SharedMemory::SharedMemory() {} 15SharedMemory::SharedMemory() {}
15SharedMemory::~SharedMemory() {} 16SharedMemory::~SharedMemory() {}
16 17
17SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissions, 18SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u32 size, MemoryPermission permissions,
18 MemoryPermission other_permissions, std::string name) { 19 MemoryPermission other_permissions, VAddr address, MemoryRegion region, std::string name) {
19 SharedPtr<SharedMemory> shared_memory(new SharedMemory); 20 SharedPtr<SharedMemory> shared_memory(new SharedMemory);
20 21
22 shared_memory->owner_process = owner_process;
21 shared_memory->name = std::move(name); 23 shared_memory->name = std::move(name);
22 shared_memory->base_address = 0x0;
23 shared_memory->fixed_address = 0x0;
24 shared_memory->size = size; 24 shared_memory->size = size;
25 shared_memory->permissions = permissions; 25 shared_memory->permissions = permissions;
26 shared_memory->other_permissions = other_permissions; 26 shared_memory->other_permissions = other_permissions;
27 27
28 if (address == 0) {
29 // We need to allocate a block from the Linear Heap ourselves.
30 // We'll manually allocate some memory from the linear heap in the specified region.
31 MemoryRegionInfo* memory_region = GetMemoryRegion(region);
32 auto& linheap_memory = memory_region->linear_heap_memory;
33
34 ASSERT_MSG(linheap_memory->size() + size <= memory_region->size, "Not enough space in region to allocate shared memory!");
35
36 shared_memory->backing_block = linheap_memory;
37 shared_memory->backing_block_offset = linheap_memory->size();
38 // Allocate some memory from the end of the linear heap for this region.
39 linheap_memory->insert(linheap_memory->end(), size, 0);
40 memory_region->used += size;
41
42 shared_memory->linear_heap_phys_address = Memory::FCRAM_PADDR + memory_region->base + shared_memory->backing_block_offset;
43
44 // Increase the amount of used linear heap memory for the owner process.
45 if (shared_memory->owner_process != nullptr) {
46 shared_memory->owner_process->linear_heap_used += size;
47 }
48
49 // Refresh the address mappings for the current process.
50 if (Kernel::g_current_process != nullptr) {
51 Kernel::g_current_process->vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
52 }
53 } else {
54 // TODO(Subv): What happens if an application tries to create multiple memory blocks pointing to the same address?
55 auto& vm_manager = shared_memory->owner_process->vm_manager;
56 // The memory is already available and mapped in the owner process.
57 auto vma = vm_manager.FindVMA(address)->second;
58 // Copy it over to our own storage
59 shared_memory->backing_block = std::make_shared<std::vector<u8>>(vma.backing_block->data() + vma.offset,
60 vma.backing_block->data() + vma.offset + size);
61 shared_memory->backing_block_offset = 0;
62 // Unmap the existing pages
63 vm_manager.UnmapRange(address, size);
64 // Map our own block into the address space
65 vm_manager.MapMemoryBlock(address, shared_memory->backing_block, 0, size, MemoryState::Shared);
66 // Reprotect the block with the new permissions
67 vm_manager.ReprotectRange(address, size, ConvertPermissions(permissions));
68 }
69
70 shared_memory->base_address = address;
28 return shared_memory; 71 return shared_memory;
29} 72}
30 73
31ResultCode SharedMemory::Map(VAddr address, MemoryPermission permissions, 74SharedPtr<SharedMemory> SharedMemory::CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block, u32 offset, u32 size,
32 MemoryPermission other_permissions) { 75 MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
76 SharedPtr<SharedMemory> shared_memory(new SharedMemory);
33 77
34 if (base_address != 0) { 78 shared_memory->owner_process = nullptr;
35 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: already mapped at 0x%08X!", 79 shared_memory->name = std::move(name);
36 GetObjectId(), address, name.c_str(), base_address); 80 shared_memory->size = size;
37 // TODO: Verify error code with hardware 81 shared_memory->permissions = permissions;
38 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, 82 shared_memory->other_permissions = other_permissions;
39 ErrorSummary::InvalidArgument, ErrorLevel::Permanent); 83 shared_memory->backing_block = heap_block;
40 } 84 shared_memory->backing_block_offset = offset;
85 shared_memory->base_address = Memory::HEAP_VADDR + offset;
41 86
42 // TODO(Subv): Return E0E01BEE when permissions and other_permissions don't 87 return shared_memory;
43 // match what was specified when the memory block was created. 88}
44 89
45 // TODO(Subv): Return E0E01BEE when address should be 0. 90ResultCode SharedMemory::Map(Process* target_process, VAddr address, MemoryPermission permissions,
46 // Note: Find out when that's the case. 91 MemoryPermission other_permissions) {
47 92
48 if (fixed_address != 0) { 93 MemoryPermission own_other_permissions = target_process == owner_process ? this->permissions : this->other_permissions;
49 if (address != 0 && address != fixed_address) {
50 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: fixed_addres is 0x%08X!",
51 GetObjectId(), address, name.c_str(), fixed_address);
52 // TODO: Verify error code with hardware
53 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
54 ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
55 }
56 94
57 // HACK(yuriks): This is only here to support the APT shared font mapping right now. 95 // Automatically allocated memory blocks can only be mapped with other_permissions = DontCare
58 // Later, this should actually map the memory block onto the address space. 96 if (base_address == 0 && other_permissions != MemoryPermission::DontCare) {
59 return RESULT_SUCCESS; 97 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
60 } 98 }
61 99
62 if (address < Memory::SHARED_MEMORY_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) { 100 // Error out if the requested permissions don't match what the creator process allows.
63 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s outside of shared mem bounds!", 101 if (static_cast<u32>(permissions) & ~static_cast<u32>(own_other_permissions)) {
64 GetObjectId(), address, name.c_str()); 102 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match",
65 // TODO: Verify error code with hardware 103 GetObjectId(), address, name.c_str());
66 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, 104 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
67 ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
68 } 105 }
69 106
70 // TODO: Test permissions 107 // Heap-backed memory blocks can not be mapped with other_permissions = DontCare
108 if (base_address != 0 && other_permissions == MemoryPermission::DontCare) {
109 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match",
110 GetObjectId(), address, name.c_str());
111 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
112 }
71 113
72 // HACK: Since there's no way to write to the memory block without mapping it onto the game 114 // Error out if the provided permissions are not compatible with what the creator process needs.
73 // process yet, at least initialize memory the first time it's mapped. 115 if (other_permissions != MemoryPermission::DontCare &&
74 if (address != this->base_address) { 116 static_cast<u32>(this->permissions) & ~static_cast<u32>(other_permissions)) {
75 std::memset(Memory::GetPointer(address), 0, size); 117 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match",
118 GetObjectId(), address, name.c_str());
119 return ResultCode(ErrorDescription::WrongPermission, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent);
76 } 120 }
77 121
78 this->base_address = address; 122 // TODO(Subv): Check for the Shared Device Mem flag in the creator process.
123 /*if (was_created_with_shared_device_mem && address != 0) {
124 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
125 }*/
79 126
80 return RESULT_SUCCESS; 127 // TODO(Subv): The same process that created a SharedMemory object
81} 128 // can not map it in its own address space unless it was created with addr=0, result 0xD900182C.
82 129
83ResultCode SharedMemory::Unmap(VAddr address) { 130 if (address != 0) {
84 if (base_address == 0) { 131 if (address < Memory::HEAP_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) {
85 // TODO(Subv): Verify what actually happens when you want to unmap a memory block that 132 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, invalid address",
86 // was originally mapped with address = 0 133 GetObjectId(), address, name.c_str());
87 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); 134 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS,
135 ErrorSummary::InvalidArgument, ErrorLevel::Usage);
136 }
88 } 137 }
89 138
90 if (base_address != address) 139 VAddr target_address = address;
91 return ResultCode(ErrorDescription::WrongAddress, ErrorModule::OS, ErrorSummary::InvalidState, ErrorLevel::Usage);
92 140
93 base_address = 0; 141 if (base_address == 0 && target_address == 0) {
142 // Calculate the address at which to map the memory block.
143 target_address = Memory::PhysicalToVirtualAddress(linear_heap_phys_address);
144 }
145
146 // Map the memory block into the target process
147 auto result = target_process->vm_manager.MapMemoryBlock(target_address, backing_block, backing_block_offset, size, MemoryState::Shared);
148 if (result.Failed()) {
149 LOG_ERROR(Kernel, "cannot map id=%u, target_address=0x%08X name=%s, error mapping to virtual memory",
150 GetObjectId(), target_address, name.c_str());
151 return result.Code();
152 }
94 153
95 return RESULT_SUCCESS; 154 return target_process->vm_manager.ReprotectRange(target_address, size, ConvertPermissions(permissions));
96} 155}
97 156
98u8* SharedMemory::GetPointer(u32 offset) { 157ResultCode SharedMemory::Unmap(Process* target_process, VAddr address) {
99 if (base_address != 0) 158 // TODO(Subv): Verify what happens if the application tries to unmap an address that is not mapped to a SharedMemory.
100 return Memory::GetPointer(base_address + offset); 159 return target_process->vm_manager.UnmapRange(address, size);
160}
161
162VMAPermission SharedMemory::ConvertPermissions(MemoryPermission permission) {
163 u32 masked_permissions = static_cast<u32>(permission) & static_cast<u32>(MemoryPermission::ReadWriteExecute);
164 return static_cast<VMAPermission>(masked_permissions);
165};
101 166
102 LOG_ERROR(Kernel_SVC, "memory block id=%u not mapped!", GetObjectId()); 167u8* SharedMemory::GetPointer(u32 offset) {
103 return nullptr; 168 return backing_block->data() + backing_block_offset + offset;
104} 169}
105 170
106} // namespace 171} // namespace
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index b51049ad0..0c404a9f8 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10 10
11#include "core/hle/kernel/kernel.h" 11#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/process.h"
12#include "core/hle/result.h" 13#include "core/hle/result.h"
13 14
14namespace Kernel { 15namespace Kernel {
@@ -29,14 +30,29 @@ enum class MemoryPermission : u32 {
29class SharedMemory final : public Object { 30class SharedMemory final : public Object {
30public: 31public:
31 /** 32 /**
32 * Creates a shared memory object 33 * Creates a shared memory object.
34 * @param owner_process Process that created this shared memory object.
33 * @param size Size of the memory block. Must be page-aligned. 35 * @param size Size of the memory block. Must be page-aligned.
34 * @param permissions Permission restrictions applied to the process which created the block. 36 * @param permissions Permission restrictions applied to the process which created the block.
35 * @param other_permissions Permission restrictions applied to other processes mapping the block. 37 * @param other_permissions Permission restrictions applied to other processes mapping the block.
38 * @param address The address from which to map the Shared Memory.
39 * @param region If the address is 0, the shared memory will be allocated in this region of the linear heap.
36 * @param name Optional object name, used for debugging purposes. 40 * @param name Optional object name, used for debugging purposes.
37 */ 41 */
38 static SharedPtr<SharedMemory> Create(u32 size, MemoryPermission permissions, 42 static SharedPtr<SharedMemory> Create(SharedPtr<Process> owner_process, u32 size, MemoryPermission permissions,
39 MemoryPermission other_permissions, std::string name = "Unknown"); 43 MemoryPermission other_permissions, VAddr address = 0, MemoryRegion region = MemoryRegion::BASE, std::string name = "Unknown");
44
45 /**
46 * Creates a shared memory object from a block of memory managed by an HLE applet.
47 * @param heap_block Heap block of the HLE applet.
48 * @param offset The offset into the heap block that the SharedMemory will map.
49 * @param size Size of the memory block. Must be page-aligned.
50 * @param permissions Permission restrictions applied to the process which created the block.
51 * @param other_permissions Permission restrictions applied to other processes mapping the block.
52 * @param name Optional object name, used for debugging purposes.
53 */
54 static SharedPtr<SharedMemory> CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block, u32 offset, u32 size,
55 MemoryPermission permissions, MemoryPermission other_permissions, std::string name = "Unknown Applet");
40 56
41 std::string GetTypeName() const override { return "SharedMemory"; } 57 std::string GetTypeName() const override { return "SharedMemory"; }
42 std::string GetName() const override { return name; } 58 std::string GetName() const override { return name; }
@@ -45,19 +61,27 @@ public:
45 HandleType GetHandleType() const override { return HANDLE_TYPE; } 61 HandleType GetHandleType() const override { return HANDLE_TYPE; }
46 62
47 /** 63 /**
48 * Maps a shared memory block to an address in system memory 64 * Converts the specified MemoryPermission into the equivalent VMAPermission.
65 * @param permission The MemoryPermission to convert.
66 */
67 static VMAPermission ConvertPermissions(MemoryPermission permission);
68
69 /**
70 * Maps a shared memory block to an address in the target process' address space
71 * @param target_process Process on which to map the memory block.
49 * @param address Address in system memory to map shared memory block to 72 * @param address Address in system memory to map shared memory block to
50 * @param permissions Memory block map permissions (specified by SVC field) 73 * @param permissions Memory block map permissions (specified by SVC field)
51 * @param other_permissions Memory block map other permissions (specified by SVC field) 74 * @param other_permissions Memory block map other permissions (specified by SVC field)
52 */ 75 */
53 ResultCode Map(VAddr address, MemoryPermission permissions, MemoryPermission other_permissions); 76 ResultCode Map(Process* target_process, VAddr address, MemoryPermission permissions, MemoryPermission other_permissions);
54 77
55 /** 78 /**
56 * Unmaps a shared memory block from the specified address in system memory 79 * Unmaps a shared memory block from the specified address in system memory
80 * @param target_process Process from which to unmap the memory block.
57 * @param address Address in system memory where the shared memory block is mapped 81 * @param address Address in system memory where the shared memory block is mapped
58 * @return Result code of the unmap operation 82 * @return Result code of the unmap operation
59 */ 83 */
60 ResultCode Unmap(VAddr address); 84 ResultCode Unmap(Process* target_process, VAddr address);
61 85
62 /** 86 /**
63 * Gets a pointer to the shared memory block 87 * Gets a pointer to the shared memory block
@@ -66,10 +90,16 @@ public:
66 */ 90 */
67 u8* GetPointer(u32 offset = 0); 91 u8* GetPointer(u32 offset = 0);
68 92
69 /// Address of shared memory block in the process. 93 /// Process that created this shared memory block.
94 SharedPtr<Process> owner_process;
95 /// Address of shared memory block in the owner process if specified.
70 VAddr base_address; 96 VAddr base_address;
71 /// Fixed address to allow mapping to. Used for blocks created from the linear heap. 97 /// Physical address of the shared memory block in the linear heap if no address was specified during creation.
72 VAddr fixed_address; 98 PAddr linear_heap_phys_address;
99 /// Backing memory for this shared memory block.
100 std::shared_ptr<std::vector<u8>> backing_block;
101 /// Offset into the backing block for this shared memory.
102 u32 backing_block_offset;
73 /// Size of the memory block. Page-aligned. 103 /// Size of the memory block. Page-aligned.
74 u32 size; 104 u32 size;
75 /// Permission restrictions applied to the process which created the block. 105 /// Permission restrictions applied to the process which created the block.
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 6dc95d0f1..68f026918 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -117,9 +117,10 @@ void Thread::Stop() {
117 } 117 }
118 wait_objects.clear(); 118 wait_objects.clear();
119 119
120 Kernel::g_current_process->used_tls_slots[tls_index] = false; 120 // Mark the TLS slot in the thread's page as free.
121 g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE; 121 u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE;
122 g_current_process->memory_region->used -= Memory::TLS_ENTRY_SIZE; 122 u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
123 Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot);
123 124
124 HLE::Reschedule(__func__); 125 HLE::Reschedule(__func__);
125} 126}
@@ -366,6 +367,31 @@ static void DebugThreadQueue() {
366 } 367 }
367} 368}
368 369
/**
 * Finds a free location for the TLS section of a thread.
 * @param tls_slots The TLS page array of the thread's owner process. Each element describes one
 *                  allocated page; a set bit means the corresponding slot of that page is in use.
 *                  The array is only inspected, never modified.
 * @returns A tuple of (page, slot, alloc_needed) where:
 *   page: The index of the first allocated TLS page that has free slots.
 *   slot: The index of the first free slot in the indicated page.
 *   alloc_needed: Whether there's a need to allocate a new TLS page (all pages are full, or none
 *                 has been allocated yet). When true, page and slot are both 0 and carry no meaning.
 */
std::tuple<u32, u32, bool> GetFreeThreadLocalSlot(const std::vector<std::bitset<8>>& tls_slots) {
    // Iterate over all the allocated pages, and try to find one where not all slots are used.
    for (unsigned page = 0; page < tls_slots.size(); ++page) {
        const auto& page_tls_slots = tls_slots[page];
        if (page_tls_slots.all()) {
            // Every slot of this page is taken, move on to the next page.
            continue;
        }

        // We found a page with at least one free slot, find which slot it is.
        for (unsigned slot = 0; slot < page_tls_slots.size(); ++slot) {
            if (!page_tls_slots.test(slot)) {
                return std::make_tuple(page, slot, false);
            }
        }
    }

    // No allocated page has room left: the caller has to allocate a new page.
    return std::make_tuple(0, 0, true);
}
394
369ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, 395ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority,
370 u32 arg, s32 processor_id, VAddr stack_top) { 396 u32 arg, s32 processor_id, VAddr stack_top) {
371 if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { 397 if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) {
@@ -403,22 +429,50 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
403 thread->name = std::move(name); 429 thread->name = std::move(name);
404 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); 430 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
405 thread->owner_process = g_current_process; 431 thread->owner_process = g_current_process;
406 thread->tls_index = -1;
407 thread->waitsynch_waited = false; 432 thread->waitsynch_waited = false;
408 433
409 // Find the next available TLS index, and mark it as used 434 // Find the next available TLS index, and mark it as used
410 auto& used_tls_slots = Kernel::g_current_process->used_tls_slots; 435 auto& tls_slots = Kernel::g_current_process->tls_slots;
411 for (unsigned int i = 0; i < used_tls_slots.size(); ++i) { 436 bool needs_allocation = true;
412 if (used_tls_slots[i] == false) { 437 u32 available_page; // Which allocated page has free space
413 thread->tls_index = i; 438 u32 available_slot; // Which slot within the page is free
414 used_tls_slots[i] = true; 439
415 break; 440 std::tie(available_page, available_slot, needs_allocation) = GetFreeThreadLocalSlot(tls_slots);
441
442 if (needs_allocation) {
443 // There are no already-allocated pages with free slots, let's allocate a new one.
444 // TLS pages are allocated from the BASE region in the linear heap.
445 MemoryRegionInfo* memory_region = GetMemoryRegion(MemoryRegion::BASE);
446 auto& linheap_memory = memory_region->linear_heap_memory;
447
448 if (linheap_memory->size() + Memory::PAGE_SIZE > memory_region->size) {
449 LOG_ERROR(Kernel_SVC, "Not enough space in region to allocate a new TLS page for thread");
450 return ResultCode(ErrorDescription::OutOfMemory, ErrorModule::Kernel, ErrorSummary::OutOfResource, ErrorLevel::Permanent);
416 } 451 }
452
453 u32 offset = linheap_memory->size();
454
455 // Allocate some memory from the end of the linear heap for this region.
456 linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0);
457 memory_region->used += Memory::PAGE_SIZE;
458 Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE;
459
460 tls_slots.emplace_back(0); // The page is completely available at the start
461 available_page = tls_slots.size() - 1;
462 available_slot = 0; // Use the first slot in the new page
463
464 auto& vm_manager = Kernel::g_current_process->vm_manager;
465 vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
466
467 // Map the page to the current process' address space.
468 // TODO(Subv): Find the correct MemoryState for this region.
469 vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE,
470 linheap_memory, offset, Memory::PAGE_SIZE, MemoryState::Private);
417 } 471 }
418 472
419 ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); 473 // Mark the slot as used
420 g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE; 474 tls_slots[available_page].set(available_slot);
421 g_current_process->memory_region->used += Memory::TLS_ENTRY_SIZE; 475 thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
422 476
423 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 477 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
424 // to initialize the context 478 // to initialize the context
@@ -509,10 +563,6 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
509 context.cpu_registers[1] = output; 563 context.cpu_registers[1] = output;
510} 564}
511 565
512VAddr Thread::GetTLSAddress() const {
513 return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;
514}
515
516//////////////////////////////////////////////////////////////////////////////////////////////////// 566////////////////////////////////////////////////////////////////////////////////////////////////////
517 567
518void ThreadingInit() { 568void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 97ba57fc5..deab5d5a6 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -127,7 +127,7 @@ public:
127 * Returns the Thread Local Storage address of the current thread 127 * Returns the Thread Local Storage address of the current thread
128 * @returns VAddr of the thread's TLS 128 * @returns VAddr of the thread's TLS
129 */ 129 */
130 VAddr GetTLSAddress() const; 130 VAddr GetTLSAddress() const { return tls_address; }
131 131
132 Core::ThreadContext context; 132 Core::ThreadContext context;
133 133
@@ -144,7 +144,7 @@ public:
144 144
145 s32 processor_id; 145 s32 processor_id;
146 146
147 s32 tls_index; ///< Index of the Thread Local Storage of the thread 147 VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread
148 148
149 bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait 149 bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait
150 150
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 3fc1ab4ee..bfb3327ce 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -17,6 +17,7 @@
17/// Detailed description of the error. This listing is likely incomplete. 17/// Detailed description of the error. This listing is likely incomplete.
18enum class ErrorDescription : u32 { 18enum class ErrorDescription : u32 {
19 Success = 0, 19 Success = 0,
20 WrongPermission = 46,
20 OS_InvalidBufferDescriptor = 48, 21 OS_InvalidBufferDescriptor = 48,
21 WrongAddress = 53, 22 WrongAddress = 53,
22 FS_NotFound = 120, 23 FS_NotFound = 120,
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 6d72e8188..73fce6079 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -12,6 +12,7 @@
12#include "core/hle/service/apt/apt_a.h" 12#include "core/hle/service/apt/apt_a.h"
13#include "core/hle/service/apt/apt_s.h" 13#include "core/hle/service/apt/apt_s.h"
14#include "core/hle/service/apt/apt_u.h" 14#include "core/hle/service/apt/apt_u.h"
15#include "core/hle/service/apt/bcfnt/bcfnt.h"
15#include "core/hle/service/fs/archive.h" 16#include "core/hle/service/fs/archive.h"
16 17
17#include "core/hle/kernel/event.h" 18#include "core/hle/kernel/event.h"
@@ -22,23 +23,14 @@
22namespace Service { 23namespace Service {
23namespace APT { 24namespace APT {
24 25
25// Address used for shared font (as observed on HW)
26// TODO(bunnei): This is the hard-coded address where we currently dump the shared font from via
27// https://github.com/citra-emu/3dsutils. This is technically a hack, and will not work at any
28// address other than 0x18000000 due to internal pointers in the shared font dump that would need to
29// be relocated. This might be fixed by dumping the shared font @ address 0x00000000 and then
30// correctly mapping it in Citra, however we still do not understand how the mapping is determined.
31static const VAddr SHARED_FONT_VADDR = 0x18000000;
32
33/// Handle to the shared memory region designated for the shared system font 26/// Handle to the shared memory region designated for the shared system font
34static Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; 27static Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
28static bool shared_font_relocated = false;
35 29
36static Kernel::SharedPtr<Kernel::Mutex> lock; 30static Kernel::SharedPtr<Kernel::Mutex> lock;
37static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event 31static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event
38static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event 32static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event
39 33
40static std::shared_ptr<std::vector<u8>> shared_font;
41
42static u32 cpu_percent; ///< CPU time available to the running application 34static u32 cpu_percent; ///< CPU time available to the running application
43 35
44/// Parameter data to be returned in the next call to Glance/ReceiveParameter 36/// Parameter data to be returned in the next call to Glance/ReceiveParameter
@@ -74,23 +66,25 @@ void Initialize(Service::Interface* self) {
74void GetSharedFont(Service::Interface* self) { 66void GetSharedFont(Service::Interface* self) {
75 u32* cmd_buff = Kernel::GetCommandBuffer(); 67 u32* cmd_buff = Kernel::GetCommandBuffer();
76 68
77 if (shared_font != nullptr) { 69 // The shared font has to be relocated to the new address before being passed to the application.
78 // TODO(yuriks): This is a hack to keep this working right now even with our completely 70 VAddr target_address = Memory::PhysicalToVirtualAddress(shared_font_mem->linear_heap_phys_address);
79 // broken shared memory system. 71 // The shared font dumped by 3dsutils (https://github.com/citra-emu/3dsutils) uses this address as base,
80 shared_font_mem->fixed_address = SHARED_FONT_VADDR; 72 // so we relocate it from there to our real address.
81 Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->fixed_address, 73 // TODO(Subv): This address is wrong if the shared font is dumped from a n3DS,
82 shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared); 74 // we need a way to automatically calculate the original address of the font from the file.
83 75 static const VAddr SHARED_FONT_VADDR = 0x18000000;
84 cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2); 76 if (!shared_font_relocated) {
85 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 77 BCFNT::RelocateSharedFont(shared_font_mem, SHARED_FONT_VADDR, target_address);
86 cmd_buff[2] = SHARED_FONT_VADDR; 78 shared_font_relocated = true;
87 cmd_buff[3] = IPC::MoveHandleDesc();
88 cmd_buff[4] = Kernel::g_handle_table.Create(shared_font_mem).MoveFrom();
89 } else {
90 cmd_buff[0] = IPC::MakeHeader(0x44, 1, 0);
91 cmd_buff[1] = -1; // Generic error (not really possible to verify this on hardware)
92 LOG_ERROR(Kernel_SVC, "called, but %s has not been loaded!", SHARED_FONT);
93 } 79 }
80 cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2);
81 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
82 // Since the SharedMemory interface doesn't provide the address at which the memory was allocated,
83 // the real APT service calculates this address by scanning the entire address space (using svcQueryMemory)
84 // and searches for an allocation of the same size as the Shared Font.
85 cmd_buff[2] = target_address;
86 cmd_buff[3] = IPC::MoveHandleDesc();
87 cmd_buff[4] = Kernel::g_handle_table.Create(shared_font_mem).MoveFrom();
94} 88}
95 89
96void NotifyToWait(Service::Interface* self) { 90void NotifyToWait(Service::Interface* self) {
@@ -433,14 +427,12 @@ void Init() {
433 FileUtil::IOFile file(filepath, "rb"); 427 FileUtil::IOFile file(filepath, "rb");
434 428
435 if (file.IsOpen()) { 429 if (file.IsOpen()) {
436 // Read shared font data
437 shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize());
438 file.ReadBytes(shared_font->data(), shared_font->size());
439
440 // Create shared font memory object 430 // Create shared font memory object
441 using Kernel::MemoryPermission; 431 using Kernel::MemoryPermission;
442 shared_font_mem = Kernel::SharedMemory::Create(3 * 1024 * 1024, // 3MB 432 shared_font_mem = Kernel::SharedMemory::Create(nullptr, 0x332000, // 3272 KB
443 MemoryPermission::ReadWrite, MemoryPermission::Read, "APT_U:shared_font_mem"); 433 MemoryPermission::ReadWrite, MemoryPermission::Read, 0, Kernel::MemoryRegion::SYSTEM, "APT:SharedFont");
434 // Read shared font data
435 file.ReadBytes(shared_font_mem->GetPointer(), file.GetSize());
444 } else { 436 } else {
445 LOG_WARNING(Service_APT, "Unable to load shared font: %s", filepath.c_str()); 437 LOG_WARNING(Service_APT, "Unable to load shared font: %s", filepath.c_str());
446 shared_font_mem = nullptr; 438 shared_font_mem = nullptr;
@@ -459,8 +451,8 @@ void Init() {
459} 451}
460 452
461void Shutdown() { 453void Shutdown() {
462 shared_font = nullptr;
463 shared_font_mem = nullptr; 454 shared_font_mem = nullptr;
455 shared_font_relocated = false;
464 lock = nullptr; 456 lock = nullptr;
465 notification_event = nullptr; 457 notification_event = nullptr;
466 parameter_event = nullptr; 458 parameter_event = nullptr;
diff --git a/src/core/hle/service/apt/bcfnt/bcfnt.cpp b/src/core/hle/service/apt/bcfnt/bcfnt.cpp
new file mode 100644
index 000000000..b0d39d4a5
--- /dev/null
+++ b/src/core/hle/service/apt/bcfnt/bcfnt.cpp
@@ -0,0 +1,71 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/service/apt/bcfnt/bcfnt.h"
6#include "core/hle/service/service.h"
7
8namespace Service {
9namespace APT {
10namespace BCFNT {
11
12void RelocateSharedFont(Kernel::SharedPtr<Kernel::SharedMemory> shared_font, VAddr previous_address, VAddr new_address) {
13 static const u32 SharedFontStartOffset = 0x80;
14 u8* data = shared_font->GetPointer(SharedFontStartOffset);
15
16 CFNT cfnt;
17 memcpy(&cfnt, data, sizeof(cfnt));
18
19 // Advance past the header
20 data = shared_font->GetPointer(SharedFontStartOffset + cfnt.header_size);
21
22 for (unsigned block = 0; block < cfnt.num_blocks; ++block) {
23
24 u32 section_size = 0;
25 if (memcmp(data, "FINF", 4) == 0) {
26 BCFNT::FINF finf;
27 memcpy(&finf, data, sizeof(finf));
28 section_size = finf.section_size;
29
30 // Relocate the offsets in the FINF section
31 finf.cmap_offset += new_address - previous_address;
32 finf.cwdh_offset += new_address - previous_address;
33 finf.tglp_offset += new_address - previous_address;
34
35 memcpy(data, &finf, sizeof(finf));
36 } else if (memcmp(data, "CMAP", 4) == 0) {
37 BCFNT::CMAP cmap;
38 memcpy(&cmap, data, sizeof(cmap));
39 section_size = cmap.section_size;
40
41 // Relocate the offsets in the CMAP section
42 cmap.next_cmap_offset += new_address - previous_address;
43
44 memcpy(data, &cmap, sizeof(cmap));
45 } else if (memcmp(data, "CWDH", 4) == 0) {
46 BCFNT::CWDH cwdh;
47 memcpy(&cwdh, data, sizeof(cwdh));
48 section_size = cwdh.section_size;
49
50 // Relocate the offsets in the CWDH section
51 cwdh.next_cwdh_offset += new_address - previous_address;
52
53 memcpy(data, &cwdh, sizeof(cwdh));
54 } else if (memcmp(data, "TGLP", 4) == 0) {
55 BCFNT::TGLP tglp;
56 memcpy(&tglp, data, sizeof(tglp));
57 section_size = tglp.section_size;
58
59 // Relocate the offsets in the TGLP section
60 tglp.sheet_data_offset += new_address - previous_address;
61
62 memcpy(data, &tglp, sizeof(tglp));
63 }
64
65 data += section_size;
66 }
67}
68
69} // namespace BCFNT
70} // namespace APT
71} // namespace Service \ No newline at end of file
diff --git a/src/core/hle/service/apt/bcfnt/bcfnt.h b/src/core/hle/service/apt/bcfnt/bcfnt.h
new file mode 100644
index 000000000..388c6bea0
--- /dev/null
+++ b/src/core/hle/service/apt/bcfnt/bcfnt.h
@@ -0,0 +1,87 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/swap.h"
8
9#include "core/hle/kernel/shared_memory.h"
10#include "core/hle/service/service.h"
11
12namespace Service {
13namespace APT {
14namespace BCFNT { ///< BCFNT Shared Font file structures
15
/// File header of the BCFNT font. It is copied byte-for-byte out of the font data, so the layout
/// must not contain padding.
struct CFNT {
    u8 magic[4];
    u16_le endianness;
    u16_le header_size; ///< Size of this header in bytes; the first section block follows it.
    u32_le version;
    u32_le file_size;
    u32_le num_blocks;  ///< Number of section blocks (FINF, TGLP, CWDH, CMAP) after the header.
};
static_assert(sizeof(CFNT) == 0x14, "CFNT structure size is wrong");
24
/// Header of a "FINF" section block. It is copied byte-for-byte out of the font data, so the
/// layout must not contain padding.
struct FINF {
    u8 magic[4];          ///< Section identifier, "FINF".
    u32_le section_size;  ///< Size of this section in bytes; used to step to the next block.
    u8 font_type;
    u8 line_feed;
    u16_le alter_char_index;
    u8 default_width[3];
    u8 encoding;
    u32_le tglp_offset;   ///< Absolute address inside the mapped font; rebased on relocation.
    u32_le cwdh_offset;   ///< Absolute address inside the mapped font; rebased on relocation.
    u32_le cmap_offset;   ///< Absolute address inside the mapped font; rebased on relocation.
    u8 height;
    u8 width;
    u8 ascent;
    u8 reserved;
};
static_assert(sizeof(FINF) == 0x20, "FINF structure size is wrong");
41
/// Header of a "TGLP" section block. It is copied byte-for-byte out of the font data, so the
/// layout must not contain padding.
struct TGLP {
    u8 magic[4];               ///< Section identifier, "TGLP".
    u32_le section_size;       ///< Size of this section in bytes; used to step to the next block.
    u8 cell_width;
    u8 cell_height;
    u8 baseline_position;
    u8 max_character_width;
    u32_le sheet_size;
    u16_le num_sheets;
    u16_le sheet_image_format;
    u16_le num_columns;
    u16_le num_rows;
    u16_le sheet_width;
    u16_le sheet_height;
    u32_le sheet_data_offset;  ///< Absolute address inside the mapped font; rebased on relocation.
};
static_assert(sizeof(TGLP) == 0x20, "TGLP structure size is wrong");
58
/// Header of a "CMAP" section block. It is copied byte-for-byte out of the font data, so the
/// layout must not contain padding.
struct CMAP {
    u8 magic[4];              ///< Section identifier, "CMAP".
    u32_le section_size;      ///< Size of this section in bytes; used to step to the next block.
    u16_le code_begin;
    u16_le code_end;
    u16_le mapping_method;
    u16_le reserved;
    u32_le next_cmap_offset;  ///< Absolute address of the next CMAP; rebased on relocation.
};
static_assert(sizeof(CMAP) == 0x14, "CMAP structure size is wrong");
68
/// Header of a "CWDH" section block. It is copied byte-for-byte out of the font data, so the
/// layout must not contain padding.
struct CWDH {
    u8 magic[4];              ///< Section identifier, "CWDH".
    u32_le section_size;      ///< Size of this section in bytes; used to step to the next block.
    u16_le start_index;
    u16_le end_index;
    u32_le next_cwdh_offset;  ///< Absolute address of the next CWDH; rebased on relocation.
};
static_assert(sizeof(CWDH) == 0x10, "CWDH structure size is wrong");
76
77/**
78 * Relocates the internal addresses of the BCFNT Shared Font to the new base.
79 * @param shared_font SharedMemory object that contains the Shared Font
80 * @param previous_address Previous address at which the offsets in the structure were based.
81 * @param new_address New base for the offsets in the structure.
82 */
83void RelocateSharedFont(Kernel::SharedPtr<Kernel::SharedMemory> shared_font, VAddr previous_address, VAddr new_address);
84
85} // namespace BCFNT
86} // namespace APT
87} // namespace Service
diff --git a/src/core/hle/service/csnd_snd.cpp b/src/core/hle/service/csnd_snd.cpp
index 6318bf2a7..d2bb8941c 100644
--- a/src/core/hle/service/csnd_snd.cpp
+++ b/src/core/hle/service/csnd_snd.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include "common/alignment.h"
6#include "core/hle/hle.h" 7#include "core/hle/hle.h"
7#include "core/hle/kernel/mutex.h" 8#include "core/hle/kernel/mutex.h"
8#include "core/hle/kernel/shared_memory.h" 9#include "core/hle/kernel/shared_memory.h"
@@ -41,14 +42,16 @@ static Kernel::SharedPtr<Kernel::Mutex> mutex = nullptr;
41void Initialize(Service::Interface* self) { 42void Initialize(Service::Interface* self) {
42 u32* cmd_buff = Kernel::GetCommandBuffer(); 43 u32* cmd_buff = Kernel::GetCommandBuffer();
43 44
44 shared_memory = Kernel::SharedMemory::Create(cmd_buff[1], 45 u32 size = Common::AlignUp(cmd_buff[1], Memory::PAGE_SIZE);
45 Kernel::MemoryPermission::ReadWrite, 46 using Kernel::MemoryPermission;
46 Kernel::MemoryPermission::ReadWrite, "CSNDSharedMem"); 47 shared_memory = Kernel::SharedMemory::Create(nullptr, size,
48 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
49 0, Kernel::MemoryRegion::BASE, "CSND:SharedMemory");
47 50
48 mutex = Kernel::Mutex::Create(false); 51 mutex = Kernel::Mutex::Create(false);
49 52
50 cmd_buff[1] = 0; 53 cmd_buff[1] = RESULT_SUCCESS.raw;
51 cmd_buff[2] = 0x4000000; 54 cmd_buff[2] = IPC::MoveHandleDesc(2);
52 cmd_buff[3] = Kernel::g_handle_table.Create(mutex).MoveFrom(); 55 cmd_buff[3] = Kernel::g_handle_table.Create(mutex).MoveFrom();
53 cmd_buff[4] = Kernel::g_handle_table.Create(shared_memory).MoveFrom(); 56 cmd_buff[4] = Kernel::g_handle_table.Create(shared_memory).MoveFrom();
54} 57}
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 274fc751a..10730d7ac 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -440,9 +440,9 @@ static void GetHeadphoneStatus(Service::Interface* self) {
440 440
441 cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0); 441 cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0);
442 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 442 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
443 cmd_buff[2] = 0; // Not using headphones? 443 cmd_buff[2] = 0; // Not using headphones
444 444
445 LOG_WARNING(Service_DSP, "(STUBBED) called"); 445 LOG_DEBUG(Service_DSP, "called");
446} 446}
447 447
448/** 448/**
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index b4c146e08..8ded9b09b 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -335,8 +335,9 @@ static void RegisterInterruptRelayQueue(Service::Interface* self) {
335 g_interrupt_event->name = "GSP_GPU::interrupt_event"; 335 g_interrupt_event->name = "GSP_GPU::interrupt_event";
336 336
337 using Kernel::MemoryPermission; 337 using Kernel::MemoryPermission;
338 g_shared_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, 338 g_shared_memory = Kernel::SharedMemory::Create(nullptr, 0x1000,
339 MemoryPermission::ReadWrite, "GSPSharedMem"); 339 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
340 0, Kernel::MemoryRegion::BASE, "GSP:SharedMemory");
340 341
341 Handle shmem_handle = Kernel::g_handle_table.Create(g_shared_memory).MoveFrom(); 342 Handle shmem_handle = Kernel::g_handle_table.Create(g_shared_memory).MoveFrom();
342 343
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 1053d0f40..d216cecb4 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -280,8 +280,9 @@ void Init() {
280 AddService(new HID_SPVR_Interface); 280 AddService(new HID_SPVR_Interface);
281 281
282 using Kernel::MemoryPermission; 282 using Kernel::MemoryPermission;
283 shared_mem = SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, 283 shared_mem = SharedMemory::Create(nullptr, 0x1000,
284 MemoryPermission::Read, "HID:SharedMem"); 284 MemoryPermission::ReadWrite, MemoryPermission::Read,
285 0, Kernel::MemoryRegion::BASE, "HID:SharedMemory");
285 286
286 next_pad_index = 0; 287 next_pad_index = 0;
287 next_touch_index = 0; 288 next_touch_index = 0;
diff --git a/src/core/hle/service/ir/ir.cpp b/src/core/hle/service/ir/ir.cpp
index 505c441c6..079a87e48 100644
--- a/src/core/hle/service/ir/ir.cpp
+++ b/src/core/hle/service/ir/ir.cpp
@@ -94,8 +94,9 @@ void Init() {
94 AddService(new IR_User_Interface); 94 AddService(new IR_User_Interface);
95 95
96 using Kernel::MemoryPermission; 96 using Kernel::MemoryPermission;
97 shared_memory = SharedMemory::Create(0x1000, Kernel::MemoryPermission::ReadWrite, 97 shared_memory = SharedMemory::Create(nullptr, 0x1000,
98 Kernel::MemoryPermission::ReadWrite, "IR:SharedMemory"); 98 Kernel::MemoryPermission::ReadWrite, Kernel::MemoryPermission::ReadWrite,
99 0, Kernel::MemoryRegion::BASE, "IR:SharedMemory");
99 transfer_shared_memory = nullptr; 100 transfer_shared_memory = nullptr;
100 101
101 // Create event handle(s) 102 // Create event handle(s)
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index 60c8747f3..3a53126c1 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -99,6 +99,7 @@ static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 add
99 switch (operation & MEMOP_OPERATION_MASK) { 99 switch (operation & MEMOP_OPERATION_MASK) {
100 case MEMOP_FREE: 100 case MEMOP_FREE:
101 { 101 {
102 // TODO(Subv): What happens if an application tries to FREE a block of memory that has a SharedMemory pointing to it?
102 if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) { 103 if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) {
103 ResultCode result = process.HeapFree(addr0, size); 104 ResultCode result = process.HeapFree(addr0, size);
104 if (result.IsError()) return result; 105 if (result.IsError()) return result;
@@ -160,8 +161,6 @@ static ResultCode MapMemoryBlock(Handle handle, u32 addr, u32 permissions, u32 o
160 LOG_TRACE(Kernel_SVC, "called memblock=0x%08X, addr=0x%08X, mypermissions=0x%08X, otherpermission=%d", 161 LOG_TRACE(Kernel_SVC, "called memblock=0x%08X, addr=0x%08X, mypermissions=0x%08X, otherpermission=%d",
161 handle, addr, permissions, other_permissions); 162 handle, addr, permissions, other_permissions);
162 163
163 // TODO(Subv): The same process that created a SharedMemory object can not map it in its own address space
164
165 SharedPtr<SharedMemory> shared_memory = Kernel::g_handle_table.Get<SharedMemory>(handle); 164 SharedPtr<SharedMemory> shared_memory = Kernel::g_handle_table.Get<SharedMemory>(handle);
166 if (shared_memory == nullptr) 165 if (shared_memory == nullptr)
167 return ERR_INVALID_HANDLE; 166 return ERR_INVALID_HANDLE;
@@ -176,7 +175,7 @@ static ResultCode MapMemoryBlock(Handle handle, u32 addr, u32 permissions, u32 o
176 case MemoryPermission::WriteExecute: 175 case MemoryPermission::WriteExecute:
177 case MemoryPermission::ReadWriteExecute: 176 case MemoryPermission::ReadWriteExecute:
178 case MemoryPermission::DontCare: 177 case MemoryPermission::DontCare:
179 return shared_memory->Map(addr, permissions_type, 178 return shared_memory->Map(Kernel::g_current_process.get(), addr, permissions_type,
180 static_cast<MemoryPermission>(other_permissions)); 179 static_cast<MemoryPermission>(other_permissions));
181 default: 180 default:
182 LOG_ERROR(Kernel_SVC, "unknown permissions=0x%08X", permissions); 181 LOG_ERROR(Kernel_SVC, "unknown permissions=0x%08X", permissions);
@@ -196,7 +195,7 @@ static ResultCode UnmapMemoryBlock(Handle handle, u32 addr) {
196 if (shared_memory == nullptr) 195 if (shared_memory == nullptr)
197 return ERR_INVALID_HANDLE; 196 return ERR_INVALID_HANDLE;
198 197
199 return shared_memory->Unmap(addr); 198 return shared_memory->Unmap(Kernel::g_current_process.get(), addr);
200} 199}
201 200
202/// Connect to an OS service given the port name, returns the handle to the port to out 201/// Connect to an OS service given the port name, returns the handle to the port to out
@@ -790,18 +789,44 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32
790 if (size % Memory::PAGE_SIZE != 0) 789 if (size % Memory::PAGE_SIZE != 0)
791 return ResultCode(ErrorDescription::MisalignedSize, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); 790 return ResultCode(ErrorDescription::MisalignedSize, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
792 791
793 // TODO(Subv): Return E0A01BF5 if the address is not in the application's heap 792 SharedPtr<SharedMemory> shared_memory = nullptr;
794
795 // TODO(Subv): Implement this function properly
796 793
797 using Kernel::MemoryPermission; 794 using Kernel::MemoryPermission;
798 SharedPtr<SharedMemory> shared_memory = SharedMemory::Create(size, 795 auto VerifyPermissions = [](MemoryPermission permission) {
799 (MemoryPermission)my_permission, (MemoryPermission)other_permission); 796 // SharedMemory blocks can not be created with Execute permissions
800 // Map the SharedMemory to the specified address 797 switch (permission) {
801 shared_memory->base_address = addr; 798 case MemoryPermission::None:
799 case MemoryPermission::Read:
800 case MemoryPermission::Write:
801 case MemoryPermission::ReadWrite:
802 case MemoryPermission::DontCare:
803 return true;
804 default:
805 return false;
806 }
807 };
808
809 if (!VerifyPermissions(static_cast<MemoryPermission>(my_permission)) ||
810 !VerifyPermissions(static_cast<MemoryPermission>(other_permission)))
811 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS,
812 ErrorSummary::InvalidArgument, ErrorLevel::Usage);
813
814 if (addr < Memory::PROCESS_IMAGE_VADDR || addr + size > Memory::SHARED_MEMORY_VADDR_END) {
815 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
816 }
817
818 // When trying to create a memory block with address = 0,
819 // if the process has the Shared Device Memory flag in the exheader,
820 // then we have to allocate from the same region as the caller process instead of the BASE region.
821 Kernel::MemoryRegion region = Kernel::MemoryRegion::BASE;
822 if (addr == 0 && Kernel::g_current_process->flags.shared_device_mem)
823 region = Kernel::g_current_process->flags.memory_region;
824
825 shared_memory = SharedMemory::Create(Kernel::g_current_process, size,
826 static_cast<MemoryPermission>(my_permission), static_cast<MemoryPermission>(other_permission), addr, region);
802 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(shared_memory))); 827 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(shared_memory)));
803 828
804 LOG_WARNING(Kernel_SVC, "(STUBBED) called addr=0x%08X", addr); 829 LOG_WARNING(Kernel_SVC, "called addr=0x%08X", addr);
805 return RESULT_SUCCESS; 830 return RESULT_SUCCESS;
806} 831}
807 832
diff --git a/src/core/memory.h b/src/core/memory.h
index 9caa3c3f5..126d60471 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -100,15 +100,9 @@ enum : VAddr {
100 SHARED_PAGE_SIZE = 0x00001000, 100 SHARED_PAGE_SIZE = 0x00001000,
101 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, 101 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
102 102
103 // TODO(yuriks): The size of this area is dynamic, the kernel grows
104 // it as more and more threads are created. For now we'll just use a
105 // hardcoded value.
106 /// Area where TLS (Thread-Local Storage) buffers are allocated. 103 /// Area where TLS (Thread-Local Storage) buffers are allocated.
107 TLS_AREA_VADDR = 0x1FF82000, 104 TLS_AREA_VADDR = 0x1FF82000,
108 TLS_ENTRY_SIZE = 0x200, 105 TLS_ENTRY_SIZE = 0x200,
109 TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size
110 TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
111
112 106
113 /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. 107 /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
114 NEW_LINEAR_HEAP_VADDR = 0x30000000, 108 NEW_LINEAR_HEAP_VADDR = 0x30000000,
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index e7dc5ddac..ad0da796e 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -128,7 +128,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
128 128
129 // TODO: Verify that this actually modifies the register! 129 // TODO: Verify that this actually modifies the register!
130 if (setup.index < 15) { 130 if (setup.index < 15) {
131 g_state.vs.default_attributes[setup.index] = attribute; 131 g_state.vs_default_attributes[setup.index] = attribute;
132 setup.index++; 132 setup.index++;
133 } else { 133 } else {
134 // Put each attribute into an immediate input buffer. 134 // Put each attribute into an immediate input buffer.
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 1059c6ae4..495174c25 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -25,6 +25,8 @@ struct State {
25 Shader::ShaderSetup vs; 25 Shader::ShaderSetup vs;
26 Shader::ShaderSetup gs; 26 Shader::ShaderSetup gs;
27 27
28 std::array<Math::Vec4<float24>, 16> vs_default_attributes;
29
28 struct { 30 struct {
29 union LutEntry { 31 union LutEntry {
30 // Used for raw access 32 // Used for raw access
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4222945a4..bcd1ae78d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -104,7 +104,6 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
104 104
105 // Sync fixed function OpenGL state 105 // Sync fixed function OpenGL state
106 SyncCullMode(); 106 SyncCullMode();
107 SyncDepthModifiers();
108 SyncBlendEnabled(); 107 SyncBlendEnabled();
109 SyncBlendFuncs(); 108 SyncBlendFuncs();
110 SyncBlendColor(); 109 SyncBlendColor();
@@ -259,8 +258,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
259 258
260 // Depth modifiers 259 // Depth modifiers
261 case PICA_REG_INDEX(viewport_depth_range): 260 case PICA_REG_INDEX(viewport_depth_range):
261 SyncDepthScale();
262 break;
262 case PICA_REG_INDEX(viewport_depth_near_plane): 263 case PICA_REG_INDEX(viewport_depth_near_plane):
263 SyncDepthModifiers(); 264 SyncDepthOffset();
264 break; 265 break;
265 266
266 // Depth buffering 267 // Depth buffering
@@ -880,6 +881,8 @@ void RasterizerOpenGL::SetShader() {
880 glUniformBlockBinding(current_shader->shader.handle, block_index, 0); 881 glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
881 882
882 // Update uniforms 883 // Update uniforms
884 SyncDepthScale();
885 SyncDepthOffset();
883 SyncAlphaTest(); 886 SyncAlphaTest();
884 SyncCombinerColor(); 887 SyncCombinerColor();
885 auto& tev_stages = Pica::g_state.regs.GetTevStages(); 888 auto& tev_stages = Pica::g_state.regs.GetTevStages();
@@ -922,13 +925,20 @@ void RasterizerOpenGL::SyncCullMode() {
922 } 925 }
923} 926}
924 927
925void RasterizerOpenGL::SyncDepthModifiers() { 928void RasterizerOpenGL::SyncDepthScale() {
926 float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); 929 float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
927 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); 930 if (depth_scale != uniform_block_data.data.depth_scale) {
931 uniform_block_data.data.depth_scale = depth_scale;
932 uniform_block_data.dirty = true;
933 }
934}
928 935
929 uniform_block_data.data.depth_scale = depth_scale; 936void RasterizerOpenGL::SyncDepthOffset() {
930 uniform_block_data.data.depth_offset = depth_offset; 937 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
931 uniform_block_data.dirty = true; 938 if (depth_offset != uniform_block_data.data.depth_offset) {
939 uniform_block_data.data.depth_offset = depth_offset;
940 uniform_block_data.dirty = true;
941 }
932} 942}
933 943
934void RasterizerOpenGL::SyncBlendEnabled() { 944void RasterizerOpenGL::SyncBlendEnabled() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index eed00011a..d70369400 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -339,8 +339,11 @@ private:
339 /// Syncs the cull mode to match the PICA register 339 /// Syncs the cull mode to match the PICA register
340 void SyncCullMode(); 340 void SyncCullMode();
341 341
342 /// Syncs the depth scale and offset to match the PICA registers 342 /// Syncs the depth scale to match the PICA register
343 void SyncDepthModifiers(); 343 void SyncDepthScale();
344
345 /// Syncs the depth offset to match the PICA register
346 void SyncDepthOffset();
344 347
345 /// Syncs the blend enabled status to match the PICA register 348 /// Syncs the blend enabled status to match the PICA register
346 void SyncBlendEnabled(); 349 void SyncBlendEnabled();
@@ -413,7 +416,7 @@ private:
413 UniformData data; 416 UniformData data;
414 bool lut_dirty[6]; 417 bool lut_dirty[6];
415 bool dirty; 418 bool dirty;
416 } uniform_block_data; 419 } uniform_block_data = {};
417 420
418 std::array<SamplerInfo, 3> texture_samplers; 421 std::array<SamplerInfo, 3> texture_samplers;
419 OGLVertexArray vertex_array; 422 OGLVertexArray vertex_array;
@@ -422,5 +425,5 @@ private:
422 OGLFramebuffer framebuffer; 425 OGLFramebuffer framebuffer;
423 426
424 std::array<OGLTexture, 6> lighting_luts; 427 std::array<OGLTexture, 6> lighting_luts;
425 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; 428 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
426}; 429};
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 449fc703f..161097610 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -64,10 +64,10 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
64 64
65OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { 65OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
66 auto& config = g_state.regs.vs; 66 auto& config = g_state.regs.vs;
67 auto& setup = g_state.vs;
67 68
68 MICROPROFILE_SCOPE(GPU_Shader); 69 MICROPROFILE_SCOPE(GPU_Shader);
69 70
70 state.program_counter = config.main_offset;
71 state.debug.max_offset = 0; 71 state.debug.max_offset = 0;
72 state.debug.max_opdesc_id = 0; 72 state.debug.max_opdesc_id = 0;
73 73
@@ -82,11 +82,11 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input,
82 82
83#ifdef ARCHITECTURE_x86_64 83#ifdef ARCHITECTURE_x86_64
84 if (VideoCore::g_shader_jit_enabled) 84 if (VideoCore::g_shader_jit_enabled)
85 jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); 85 jit_shader->Run(setup, state, config.main_offset);
86 else 86 else
87 RunInterpreter(state); 87 RunInterpreter(setup, state, config.main_offset);
88#else 88#else
89 RunInterpreter(state); 89 RunInterpreter(setup, state, config.main_offset);
90#endif // ARCHITECTURE_x86_64 90#endif // ARCHITECTURE_x86_64
91 91
92 // Setup output data 92 // Setup output data
@@ -143,7 +143,6 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input,
143DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { 143DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
144 UnitState<true> state; 144 UnitState<true> state;
145 145
146 state.program_counter = config.main_offset;
147 state.debug.max_offset = 0; 146 state.debug.max_offset = 0;
148 state.debug.max_opdesc_id = 0; 147 state.debug.max_opdesc_id = 0;
149 148
@@ -158,7 +157,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
158 state.conditional_code[0] = false; 157 state.conditional_code[0] = false;
159 state.conditional_code[1] = false; 158 state.conditional_code[1] = false;
160 159
161 RunInterpreter(state); 160 RunInterpreter(setup, state, config.main_offset);
162 return state.debug; 161 return state.debug;
163} 162}
164 163
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 7f417675a..84898f21c 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -272,38 +272,21 @@ struct UnitState {
272 } registers; 272 } registers;
273 static_assert(std::is_pod<Registers>::value, "Structure is not POD"); 273 static_assert(std::is_pod<Registers>::value, "Structure is not POD");
274 274
275 u32 program_counter;
276 bool conditional_code[2]; 275 bool conditional_code[2];
277 276
278 // Two Address registers and one loop counter 277 // Two Address registers and one loop counter
279 // TODO: How many bits do these actually have? 278 // TODO: How many bits do these actually have?
280 s32 address_registers[3]; 279 s32 address_registers[3];
281 280
282 enum {
283 INVALID_ADDRESS = 0xFFFFFFFF
284 };
285
286 struct CallStackElement {
287 u32 final_address; // Address upon which we jump to return_address
288 u32 return_address; // Where to jump when leaving scope
289 u8 repeat_counter; // How often to repeat until this call stack element is removed
290 u8 loop_increment; // Which value to add to the loop counter after an iteration
291 // TODO: Should this be a signed value? Does it even matter?
292 u32 loop_address; // The address where we'll return to after each loop iteration
293 };
294
295 // TODO: Is there a maximal size for this?
296 boost::container::static_vector<CallStackElement, 16> call_stack;
297
298 DebugData<Debug> debug; 281 DebugData<Debug> debug;
299 282
300 static size_t InputOffset(const SourceRegister& reg) { 283 static size_t InputOffset(const SourceRegister& reg) {
301 switch (reg.GetRegisterType()) { 284 switch (reg.GetRegisterType()) {
302 case RegisterType::Input: 285 case RegisterType::Input:
303 return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 286 return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
304 287
305 case RegisterType::Temporary: 288 case RegisterType::Temporary:
306 return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 289 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
307 290
308 default: 291 default:
309 UNREACHABLE(); 292 UNREACHABLE();
@@ -314,10 +297,10 @@ struct UnitState {
314 static size_t OutputOffset(const DestRegister& reg) { 297 static size_t OutputOffset(const DestRegister& reg) {
315 switch (reg.GetRegisterType()) { 298 switch (reg.GetRegisterType()) {
316 case RegisterType::Output: 299 case RegisterType::Output:
317 return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 300 return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
318 301
319 case RegisterType::Temporary: 302 case RegisterType::Temporary:
320 return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 303 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
321 304
322 default: 305 default:
323 UNREACHABLE(); 306 UNREACHABLE();
@@ -340,7 +323,22 @@ struct ShaderSetup {
340 std::array<Math::Vec4<u8>, 4> i; 323 std::array<Math::Vec4<u8>, 4> i;
341 } uniforms; 324 } uniforms;
342 325
343 Math::Vec4<float24> default_attributes[16]; 326 static size_t UniformOffset(RegisterType type, unsigned index) {
327 switch (type) {
328 case RegisterType::FloatUniform:
329 return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
330
331 case RegisterType::BoolUniform:
332 return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
333
334 case RegisterType::IntUniform:
335 return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
336
337 default:
338 UNREACHABLE();
339 return 0;
340 }
341 }
344 342
345 std::array<u32, 1024> program_code; 343 std::array<u32, 1024> program_code;
346 std::array<u32, 1024> swizzle_data; 344 std::array<u32, 1024> swizzle_data;
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 7710f7fbc..714e8bfd5 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -29,8 +29,24 @@ namespace Pica {
29 29
30namespace Shader { 30namespace Shader {
31 31
32constexpr u32 INVALID_ADDRESS = 0xFFFFFFFF;
33
34struct CallStackElement {
35 u32 final_address; // Address upon which we jump to return_address
36 u32 return_address; // Where to jump when leaving scope
37 u8 repeat_counter; // How often to repeat until this call stack element is removed
38 u8 loop_increment; // Which value to add to the loop counter after an iteration
39 // TODO: Should this be a signed value? Does it even matter?
40 u32 loop_address; // The address where we'll return to after each loop iteration
41};
42
32template<bool Debug> 43template<bool Debug>
33void RunInterpreter(UnitState<Debug>& state) { 44void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
45 // TODO: Is there a maximal size for this?
46 boost::container::static_vector<CallStackElement, 16> call_stack;
47
48 u32 program_counter = offset;
49
34 const auto& uniforms = g_state.vs.uniforms; 50 const auto& uniforms = g_state.vs.uniforms;
35 const auto& swizzle_data = g_state.vs.swizzle_data; 51 const auto& swizzle_data = g_state.vs.swizzle_data;
36 const auto& program_code = g_state.vs.program_code; 52 const auto& program_code = g_state.vs.program_code;
@@ -41,16 +57,16 @@ void RunInterpreter(UnitState<Debug>& state) {
41 unsigned iteration = 0; 57 unsigned iteration = 0;
42 bool exit_loop = false; 58 bool exit_loop = false;
43 while (!exit_loop) { 59 while (!exit_loop) {
44 if (!state.call_stack.empty()) { 60 if (!call_stack.empty()) {
45 auto& top = state.call_stack.back(); 61 auto& top = call_stack.back();
46 if (state.program_counter == top.final_address) { 62 if (program_counter == top.final_address) {
47 state.address_registers[2] += top.loop_increment; 63 state.address_registers[2] += top.loop_increment;
48 64
49 if (top.repeat_counter-- == 0) { 65 if (top.repeat_counter-- == 0) {
50 state.program_counter = top.return_address; 66 program_counter = top.return_address;
51 state.call_stack.pop_back(); 67 call_stack.pop_back();
52 } else { 68 } else {
53 state.program_counter = top.loop_address; 69 program_counter = top.loop_address;
54 } 70 }
55 71
56 // TODO: Is "trying again" accurate to hardware? 72 // TODO: Is "trying again" accurate to hardware?
@@ -58,20 +74,20 @@ void RunInterpreter(UnitState<Debug>& state) {
58 } 74 }
59 } 75 }
60 76
61 const Instruction instr = { program_code[state.program_counter] }; 77 const Instruction instr = { program_code[program_counter] };
62 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; 78 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
63 79
64 static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, 80 static auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions,
65 u32 return_offset, u8 repeat_count, u8 loop_increment) { 81 u32 return_offset, u8 repeat_count, u8 loop_increment) {
66 state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset 82 program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
67 ASSERT(state.call_stack.size() < state.call_stack.capacity()); 83 ASSERT(call_stack.size() < call_stack.capacity());
68 state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); 84 call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
69 }; 85 };
70 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); 86 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
71 if (iteration > 0) 87 if (iteration > 0)
72 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); 88 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter);
73 89
74 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); 90 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter);
75 91
76 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { 92 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
77 switch (source_reg.GetRegisterType()) { 93 switch (source_reg.GetRegisterType()) {
@@ -519,7 +535,7 @@ void RunInterpreter(UnitState<Debug>& state) {
519 case OpCode::Id::JMPC: 535 case OpCode::Id::JMPC:
520 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 536 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
521 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 537 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
522 state.program_counter = instr.flow_control.dest_offset - 1; 538 program_counter = instr.flow_control.dest_offset - 1;
523 } 539 }
524 break; 540 break;
525 541
@@ -527,7 +543,7 @@ void RunInterpreter(UnitState<Debug>& state) {
527 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 543 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
528 544
529 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { 545 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
530 state.program_counter = instr.flow_control.dest_offset - 1; 546 program_counter = instr.flow_control.dest_offset - 1;
531 } 547 }
532 break; 548 break;
533 549
@@ -535,7 +551,7 @@ void RunInterpreter(UnitState<Debug>& state) {
535 call(state, 551 call(state,
536 instr.flow_control.dest_offset, 552 instr.flow_control.dest_offset,
537 instr.flow_control.num_instructions, 553 instr.flow_control.num_instructions,
538 state.program_counter + 1, 0, 0); 554 program_counter + 1, 0, 0);
539 break; 555 break;
540 556
541 case OpCode::Id::CALLU: 557 case OpCode::Id::CALLU:
@@ -544,7 +560,7 @@ void RunInterpreter(UnitState<Debug>& state) {
544 call(state, 560 call(state,
545 instr.flow_control.dest_offset, 561 instr.flow_control.dest_offset,
546 instr.flow_control.num_instructions, 562 instr.flow_control.num_instructions,
547 state.program_counter + 1, 0, 0); 563 program_counter + 1, 0, 0);
548 } 564 }
549 break; 565 break;
550 566
@@ -554,7 +570,7 @@ void RunInterpreter(UnitState<Debug>& state) {
554 call(state, 570 call(state,
555 instr.flow_control.dest_offset, 571 instr.flow_control.dest_offset,
556 instr.flow_control.num_instructions, 572 instr.flow_control.num_instructions,
557 state.program_counter + 1, 0, 0); 573 program_counter + 1, 0, 0);
558 } 574 }
559 break; 575 break;
560 576
@@ -565,8 +581,8 @@ void RunInterpreter(UnitState<Debug>& state) {
565 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 581 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
566 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 582 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
567 call(state, 583 call(state,
568 state.program_counter + 1, 584 program_counter + 1,
569 instr.flow_control.dest_offset - state.program_counter - 1, 585 instr.flow_control.dest_offset - program_counter - 1,
570 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 586 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
571 } else { 587 } else {
572 call(state, 588 call(state,
@@ -584,8 +600,8 @@ void RunInterpreter(UnitState<Debug>& state) {
584 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 600 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
585 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 601 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
586 call(state, 602 call(state,
587 state.program_counter + 1, 603 program_counter + 1,
588 instr.flow_control.dest_offset - state.program_counter - 1, 604 instr.flow_control.dest_offset - program_counter - 1,
589 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 605 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
590 } else { 606 } else {
591 call(state, 607 call(state,
@@ -607,8 +623,8 @@ void RunInterpreter(UnitState<Debug>& state) {
607 623
608 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); 624 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
609 call(state, 625 call(state,
610 state.program_counter + 1, 626 program_counter + 1,
611 instr.flow_control.dest_offset - state.program_counter + 1, 627 instr.flow_control.dest_offset - program_counter + 1,
612 instr.flow_control.dest_offset + 1, 628 instr.flow_control.dest_offset + 1,
613 loop_param.x, 629 loop_param.x,
614 loop_param.z); 630 loop_param.z);
@@ -625,14 +641,14 @@ void RunInterpreter(UnitState<Debug>& state) {
625 } 641 }
626 } 642 }
627 643
628 ++state.program_counter; 644 ++program_counter;
629 ++iteration; 645 ++iteration;
630 } 646 }
631} 647}
632 648
633// Explicit instantiation 649// Explicit instantiation
634template void RunInterpreter(UnitState<false>& state); 650template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset);
635template void RunInterpreter(UnitState<true>& state); 651template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset);
636 652
637} // namespace 653} // namespace
638 654
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index 6048cdf3a..bb3ce1c6e 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -11,7 +11,7 @@ namespace Shader {
11template <bool Debug> struct UnitState; 11template <bool Debug> struct UnitState;
12 12
13template<bool Debug> 13template<bool Debug>
14void RunInterpreter(UnitState<Debug>& state); 14void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
15 15
16} // namespace 16} // namespace
17 17
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 99f6c51eb..43e7e6b4c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -102,7 +102,7 @@ const JitFunction instr_table[64] = {
102// purposes, as documented below: 102// purposes, as documented below:
103 103
104/// Pointer to the uniform memory 104/// Pointer to the uniform memory
105static const X64Reg UNIFORMS = R9; 105static const X64Reg SETUP = R9;
106/// The two 32-bit VS address offset registers set by the MOVA instruction 106/// The two 32-bit VS address offset registers set by the MOVA instruction
107static const X64Reg ADDROFFS_REG_0 = R10; 107static const X64Reg ADDROFFS_REG_0 = R10;
108static const X64Reg ADDROFFS_REG_1 = R11; 108static const X64Reg ADDROFFS_REG_1 = R11;
@@ -117,7 +117,7 @@ static const X64Reg COND0 = R13;
117/// Result of the previous CMP instruction for the Y-component comparison 117/// Result of the previous CMP instruction for the Y-component comparison
118static const X64Reg COND1 = R14; 118static const X64Reg COND1 = R14;
119/// Pointer to the UnitState instance for the current VS unit 119/// Pointer to the UnitState instance for the current VS unit
120static const X64Reg REGISTERS = R15; 120static const X64Reg STATE = R15;
121/// SIMD scratch register 121/// SIMD scratch register
122static const X64Reg SCRATCH = XMM0; 122static const X64Reg SCRATCH = XMM0;
123/// Loaded with the first swizzled source register, otherwise can be used as a scratch register 123/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15;
136// State registers that must not be modified by external functions calls 136// State registers that must not be modified by external functions calls
137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed 137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
138static const BitSet32 persistent_regs = { 138static const BitSet32 persistent_regs = {
139 UNIFORMS, REGISTERS, // Pointers to register blocks 139 SETUP, STATE, // Pointers to register blocks
140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers 140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
141 ONE+16, NEGBIT+16, // Constants 141 ONE+16, NEGBIT+16, // Constants
142}; 142};
@@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
177 size_t src_offset; 177 size_t src_offset;
178 178
179 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { 179 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
180 src_ptr = UNIFORMS; 180 src_ptr = SETUP;
181 src_offset = src_reg.GetIndex() * sizeof(float24) * 4; 181 src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
182 } else { 182 } else {
183 src_ptr = REGISTERS; 183 src_ptr = STATE;
184 src_offset = UnitState<false>::InputOffset(src_reg); 184 src_offset = UnitState<false>::InputOffset(src_reg);
185 } 185 }
186 186
@@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
264 // If all components are enabled, write the result to the destination register 264 // If all components are enabled, write the result to the destination register
265 if (swiz.dest_mask == NO_DEST_REG_MASK) { 265 if (swiz.dest_mask == NO_DEST_REG_MASK) {
266 // Store dest back to memory 266 // Store dest back to memory
267 MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); 267 MOVAPS(MDisp(STATE, dest_offset_disp), src);
268 268
269 } else { 269 } else {
270 // Not all components are enabled, so mask the result when storing to the destination register... 270 // Not all components are enabled, so mask the result when storing to the destination register...
271 MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); 271 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
272 272
273 if (Common::GetCPUCaps().sse4_1) { 273 if (Common::GetCPUCaps().sse4_1) {
274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); 274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
287 } 287 }
288 288
289 // Store dest back to memory 289 // Store dest back to memory
290 MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); 290 MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
291 } 291 }
292} 292}
293 293
@@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
336} 336}
337 337
338void JitShader::Compile_UniformCondition(Instruction instr) { 338void JitShader::Compile_UniformCondition(Instruction instr) {
339 int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); 339 int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
340 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); 340 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
341} 341}
342 342
343BitSet32 JitShader::PersistentCallerSavedRegs() { 343BitSet32 JitShader::PersistentCallerSavedRegs() {
@@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
714 714
715 looping = true; 715 looping = true;
716 716
717 int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); 717 int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
718 MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); 718 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); 719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
720 SHR(32, R(LOOPCOUNT_REG), Imm8(8)); 720 SHR(32, R(LOOPCOUNT_REG), Imm8(8));
721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start 721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
@@ -826,8 +826,8 @@ void JitShader::Compile() {
826 // The stack pointer is 8 modulo 16 at the entry of a procedure 826 // The stack pointer is 8 modulo 16 at the entry of a procedure
827 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 827 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
828 828
829 MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); 829 MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
830 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); 830 MOV(PTRBITS, R(STATE), R(ABI_PARAM2));
831 831
832 // Zero address/loop registers 832 // Zero address/loop registers
833 XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); 833 XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
@@ -845,7 +845,7 @@ void JitShader::Compile() {
845 MOVAPS(NEGBIT, MatR(RAX)); 845 MOVAPS(NEGBIT, MatR(RAX));
846 846
847 // Jump to start of the shader program 847 // Jump to start of the shader program
848 JMPptr(R(ABI_PARAM2)); 848 JMPptr(R(ABI_PARAM3));
849 849
850 // Compile entire program 850 // Compile entire program
851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); 851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 30aa7ff30..5468459d4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock {
36public: 36public:
37 JitShader(); 37 JitShader();
38 38
39 void Run(void* registers, unsigned offset) const { 39 void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const {
40 program(registers, code_ptr[offset]); 40 program(&setup, &state, code_ptr[offset]);
41 } 41 }
42 42
43 void Compile(); 43 void Compile();
@@ -117,7 +117,7 @@ private:
117 /// Branches that need to be fixed up once the entire shader program is compiled 117 /// Branches that need to be fixed up once the entire shader program is compiled
118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; 118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
119 119
120 using CompiledShader = void(void* registers, const u8* start_addr); 120 using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
121 CompiledShader* program = nullptr; 121 CompiledShader* program = nullptr;
122}; 122};
123 123
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
index 21ae52949..83896814f 100644
--- a/src/video_core/vertex_loader.cpp
+++ b/src/video_core/vertex_loader.cpp
@@ -124,7 +124,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
124 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); 124 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
125 } else if (vertex_attribute_is_default[i]) { 125 } else if (vertex_attribute_is_default[i]) {
126 // Load the default attribute if we're configured to do so 126 // Load the default attribute if we're configured to do so
127 input.attr[i] = g_state.vs.default_attributes[i]; 127 input.attr[i] = g_state.vs_default_attributes[i];
128 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", 128 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
129 i, vertex, index, 129 i, vertex, index,
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), 130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),