diff options
Diffstat (limited to 'src')
39 files changed, 834 insertions, 249 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index 13b5e400e..eba0a5697 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt | |||
| @@ -7,6 +7,7 @@ set(SRCS | |||
| 7 | hle/source.cpp | 7 | hle/source.cpp |
| 8 | interpolate.cpp | 8 | interpolate.cpp |
| 9 | sink_details.cpp | 9 | sink_details.cpp |
| 10 | time_stretch.cpp | ||
| 10 | ) | 11 | ) |
| 11 | 12 | ||
| 12 | set(HEADERS | 13 | set(HEADERS |
| @@ -21,6 +22,7 @@ set(HEADERS | |||
| 21 | null_sink.h | 22 | null_sink.h |
| 22 | sink.h | 23 | sink.h |
| 23 | sink_details.h | 24 | sink_details.h |
| 25 | time_stretch.h | ||
| 24 | ) | 26 | ) |
| 25 | 27 | ||
| 26 | include_directories(../../externals/soundtouch/include) | 28 | include_directories(../../externals/soundtouch/include) |
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp index 0cdbdb06a..5113ad8ca 100644 --- a/src/audio_core/hle/dsp.cpp +++ b/src/audio_core/hle/dsp.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "audio_core/hle/pipe.h" | 9 | #include "audio_core/hle/pipe.h" |
| 10 | #include "audio_core/hle/source.h" | 10 | #include "audio_core/hle/source.h" |
| 11 | #include "audio_core/sink.h" | 11 | #include "audio_core/sink.h" |
| 12 | #include "audio_core/time_stretch.h" | ||
| 12 | 13 | ||
| 13 | namespace DSP { | 14 | namespace DSP { |
| 14 | namespace HLE { | 15 | namespace HLE { |
| @@ -48,15 +49,29 @@ static std::array<Source, num_sources> sources = { | |||
| 48 | }; | 49 | }; |
| 49 | 50 | ||
| 50 | static std::unique_ptr<AudioCore::Sink> sink; | 51 | static std::unique_ptr<AudioCore::Sink> sink; |
| 52 | static AudioCore::TimeStretcher time_stretcher; | ||
| 51 | 53 | ||
| 52 | void Init() { | 54 | void Init() { |
| 53 | DSP::HLE::ResetPipes(); | 55 | DSP::HLE::ResetPipes(); |
| 56 | |||
| 54 | for (auto& source : sources) { | 57 | for (auto& source : sources) { |
| 55 | source.Reset(); | 58 | source.Reset(); |
| 56 | } | 59 | } |
| 60 | |||
| 61 | time_stretcher.Reset(); | ||
| 62 | if (sink) { | ||
| 63 | time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate()); | ||
| 64 | } | ||
| 57 | } | 65 | } |
| 58 | 66 | ||
| 59 | void Shutdown() { | 67 | void Shutdown() { |
| 68 | time_stretcher.Flush(); | ||
| 69 | while (true) { | ||
| 70 | std::vector<s16> residual_audio = time_stretcher.Process(sink->SamplesInQueue()); | ||
| 71 | if (residual_audio.empty()) | ||
| 72 | break; | ||
| 73 | sink->EnqueueSamples(residual_audio); | ||
| 74 | } | ||
| 60 | } | 75 | } |
| 61 | 76 | ||
| 62 | bool Tick() { | 77 | bool Tick() { |
| @@ -77,6 +92,7 @@ bool Tick() { | |||
| 77 | 92 | ||
| 78 | void SetSink(std::unique_ptr<AudioCore::Sink> sink_) { | 93 | void SetSink(std::unique_ptr<AudioCore::Sink> sink_) { |
| 79 | sink = std::move(sink_); | 94 | sink = std::move(sink_); |
| 95 | time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate()); | ||
| 80 | } | 96 | } |
| 81 | 97 | ||
| 82 | } // namespace HLE | 98 | } // namespace HLE |
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp new file mode 100644 index 000000000..ea38f40d0 --- /dev/null +++ b/src/audio_core/time_stretch.cpp | |||
| @@ -0,0 +1,144 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <chrono> | ||
| 6 | #include <cmath> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include <SoundTouch.h> | ||
| 10 | |||
| 11 | #include "audio_core/audio_core.h" | ||
| 12 | #include "audio_core/time_stretch.h" | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "common/logging/log.h" | ||
| 16 | #include "common/math_util.h" | ||
| 17 | |||
| 18 | using steady_clock = std::chrono::steady_clock; | ||
| 19 | |||
| 20 | namespace AudioCore { | ||
| 21 | |||
| 22 | constexpr double MIN_RATIO = 0.1; | ||
| 23 | constexpr double MAX_RATIO = 100.0; | ||
| 24 | |||
| 25 | static double ClampRatio(double ratio) { | ||
| 26 | return MathUtil::Clamp(ratio, MIN_RATIO, MAX_RATIO); | ||
| 27 | } | ||
| 28 | |||
| 29 | constexpr double MIN_DELAY_TIME = 0.05; // Units: seconds | ||
| 30 | constexpr double MAX_DELAY_TIME = 0.25; // Units: seconds | ||
| 31 | constexpr size_t DROP_FRAMES_SAMPLE_DELAY = 16000; // Units: samples | ||
| 32 | |||
| 33 | constexpr double SMOOTHING_FACTOR = 0.007; | ||
| 34 | |||
| 35 | struct TimeStretcher::Impl { | ||
| 36 | soundtouch::SoundTouch soundtouch; | ||
| 37 | |||
| 38 | steady_clock::time_point frame_timer = steady_clock::now(); | ||
| 39 | size_t samples_queued = 0; | ||
| 40 | |||
| 41 | double smoothed_ratio = 1.0; | ||
| 42 | |||
| 43 | double sample_rate = static_cast<double>(native_sample_rate); | ||
| 44 | }; | ||
| 45 | |||
| 46 | std::vector<s16> TimeStretcher::Process(size_t samples_in_queue) { | ||
| 47 | // This is a very simple algorithm without any fancy control theory. It works and is stable. | ||
| 48 | |||
| 49 | double ratio = CalculateCurrentRatio(); | ||
| 50 | ratio = CorrectForUnderAndOverflow(ratio, samples_in_queue); | ||
| 51 | impl->smoothed_ratio = (1.0 - SMOOTHING_FACTOR) * impl->smoothed_ratio + SMOOTHING_FACTOR * ratio; | ||
| 52 | impl->smoothed_ratio = ClampRatio(impl->smoothed_ratio); | ||
| 53 | |||
| 54 | // SoundTouch's tempo definition the inverse of our ratio definition. | ||
| 55 | impl->soundtouch.setTempo(1.0 / impl->smoothed_ratio); | ||
| 56 | |||
| 57 | std::vector<s16> samples = GetSamples(); | ||
| 58 | if (samples_in_queue >= DROP_FRAMES_SAMPLE_DELAY) { | ||
| 59 | samples.clear(); | ||
| 60 | LOG_DEBUG(Audio, "Dropping frames!"); | ||
| 61 | } | ||
| 62 | return samples; | ||
| 63 | } | ||
| 64 | |||
| 65 | TimeStretcher::TimeStretcher() : impl(std::make_unique<Impl>()) { | ||
| 66 | impl->soundtouch.setPitch(1.0); | ||
| 67 | impl->soundtouch.setChannels(2); | ||
| 68 | impl->soundtouch.setSampleRate(native_sample_rate); | ||
| 69 | Reset(); | ||
| 70 | } | ||
| 71 | |||
| 72 | TimeStretcher::~TimeStretcher() { | ||
| 73 | impl->soundtouch.clear(); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TimeStretcher::SetOutputSampleRate(unsigned int sample_rate) { | ||
| 77 | impl->sample_rate = static_cast<double>(sample_rate); | ||
| 78 | impl->soundtouch.setRate(static_cast<double>(native_sample_rate) / impl->sample_rate); | ||
| 79 | } | ||
| 80 | |||
| 81 | void TimeStretcher::AddSamples(const s16* buffer, size_t num_samples) { | ||
| 82 | impl->soundtouch.putSamples(buffer, static_cast<uint>(num_samples)); | ||
| 83 | impl->samples_queued += num_samples; | ||
| 84 | } | ||
| 85 | |||
| 86 | void TimeStretcher::Flush() { | ||
| 87 | impl->soundtouch.flush(); | ||
| 88 | } | ||
| 89 | |||
| 90 | void TimeStretcher::Reset() { | ||
| 91 | impl->soundtouch.setTempo(1.0); | ||
| 92 | impl->soundtouch.clear(); | ||
| 93 | impl->smoothed_ratio = 1.0; | ||
| 94 | impl->frame_timer = steady_clock::now(); | ||
| 95 | impl->samples_queued = 0; | ||
| 96 | SetOutputSampleRate(native_sample_rate); | ||
| 97 | } | ||
| 98 | |||
| 99 | double TimeStretcher::CalculateCurrentRatio() { | ||
| 100 | const steady_clock::time_point now = steady_clock::now(); | ||
| 101 | const std::chrono::duration<double> duration = now - impl->frame_timer; | ||
| 102 | |||
| 103 | const double expected_time = static_cast<double>(impl->samples_queued) / static_cast<double>(native_sample_rate); | ||
| 104 | const double actual_time = duration.count(); | ||
| 105 | |||
| 106 | double ratio; | ||
| 107 | if (expected_time != 0) { | ||
| 108 | ratio = ClampRatio(actual_time / expected_time); | ||
| 109 | } else { | ||
| 110 | ratio = impl->smoothed_ratio; | ||
| 111 | } | ||
| 112 | |||
| 113 | impl->frame_timer = now; | ||
| 114 | impl->samples_queued = 0; | ||
| 115 | |||
| 116 | return ratio; | ||
| 117 | } | ||
| 118 | |||
| 119 | double TimeStretcher::CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const { | ||
| 120 | const size_t min_sample_delay = static_cast<size_t>(MIN_DELAY_TIME * impl->sample_rate); | ||
| 121 | const size_t max_sample_delay = static_cast<size_t>(MAX_DELAY_TIME * impl->sample_rate); | ||
| 122 | |||
| 123 | if (sample_delay < min_sample_delay) { | ||
| 124 | // Make the ratio bigger. | ||
| 125 | ratio = ratio > 1.0 ? ratio * ratio : sqrt(ratio); | ||
| 126 | } else if (sample_delay > max_sample_delay) { | ||
| 127 | // Make the ratio smaller. | ||
| 128 | ratio = ratio > 1.0 ? sqrt(ratio) : ratio * ratio; | ||
| 129 | } | ||
| 130 | |||
| 131 | return ClampRatio(ratio); | ||
| 132 | } | ||
| 133 | |||
| 134 | std::vector<s16> TimeStretcher::GetSamples() { | ||
| 135 | uint available = impl->soundtouch.numSamples(); | ||
| 136 | |||
| 137 | std::vector<s16> output(static_cast<size_t>(available) * 2); | ||
| 138 | |||
| 139 | impl->soundtouch.receiveSamples(output.data(), available); | ||
| 140 | |||
| 141 | return output; | ||
| 142 | } | ||
| 143 | |||
| 144 | } // namespace AudioCore | ||
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h new file mode 100644 index 000000000..1fde3f72a --- /dev/null +++ b/src/audio_core/time_stretch.h | |||
| @@ -0,0 +1,57 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstddef> | ||
| 6 | #include <memory> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace AudioCore { | ||
| 12 | |||
| 13 | class TimeStretcher final { | ||
| 14 | public: | ||
| 15 | TimeStretcher(); | ||
| 16 | ~TimeStretcher(); | ||
| 17 | |||
| 18 | /** | ||
| 19 | * Set sample rate for the samples that Process returns. | ||
| 20 | * @param sample_rate The sample rate. | ||
| 21 | */ | ||
| 22 | void SetOutputSampleRate(unsigned int sample_rate); | ||
| 23 | |||
| 24 | /** | ||
| 25 | * Add samples to be processed. | ||
| 26 | * @param sample_buffer Buffer of samples in interleaved stereo PCM16 format. | ||
| 27 | * @param num_sample Number of samples. | ||
| 28 | */ | ||
| 29 | void AddSamples(const s16* sample_buffer, size_t num_samples); | ||
| 30 | |||
| 31 | /// Flush audio remaining in internal buffers. | ||
| 32 | void Flush(); | ||
| 33 | |||
| 34 | /// Resets internal state and clears buffers. | ||
| 35 | void Reset(); | ||
| 36 | |||
| 37 | /** | ||
| 38 | * Does audio stretching and produces the time-stretched samples. | ||
| 39 | * Timer calculations use sample_delay to determine how much of a margin we have. | ||
| 40 | * @param sample_delay How many samples are buffered downstream of this module and haven't been played yet. | ||
| 41 | * @return Samples to play in interleaved stereo PCM16 format. | ||
| 42 | */ | ||
| 43 | std::vector<s16> Process(size_t sample_delay); | ||
| 44 | |||
| 45 | private: | ||
| 46 | struct Impl; | ||
| 47 | std::unique_ptr<Impl> impl; | ||
| 48 | |||
| 49 | /// INTERNAL: ratio = wallclock time / emulated time | ||
| 50 | double CalculateCurrentRatio(); | ||
| 51 | /// INTERNAL: If we have too many or too few samples downstream, nudge ratio in the appropriate direction. | ||
| 52 | double CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const; | ||
| 53 | /// INTERNAL: Gets the time-stretched samples from SoundTouch. | ||
| 54 | std::vector<s16> GetSamples(); | ||
| 55 | }; | ||
| 56 | |||
| 57 | } // namespace AudioCore | ||
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp index 1402f8e79..9c80f7ec9 100644 --- a/src/citra_qt/debugger/graphics_tracing.cpp +++ b/src/citra_qt/debugger/graphics_tracing.cpp | |||
| @@ -74,7 +74,7 @@ void GraphicsTracingWidget::StartRecording() { | |||
| 74 | std::array<u32, 4 * 16> default_attributes; | 74 | std::array<u32, 4 * 16> default_attributes; |
| 75 | for (unsigned i = 0; i < 16; ++i) { | 75 | for (unsigned i = 0; i < 16; ++i) { |
| 76 | for (unsigned comp = 0; comp < 3; ++comp) { | 76 | for (unsigned comp = 0; comp < 3; ++comp) { |
| 77 | default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32()); | 77 | default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32()); |
| 78 | } | 78 | } |
| 79 | } | 79 | } |
| 80 | 80 | ||
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index a8d891689..f6a7566bf 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -52,6 +52,7 @@ set(SRCS | |||
| 52 | hle/service/apt/apt_a.cpp | 52 | hle/service/apt/apt_a.cpp |
| 53 | hle/service/apt/apt_s.cpp | 53 | hle/service/apt/apt_s.cpp |
| 54 | hle/service/apt/apt_u.cpp | 54 | hle/service/apt/apt_u.cpp |
| 55 | hle/service/apt/bcfnt/bcfnt.cpp | ||
| 55 | hle/service/boss/boss.cpp | 56 | hle/service/boss/boss.cpp |
| 56 | hle/service/boss/boss_p.cpp | 57 | hle/service/boss/boss_p.cpp |
| 57 | hle/service/boss/boss_u.cpp | 58 | hle/service/boss/boss_u.cpp |
| @@ -185,6 +186,7 @@ set(HEADERS | |||
| 185 | hle/service/apt/apt_a.h | 186 | hle/service/apt/apt_a.h |
| 186 | hle/service/apt/apt_s.h | 187 | hle/service/apt/apt_s.h |
| 187 | hle/service/apt/apt_u.h | 188 | hle/service/apt/apt_u.h |
| 189 | hle/service/apt/bcfnt/bcfnt.h | ||
| 188 | hle/service/boss/boss.h | 190 | hle/service/boss/boss.h |
| 189 | hle/service/boss/boss_p.h | 191 | hle/service/boss/boss_p.h |
| 190 | hle/service/boss/boss_u.h | 192 | hle/service/boss/boss_u.h |
diff --git a/src/core/hle/applets/applet.h b/src/core/hle/applets/applet.h index af442f81d..754c6f7db 100644 --- a/src/core/hle/applets/applet.h +++ b/src/core/hle/applets/applet.h | |||
| @@ -65,6 +65,7 @@ protected: | |||
| 65 | virtual ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) = 0; | 65 | virtual ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) = 0; |
| 66 | 66 | ||
| 67 | Service::APT::AppletId id; ///< Id of this Applet | 67 | Service::APT::AppletId id; ///< Id of this Applet |
| 68 | std::shared_ptr<std::vector<u8>> heap_memory; ///< Heap memory for this Applet | ||
| 68 | }; | 69 | }; |
| 69 | 70 | ||
| 70 | /// Returns whether a library applet is currently running | 71 | /// Returns whether a library applet is currently running |
diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp index b4456ca90..bf39eca22 100644 --- a/src/core/hle/applets/mii_selector.cpp +++ b/src/core/hle/applets/mii_selector.cpp | |||
| @@ -35,9 +35,14 @@ ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& p | |||
| 35 | ASSERT(sizeof(capture_info) == parameter.buffer_size); | 35 | ASSERT(sizeof(capture_info) == parameter.buffer_size); |
| 36 | 36 | ||
| 37 | memcpy(&capture_info, parameter.data, sizeof(capture_info)); | 37 | memcpy(&capture_info, parameter.data, sizeof(capture_info)); |
| 38 | |||
| 38 | using Kernel::MemoryPermission; | 39 | using Kernel::MemoryPermission; |
| 39 | framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, | 40 | // Allocate a heap block of the required size for this applet. |
| 40 | MemoryPermission::ReadWrite, "MiiSelector Memory"); | 41 | heap_memory = std::make_shared<std::vector<u8>>(capture_info.size); |
| 42 | // Create a SharedMemory that directly points to this heap block. | ||
| 43 | framebuffer_memory = Kernel::SharedMemory::CreateForApplet(heap_memory, 0, heap_memory->size(), | ||
| 44 | MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, | ||
| 45 | "MiiSelector Memory"); | ||
| 41 | 46 | ||
| 42 | // Send the response message with the newly created SharedMemory | 47 | // Send the response message with the newly created SharedMemory |
| 43 | Service::APT::MessageParameter result; | 48 | Service::APT::MessageParameter result; |
diff --git a/src/core/hle/applets/swkbd.cpp b/src/core/hle/applets/swkbd.cpp index 87238aa1c..90c6adc65 100644 --- a/src/core/hle/applets/swkbd.cpp +++ b/src/core/hle/applets/swkbd.cpp | |||
| @@ -40,8 +40,12 @@ ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter con | |||
| 40 | memcpy(&capture_info, parameter.data, sizeof(capture_info)); | 40 | memcpy(&capture_info, parameter.data, sizeof(capture_info)); |
| 41 | 41 | ||
| 42 | using Kernel::MemoryPermission; | 42 | using Kernel::MemoryPermission; |
| 43 | framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, | 43 | // Allocate a heap block of the required size for this applet. |
| 44 | MemoryPermission::ReadWrite, "SoftwareKeyboard Memory"); | 44 | heap_memory = std::make_shared<std::vector<u8>>(capture_info.size); |
| 45 | // Create a SharedMemory that directly points to this heap block. | ||
| 46 | framebuffer_memory = Kernel::SharedMemory::CreateForApplet(heap_memory, 0, heap_memory->size(), | ||
| 47 | MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, | ||
| 48 | "SoftwareKeyboard Memory"); | ||
| 45 | 49 | ||
| 46 | // Send the response message with the newly created SharedMemory | 50 | // Send the response message with the newly created SharedMemory |
| 47 | Service::APT::MessageParameter result; | 51 | Service::APT::MessageParameter result; |
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h index 4d718b681..bf7f875b6 100644 --- a/src/core/hle/function_wrappers.h +++ b/src/core/hle/function_wrappers.h | |||
| @@ -170,7 +170,8 @@ template<ResultCode func(s64*, u32, s32)> void Wrap() { | |||
| 170 | 170 | ||
| 171 | template<ResultCode func(u32*, u32, u32, u32, u32)> void Wrap() { | 171 | template<ResultCode func(u32*, u32, u32, u32, u32)> void Wrap() { |
| 172 | u32 param_1 = 0; | 172 | u32 param_1 = 0; |
| 173 | u32 retval = func(¶m_1, PARAM(1), PARAM(2), PARAM(3), PARAM(4)).raw; | 173 | // The last parameter is passed in R0 instead of R4 |
| 174 | u32 retval = func(¶m_1, PARAM(1), PARAM(2), PARAM(3), PARAM(0)).raw; | ||
| 174 | Core::g_app_core->SetReg(1, param_1); | 175 | Core::g_app_core->SetReg(1, param_1); |
| 175 | FuncReturn(retval); | 176 | FuncReturn(retval); |
| 176 | } | 177 | } |
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp index 862643448..17ae87aef 100644 --- a/src/core/hle/kernel/memory.cpp +++ b/src/core/hle/kernel/memory.cpp | |||
| @@ -55,6 +55,9 @@ void MemoryInit(u32 mem_type) { | |||
| 55 | memory_regions[i].size = memory_region_sizes[mem_type][i]; | 55 | memory_regions[i].size = memory_region_sizes[mem_type][i]; |
| 56 | memory_regions[i].used = 0; | 56 | memory_regions[i].used = 0; |
| 57 | memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>(); | 57 | memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>(); |
| 58 | // Reserve enough space for this region of FCRAM. | ||
| 59 | // We do not want this block of memory to be relocated when allocating from it. | ||
| 60 | memory_regions[i].linear_heap_memory->reserve(memory_regions[i].size); | ||
| 58 | 61 | ||
| 59 | base += memory_regions[i].size; | 62 | base += memory_regions[i].size; |
| 60 | } | 63 | } |
| @@ -107,9 +110,7 @@ struct MemoryArea { | |||
| 107 | 110 | ||
| 109 | // We don't declare the IO regions in here since it's handled by other means. | 112 | // We don't declare the IO regions in here since it's handled by other means. |
| 109 | static MemoryArea memory_areas[] = { | 112 | static MemoryArea memory_areas[] = { |
| 110 | {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory | ||
| 111 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) | 113 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) |
| 112 | {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory | ||
| 113 | }; | 114 | }; |
| 114 | 115 | ||
| 115 | } | 116 | } |
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 0546f6e16..69302cc82 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -209,7 +209,7 @@ ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission p | |||
| 209 | return ERR_INVALID_ADDRESS; | 209 | return ERR_INVALID_ADDRESS; |
| 210 | } | 210 | } |
| 211 | 211 | ||
| 212 | // Expansion of the linear heap is only allowed if you do an allocation immediatelly at its | 212 | // Expansion of the linear heap is only allowed if you do an allocation immediately at its |
| 213 | // end. It's possible to free gaps in the middle of the heap and then reallocate them later, | 213 | // end. It's possible to free gaps in the middle of the heap and then reallocate them later, |
| 214 | // but expansions are only allowed at the end. | 214 | // but expansions are only allowed at the end. |
| 215 | if (target == heap_end) { | 215 | if (target == heap_end) { |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index a06afef2b..d781ef32c 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -142,8 +142,11 @@ public: | |||
| 142 | 142 | ||
| 143 | MemoryRegionInfo* memory_region = nullptr; | 143 | MemoryRegionInfo* memory_region = nullptr; |
| 144 | 144 | ||
| 145 | /// Bitmask of the used TLS slots | 145 | /// The Thread Local Storage area is allocated as processes create threads, |
| 146 | std::bitset<300> used_tls_slots; | 146 | /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part |
| 147 | /// holds the TLS for a specific thread. This vector contains which parts are in use for each page as a bitmask. | ||
| 148 | /// This vector will grow as more pages are allocated for new threads. | ||
| 149 | std::vector<std::bitset<8>> tls_slots; | ||
| 147 | 150 | ||
| 148 | VAddr GetLinearHeapAreaAddress() const; | 151 | VAddr GetLinearHeapAreaAddress() const; |
| 149 | VAddr GetLinearHeapBase() const; | 152 | VAddr GetLinearHeapBase() const; |
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index d90f0f00f..6a22c8986 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | 8 | ||
| 9 | #include "core/memory.h" | 9 | #include "core/memory.h" |
| 10 | #include "core/hle/kernel/memory.h" | ||
| 10 | #include "core/hle/kernel/shared_memory.h" | 11 | #include "core/hle/kernel/shared_memory.h" |
| 11 | 12 | ||
| 12 | namespace Kernel { | 13 | namespace Kernel { |
| @@ -14,93 +15,157 @@ namespace Kernel { | |||
| 14 | SharedMemory::SharedMemory() {} | 15 | SharedMemory::SharedMemory() {} |
| 15 | SharedMemory::~SharedMemory() {} | 16 | SharedMemory::~SharedMemory() {} |
| 16 | 17 | ||
| 17 | SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissions, | 18 | SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u32 size, MemoryPermission permissions, |
| 18 | MemoryPermission other_permissions, std::string name) { | 19 | MemoryPermission other_permissions, VAddr address, MemoryRegion region, std::string name) { |
| 19 | SharedPtr<SharedMemory> shared_memory(new SharedMemory); | 20 | SharedPtr<SharedMemory> shared_memory(new SharedMemory); |
| 20 | 21 | ||
| 22 | shared_memory->owner_process = owner_process; | ||
| 21 | shared_memory->name = std::move(name); | 23 | shared_memory->name = std::move(name); |
| 22 | shared_memory->base_address = 0x0; | ||
| 23 | shared_memory->fixed_address = 0x0; | ||
| 24 | shared_memory->size = size; | 24 | shared_memory->size = size; |
| 25 | shared_memory->permissions = permissions; | 25 | shared_memory->permissions = permissions; |
| 26 | shared_memory->other_permissions = other_permissions; | 26 | shared_memory->other_permissions = other_permissions; |
| 27 | 27 | ||
| 28 | if (address == 0) { | ||
| 29 | // We need to allocate a block from the Linear Heap ourselves. | ||
| 30 | // We'll manually allocate some memory from the linear heap in the specified region. | ||
| 31 | MemoryRegionInfo* memory_region = GetMemoryRegion(region); | ||
| 32 | auto& linheap_memory = memory_region->linear_heap_memory; | ||
| 33 | |||
| 34 | ASSERT_MSG(linheap_memory->size() + size <= memory_region->size, "Not enough space in region to allocate shared memory!"); | ||
| 35 | |||
| 36 | shared_memory->backing_block = linheap_memory; | ||
| 37 | shared_memory->backing_block_offset = linheap_memory->size(); | ||
| 38 | // Allocate some memory from the end of the linear heap for this region. | ||
| 39 | linheap_memory->insert(linheap_memory->end(), size, 0); | ||
| 40 | memory_region->used += size; | ||
| 41 | |||
| 42 | shared_memory->linear_heap_phys_address = Memory::FCRAM_PADDR + memory_region->base + shared_memory->backing_block_offset; | ||
| 43 | |||
| 44 | // Increase the amount of used linear heap memory for the owner process. | ||
| 45 | if (shared_memory->owner_process != nullptr) { | ||
| 46 | shared_memory->owner_process->linear_heap_used += size; | ||
| 47 | } | ||
| 48 | |||
| 49 | // Refresh the address mappings for the current process. | ||
| 50 | if (Kernel::g_current_process != nullptr) { | ||
| 51 | Kernel::g_current_process->vm_manager.RefreshMemoryBlockMappings(linheap_memory.get()); | ||
| 52 | } | ||
| 53 | } else { | ||
| 54 | // TODO(Subv): What happens if an application tries to create multiple memory blocks pointing to the same address? | ||
| 55 | auto& vm_manager = shared_memory->owner_process->vm_manager; | ||
| 56 | // The memory is already available and mapped in the owner process. | ||
| 57 | auto vma = vm_manager.FindVMA(address)->second; | ||
| 58 | // Copy it over to our own storage | ||
| 59 | shared_memory->backing_block = std::make_shared<std::vector<u8>>(vma.backing_block->data() + vma.offset, | ||
| 60 | vma.backing_block->data() + vma.offset + size); | ||
| 61 | shared_memory->backing_block_offset = 0; | ||
| 62 | // Unmap the existing pages | ||
| 63 | vm_manager.UnmapRange(address, size); | ||
| 64 | // Map our own block into the address space | ||
| 65 | vm_manager.MapMemoryBlock(address, shared_memory->backing_block, 0, size, MemoryState::Shared); | ||
| 66 | // Reprotect the block with the new permissions | ||
| 67 | vm_manager.ReprotectRange(address, size, ConvertPermissions(permissions)); | ||
| 68 | } | ||
| 69 | |||
| 70 | shared_memory->base_address = address; | ||
| 28 | return shared_memory; | 71 | return shared_memory; |
| 29 | } | 72 | } |
| 30 | 73 | ||
| 31 | ResultCode SharedMemory::Map(VAddr address, MemoryPermission permissions, | 74 | SharedPtr<SharedMemory> SharedMemory::CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block, u32 offset, u32 size, |
| 32 | MemoryPermission other_permissions) { | 75 | MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { |
| 76 | SharedPtr<SharedMemory> shared_memory(new SharedMemory); | ||
| 33 | 77 | ||
| 34 | if (base_address != 0) { | 78 | shared_memory->owner_process = nullptr; |
| 35 | LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: already mapped at 0x%08X!", | 79 | shared_memory->name = std::move(name); |
| 36 | GetObjectId(), address, name.c_str(), base_address); | 80 | shared_memory->size = size; |
| 37 | // TODO: Verify error code with hardware | 81 | shared_memory->permissions = permissions; |
| 38 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, | 82 | shared_memory->other_permissions = other_permissions; |
| 39 | ErrorSummary::InvalidArgument, ErrorLevel::Permanent); | 83 | shared_memory->backing_block = heap_block; |
| 40 | } | 84 | shared_memory->backing_block_offset = offset; |
| 85 | shared_memory->base_address = Memory::HEAP_VADDR + offset; | ||
| 41 | 86 | ||
| 42 | // TODO(Subv): Return E0E01BEE when permissions and other_permissions don't | 87 | return shared_memory; |
| 43 | // match what was specified when the memory block was created. | 88 | } |
| 44 | 89 | ||
| 45 | // TODO(Subv): Return E0E01BEE when address should be 0. | 90 | ResultCode SharedMemory::Map(Process* target_process, VAddr address, MemoryPermission permissions, |
| 46 | // Note: Find out when that's the case. | 91 | MemoryPermission other_permissions) { |
| 47 | 92 | ||
| 48 | if (fixed_address != 0) { | 93 | MemoryPermission own_other_permissions = target_process == owner_process ? this->permissions : this->other_permissions; |
| 49 | if (address != 0 && address != fixed_address) { | ||
| 50 | LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: fixed_addres is 0x%08X!", | ||
| 51 | GetObjectId(), address, name.c_str(), fixed_address); | ||
| 52 | // TODO: Verify error code with hardware | ||
| 53 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, | ||
| 54 | ErrorSummary::InvalidArgument, ErrorLevel::Permanent); | ||
| 55 | } | ||
| 56 | 94 | ||
| 57 | // HACK(yuriks): This is only here to support the APT shared font mapping right now. | 95 | // Automatically allocated memory blocks can only be mapped with other_permissions = DontCare |
| 58 | // Later, this should actually map the memory block onto the address space. | 96 | if (base_address == 0 && other_permissions != MemoryPermission::DontCare) { |
| 59 | return RESULT_SUCCESS; | 97 | return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); |
| 60 | } | 98 | } |
| 61 | 99 | ||
| 62 | if (address < Memory::SHARED_MEMORY_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) { | 100 | // Error out if the requested permissions don't match what the creator process allows. |
| 63 | LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s outside of shared mem bounds!", | 101 | if (static_cast<u32>(permissions) & ~static_cast<u32>(own_other_permissions)) { |
| 64 | GetObjectId(), address, name.c_str()); | 102 | LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match", |
| 65 | // TODO: Verify error code with hardware | 103 | GetObjectId(), address, name.c_str()); |
| 66 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, | 104 | return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); |
| 67 | ErrorSummary::InvalidArgument, ErrorLevel::Permanent); | ||
| 68 | } | 105 | } |
| 69 | 106 | ||
| 70 | // TODO: Test permissions | 107 | // Heap-backed memory blocks can not be mapped with other_permissions = DontCare |
| 108 | if (base_address != 0 && other_permissions == MemoryPermission::DontCare) { | ||
| 109 | LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match", | ||
| 110 | GetObjectId(), address, name.c_str()); | ||
| 111 | return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); | ||
| 112 | } | ||
| 71 | 113 | ||
| 72 | // HACK: Since there's no way to write to the memory block without mapping it onto the game | 114 | // Error out if the provided permissions are not compatible with what the creator process needs. |
| 73 | // process yet, at least initialize memory the first time it's mapped. | 115 | if (other_permissions != MemoryPermission::DontCare && |
| 74 | if (address != this->base_address) { | 116 | static_cast<u32>(this->permissions) & ~static_cast<u32>(other_permissions)) { |
| 75 | std::memset(Memory::GetPointer(address), 0, size); | 117 | LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match", |
| 118 | GetObjectId(), address, name.c_str()); | ||
| 119 | return ResultCode(ErrorDescription::WrongPermission, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent); | ||
| 76 | } | 120 | } |
| 77 | 121 | ||
| 78 | this->base_address = address; | 122 | // TODO(Subv): Check for the Shared Device Mem flag in the creator process. |
| 123 | /*if (was_created_with_shared_device_mem && address != 0) { | ||
| 124 | return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); | ||
| 125 | }*/ | ||
| 79 | 126 | ||
| 80 | return RESULT_SUCCESS; | 127 | // TODO(Subv): The same process that created a SharedMemory object |
| 81 | } | 128 | // can not map it in its own address space unless it was created with addr=0, result 0xD900182C. |
| 82 | 129 | ||
| 83 | ResultCode SharedMemory::Unmap(VAddr address) { | 130 | if (address != 0) { |
| 84 | if (base_address == 0) { | 131 | if (address < Memory::HEAP_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) { |
| 85 | // TODO(Subv): Verify what actually happens when you want to unmap a memory block that | 132 | LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, invalid address", |
| 86 | // was originally mapped with address = 0 | 133 | GetObjectId(), address, name.c_str()); |
| 87 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); | 134 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, |
| 135 | ErrorSummary::InvalidArgument, ErrorLevel::Usage); | ||
| 136 | } | ||
| 88 | } | 137 | } |
| 89 | 138 | ||
| 90 | if (base_address != address) | 139 | VAddr target_address = address; |
| 91 | return ResultCode(ErrorDescription::WrongAddress, ErrorModule::OS, ErrorSummary::InvalidState, ErrorLevel::Usage); | ||
| 92 | 140 | ||
| 93 | base_address = 0; | 141 | if (base_address == 0 && target_address == 0) { |
| 142 | // Calculate the address at which to map the memory block. | ||
| 143 | target_address = Memory::PhysicalToVirtualAddress(linear_heap_phys_address); | ||
| 144 | } | ||
| 145 | |||
| 146 | // Map the memory block into the target process | ||
| 147 | auto result = target_process->vm_manager.MapMemoryBlock(target_address, backing_block, backing_block_offset, size, MemoryState::Shared); | ||
| 148 | if (result.Failed()) { | ||
| 149 | LOG_ERROR(Kernel, "cannot map id=%u, target_address=0x%08X name=%s, error mapping to virtual memory", | ||
| 150 | GetObjectId(), target_address, name.c_str()); | ||
| 151 | return result.Code(); | ||
| 152 | } | ||
| 94 | 153 | ||
| 95 | return RESULT_SUCCESS; | 154 | return target_process->vm_manager.ReprotectRange(target_address, size, ConvertPermissions(permissions)); |
| 96 | } | 155 | } |
| 97 | 156 | ||
| 98 | u8* SharedMemory::GetPointer(u32 offset) { | 157 | ResultCode SharedMemory::Unmap(Process* target_process, VAddr address) { |
| 99 | if (base_address != 0) | 158 | // TODO(Subv): Verify what happens if the application tries to unmap an address that is not mapped to a SharedMemory. |
| 100 | return Memory::GetPointer(base_address + offset); | 159 | return target_process->vm_manager.UnmapRange(address, size); |
| 160 | } | ||
| 161 | |||
| 162 | VMAPermission SharedMemory::ConvertPermissions(MemoryPermission permission) { | ||
| 163 | u32 masked_permissions = static_cast<u32>(permission) & static_cast<u32>(MemoryPermission::ReadWriteExecute); | ||
| 164 | return static_cast<VMAPermission>(masked_permissions); | ||
| 165 | }; | ||
| 101 | 166 | ||
| 102 | LOG_ERROR(Kernel_SVC, "memory block id=%u not mapped!", GetObjectId()); | 167 | u8* SharedMemory::GetPointer(u32 offset) { |
| 103 | return nullptr; | 168 | return backing_block->data() + backing_block_offset + offset; |
| 104 | } | 169 | } |
| 105 | 170 | ||
| 106 | } // namespace | 171 | } // namespace |
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h index b51049ad0..0c404a9f8 100644 --- a/src/core/hle/kernel/shared_memory.h +++ b/src/core/hle/kernel/shared_memory.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | 10 | ||
| 11 | #include "core/hle/kernel/kernel.h" | 11 | #include "core/hle/kernel/kernel.h" |
| 12 | #include "core/hle/kernel/process.h" | ||
| 12 | #include "core/hle/result.h" | 13 | #include "core/hle/result.h" |
| 13 | 14 | ||
| 14 | namespace Kernel { | 15 | namespace Kernel { |
| @@ -29,14 +30,29 @@ enum class MemoryPermission : u32 { | |||
| 29 | class SharedMemory final : public Object { | 30 | class SharedMemory final : public Object { |
| 30 | public: | 31 | public: |
| 31 | /** | 32 | /** |
| 32 | * Creates a shared memory object | 33 | * Creates a shared memory object. |
| 34 | * @param owner_process Process that created this shared memory object. | ||
| 33 | * @param size Size of the memory block. Must be page-aligned. | 35 | * @param size Size of the memory block. Must be page-aligned. |
| 34 | * @param permissions Permission restrictions applied to the process which created the block. | 36 | * @param permissions Permission restrictions applied to the process which created the block. |
| 35 | * @param other_permissions Permission restrictions applied to other processes mapping the block. | 37 | * @param other_permissions Permission restrictions applied to other processes mapping the block. |
| 38 | * @param address The address from which to map the Shared Memory. | ||
| 39 | * @param region If the address is 0, the shared memory will be allocated in this region of the linear heap. | ||
| 36 | * @param name Optional object name, used for debugging purposes. | 40 | * @param name Optional object name, used for debugging purposes. |
| 37 | */ | 41 | */ |
| 38 | static SharedPtr<SharedMemory> Create(u32 size, MemoryPermission permissions, | 42 | static SharedPtr<SharedMemory> Create(SharedPtr<Process> owner_process, u32 size, MemoryPermission permissions, |
| 39 | MemoryPermission other_permissions, std::string name = "Unknown"); | 43 | MemoryPermission other_permissions, VAddr address = 0, MemoryRegion region = MemoryRegion::BASE, std::string name = "Unknown"); |
| 44 | |||
| 45 | /** | ||
| 46 | * Creates a shared memory object from a block of memory managed by an HLE applet. | ||
| 47 | * @param heap_block Heap block of the HLE applet. | ||
| 48 | * @param offset The offset into the heap block that the SharedMemory will map. | ||
| 49 | * @param size Size of the memory block. Must be page-aligned. | ||
| 50 | * @param permissions Permission restrictions applied to the process which created the block. | ||
| 51 | * @param other_permissions Permission restrictions applied to other processes mapping the block. | ||
| 52 | * @param name Optional object name, used for debugging purposes. | ||
| 53 | */ | ||
| 54 | static SharedPtr<SharedMemory> CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block, u32 offset, u32 size, | ||
| 55 | MemoryPermission permissions, MemoryPermission other_permissions, std::string name = "Unknown Applet"); | ||
| 40 | 56 | ||
| 41 | std::string GetTypeName() const override { return "SharedMemory"; } | 57 | std::string GetTypeName() const override { return "SharedMemory"; } |
| 42 | std::string GetName() const override { return name; } | 58 | std::string GetName() const override { return name; } |
| @@ -45,19 +61,27 @@ public: | |||
| 45 | HandleType GetHandleType() const override { return HANDLE_TYPE; } | 61 | HandleType GetHandleType() const override { return HANDLE_TYPE; } |
| 46 | 62 | ||
| 47 | /** | 63 | /** |
| 48 | * Maps a shared memory block to an address in system memory | 64 | * Converts the specified MemoryPermission into the equivalent VMAPermission. |
| 65 | * @param permission The MemoryPermission to convert. | ||
| 66 | */ | ||
| 67 | static VMAPermission ConvertPermissions(MemoryPermission permission); | ||
| 68 | |||
| 69 | /** | ||
| 70 | * Maps a shared memory block to an address in the target process' address space | ||
| 71 | * @param target_process Process on which to map the memory block. | ||
| 49 | * @param address Address in system memory to map shared memory block to | 72 | * @param address Address in system memory to map shared memory block to |
| 50 | * @param permissions Memory block map permissions (specified by SVC field) | 73 | * @param permissions Memory block map permissions (specified by SVC field) |
| 51 | * @param other_permissions Memory block map other permissions (specified by SVC field) | 74 | * @param other_permissions Memory block map other permissions (specified by SVC field) |
| 52 | */ | 75 | */ |
| 53 | ResultCode Map(VAddr address, MemoryPermission permissions, MemoryPermission other_permissions); | 76 | ResultCode Map(Process* target_process, VAddr address, MemoryPermission permissions, MemoryPermission other_permissions); |
| 54 | 77 | ||
| 55 | /** | 78 | /** |
| 56 | * Unmaps a shared memory block from the specified address in system memory | 79 | * Unmaps a shared memory block from the specified address in system memory |
| 80 | * @param target_process Process from which to unmap the memory block. | ||
| 57 | * @param address Address in system memory where the shared memory block is mapped | 81 | * @param address Address in system memory where the shared memory block is mapped |
| 58 | * @return Result code of the unmap operation | 82 | * @return Result code of the unmap operation |
| 59 | */ | 83 | */ |
| 60 | ResultCode Unmap(VAddr address); | 84 | ResultCode Unmap(Process* target_process, VAddr address); |
| 61 | 85 | ||
| 62 | /** | 86 | /** |
| 63 | * Gets a pointer to the shared memory block | 87 | * Gets a pointer to the shared memory block |
| @@ -66,10 +90,16 @@ public: | |||
| 66 | */ | 90 | */ |
| 67 | u8* GetPointer(u32 offset = 0); | 91 | u8* GetPointer(u32 offset = 0); |
| 68 | 92 | ||
| 69 | /// Address of shared memory block in the process. | 93 | /// Process that created this shared memory block. |
| 94 | SharedPtr<Process> owner_process; | ||
| 95 | /// Address of shared memory block in the owner process if specified. | ||
| 70 | VAddr base_address; | 96 | VAddr base_address; |
| 71 | /// Fixed address to allow mapping to. Used for blocks created from the linear heap. | 97 | /// Physical address of the shared memory block in the linear heap if no address was specified during creation. |
| 72 | VAddr fixed_address; | 98 | PAddr linear_heap_phys_address; |
| 99 | /// Backing memory for this shared memory block. | ||
| 100 | std::shared_ptr<std::vector<u8>> backing_block; | ||
| 101 | /// Offset into the backing block for this shared memory. | ||
| 102 | u32 backing_block_offset; | ||
| 73 | /// Size of the memory block. Page-aligned. | 103 | /// Size of the memory block. Page-aligned. |
| 74 | u32 size; | 104 | u32 size; |
| 75 | /// Permission restrictions applied to the process which created the block. | 105 | /// Permission restrictions applied to the process which created the block. |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 6dc95d0f1..68f026918 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -117,9 +117,10 @@ void Thread::Stop() { | |||
| 117 | } | 117 | } |
| 118 | wait_objects.clear(); | 118 | wait_objects.clear(); |
| 119 | 119 | ||
| 120 | Kernel::g_current_process->used_tls_slots[tls_index] = false; | 120 | // Mark the TLS slot in the thread's page as free. |
| 121 | g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE; | 121 | u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE; |
| 122 | g_current_process->memory_region->used -= Memory::TLS_ENTRY_SIZE; | 122 | u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE; |
| 123 | Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot); | ||
| 123 | 124 | ||
| 124 | HLE::Reschedule(__func__); | 125 | HLE::Reschedule(__func__); |
| 125 | } | 126 | } |
| @@ -366,6 +367,31 @@ static void DebugThreadQueue() { | |||
| 366 | } | 367 | } |
| 367 | } | 368 | } |
| 368 | 369 | ||
| 370 | /** | ||
| 371 | * Finds a free location for the TLS section of a thread. | ||
| 372 | * @param tls_slots The TLS page array of the thread's owner process. | ||
| 373 | * Returns a tuple of (page, slot, alloc_needed) where: | ||
| 374 | * page: The index of the first allocated TLS page that has free slots. | ||
| 375 | * slot: The index of the first free slot in the indicated page. | ||
| 376 | * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full). | ||
| 377 | */ | ||
| 378 | std::tuple<u32, u32, bool> GetFreeThreadLocalSlot(std::vector<std::bitset<8>>& tls_slots) { | ||
| 379 | // Iterate over all the allocated pages, and try to find one where not all slots are used. | ||
| 380 | for (unsigned page = 0; page < tls_slots.size(); ++page) { | ||
| 381 | const auto& page_tls_slots = tls_slots[page]; | ||
| 382 | if (!page_tls_slots.all()) { | ||
| 383 | // We found a page with at least one free slot, find which slot it is | ||
| 384 | for (unsigned slot = 0; slot < page_tls_slots.size(); ++slot) { | ||
| 385 | if (!page_tls_slots.test(slot)) { | ||
| 386 | return std::make_tuple(page, slot, false); | ||
| 387 | } | ||
| 388 | } | ||
| 389 | } | ||
| 390 | } | ||
| 391 | |||
| 392 | return std::make_tuple(0, 0, true); | ||
| 393 | } | ||
| 394 | |||
| 369 | ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, | 395 | ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, |
| 370 | u32 arg, s32 processor_id, VAddr stack_top) { | 396 | u32 arg, s32 processor_id, VAddr stack_top) { |
| 371 | if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { | 397 | if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { |
| @@ -403,22 +429,50 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, | |||
| 403 | thread->name = std::move(name); | 429 | thread->name = std::move(name); |
| 404 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); | 430 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); |
| 405 | thread->owner_process = g_current_process; | 431 | thread->owner_process = g_current_process; |
| 406 | thread->tls_index = -1; | ||
| 407 | thread->waitsynch_waited = false; | 432 | thread->waitsynch_waited = false; |
| 408 | 433 | ||
| 409 | // Find the next available TLS index, and mark it as used | 434 | // Find the next available TLS index, and mark it as used |
| 410 | auto& used_tls_slots = Kernel::g_current_process->used_tls_slots; | 435 | auto& tls_slots = Kernel::g_current_process->tls_slots; |
| 411 | for (unsigned int i = 0; i < used_tls_slots.size(); ++i) { | 436 | bool needs_allocation = true; |
| 412 | if (used_tls_slots[i] == false) { | 437 | u32 available_page; // Which allocated page has free space |
| 413 | thread->tls_index = i; | 438 | u32 available_slot; // Which slot within the page is free |
| 414 | used_tls_slots[i] = true; | 439 | |
| 415 | break; | 440 | std::tie(available_page, available_slot, needs_allocation) = GetFreeThreadLocalSlot(tls_slots); |
| 441 | |||
| 442 | if (needs_allocation) { | ||
| 443 | // There are no already-allocated pages with free slots, let's allocate a new one. | ||
| 444 | // TLS pages are allocated from the BASE region in the linear heap. | ||
| 445 | MemoryRegionInfo* memory_region = GetMemoryRegion(MemoryRegion::BASE); | ||
| 446 | auto& linheap_memory = memory_region->linear_heap_memory; | ||
| 447 | |||
| 448 | if (linheap_memory->size() + Memory::PAGE_SIZE > memory_region->size) { | ||
| 449 | LOG_ERROR(Kernel_SVC, "Not enough space in region to allocate a new TLS page for thread"); | ||
| 450 | return ResultCode(ErrorDescription::OutOfMemory, ErrorModule::Kernel, ErrorSummary::OutOfResource, ErrorLevel::Permanent); | ||
| 416 | } | 451 | } |
| 452 | |||
| 453 | u32 offset = linheap_memory->size(); | ||
| 454 | |||
| 455 | // Allocate some memory from the end of the linear heap for this region. | ||
| 456 | linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0); | ||
| 457 | memory_region->used += Memory::PAGE_SIZE; | ||
| 458 | Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE; | ||
| 459 | |||
| 460 | tls_slots.emplace_back(0); // The page is completely available at the start | ||
| 461 | available_page = tls_slots.size() - 1; | ||
| 462 | available_slot = 0; // Use the first slot in the new page | ||
| 463 | |||
| 464 | auto& vm_manager = Kernel::g_current_process->vm_manager; | ||
| 465 | vm_manager.RefreshMemoryBlockMappings(linheap_memory.get()); | ||
| 466 | |||
| 467 | // Map the page to the current process' address space. | ||
| 468 | // TODO(Subv): Find the correct MemoryState for this region. | ||
| 469 | vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE, | ||
| 470 | linheap_memory, offset, Memory::PAGE_SIZE, MemoryState::Private); | ||
| 417 | } | 471 | } |
| 418 | 472 | ||
| 419 | ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); | 473 | // Mark the slot as used |
| 420 | g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE; | 474 | tls_slots[available_page].set(available_slot); |
| 421 | g_current_process->memory_region->used += Memory::TLS_ENTRY_SIZE; | 475 | thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE; |
| 422 | 476 | ||
| 423 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used | 477 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used |
| 424 | // to initialize the context | 478 | // to initialize the context |
| @@ -509,10 +563,6 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { | |||
| 509 | context.cpu_registers[1] = output; | 563 | context.cpu_registers[1] = output; |
| 510 | } | 564 | } |
| 511 | 565 | ||
| 512 | VAddr Thread::GetTLSAddress() const { | ||
| 513 | return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE; | ||
| 514 | } | ||
| 515 | |||
| 516 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 566 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| 517 | 567 | ||
| 518 | void ThreadingInit() { | 568 | void ThreadingInit() { |
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 97ba57fc5..deab5d5a6 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h | |||
| @@ -127,7 +127,7 @@ public: | |||
| 127 | * Returns the Thread Local Storage address of the current thread | 127 | * Returns the Thread Local Storage address of the current thread |
| 128 | * @returns VAddr of the thread's TLS | 128 | * @returns VAddr of the thread's TLS |
| 129 | */ | 129 | */ |
| 130 | VAddr GetTLSAddress() const; | 130 | VAddr GetTLSAddress() const { return tls_address; } |
| 131 | 131 | ||
| 132 | Core::ThreadContext context; | 132 | Core::ThreadContext context; |
| 133 | 133 | ||
| @@ -144,7 +144,7 @@ public: | |||
| 144 | 144 | ||
| 145 | s32 processor_id; | 145 | s32 processor_id; |
| 146 | 146 | ||
| 147 | s32 tls_index; ///< Index of the Thread Local Storage of the thread | 147 | VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread |
| 148 | 148 | ||
| 149 | bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait | 149 | bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait |
| 150 | 150 | ||
diff --git a/src/core/hle/result.h b/src/core/hle/result.h index 3fc1ab4ee..bfb3327ce 100644 --- a/src/core/hle/result.h +++ b/src/core/hle/result.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | /// Detailed description of the error. This listing is likely incomplete. | 17 | /// Detailed description of the error. This listing is likely incomplete. |
| 18 | enum class ErrorDescription : u32 { | 18 | enum class ErrorDescription : u32 { |
| 19 | Success = 0, | 19 | Success = 0, |
| 20 | WrongPermission = 46, | ||
| 20 | OS_InvalidBufferDescriptor = 48, | 21 | OS_InvalidBufferDescriptor = 48, |
| 21 | WrongAddress = 53, | 22 | WrongAddress = 53, |
| 22 | FS_NotFound = 120, | 23 | FS_NotFound = 120, |
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp index 6d72e8188..73fce6079 100644 --- a/src/core/hle/service/apt/apt.cpp +++ b/src/core/hle/service/apt/apt.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "core/hle/service/apt/apt_a.h" | 12 | #include "core/hle/service/apt/apt_a.h" |
| 13 | #include "core/hle/service/apt/apt_s.h" | 13 | #include "core/hle/service/apt/apt_s.h" |
| 14 | #include "core/hle/service/apt/apt_u.h" | 14 | #include "core/hle/service/apt/apt_u.h" |
| 15 | #include "core/hle/service/apt/bcfnt/bcfnt.h" | ||
| 15 | #include "core/hle/service/fs/archive.h" | 16 | #include "core/hle/service/fs/archive.h" |
| 16 | 17 | ||
| 17 | #include "core/hle/kernel/event.h" | 18 | #include "core/hle/kernel/event.h" |
| @@ -22,23 +23,14 @@ | |||
| 22 | namespace Service { | 23 | namespace Service { |
| 23 | namespace APT { | 24 | namespace APT { |
| 24 | 25 | ||
| 25 | // Address used for shared font (as observed on HW) | ||
| 26 | // TODO(bunnei): This is the hard-coded address where we currently dump the shared font from via | ||
| 27 | // https://github.com/citra-emu/3dsutils. This is technically a hack, and will not work at any | ||
| 28 | // address other than 0x18000000 due to internal pointers in the shared font dump that would need to | ||
| 29 | // be relocated. This might be fixed by dumping the shared font @ address 0x00000000 and then | ||
| 30 | // correctly mapping it in Citra, however we still do not understand how the mapping is determined. | ||
| 31 | static const VAddr SHARED_FONT_VADDR = 0x18000000; | ||
| 32 | |||
| 33 | /// Handle to shared memory region designated for the shared system font | 26 | /// Handle to shared memory region designated for the shared system font |
| 34 | static Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; | 27 | static Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; |
| 28 | static bool shared_font_relocated = false; | ||
| 35 | 29 | ||
| 36 | static Kernel::SharedPtr<Kernel::Mutex> lock; | 30 | static Kernel::SharedPtr<Kernel::Mutex> lock; |
| 37 | static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event | 31 | static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event |
| 38 | static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event | 32 | static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event |
| 39 | 33 | ||
| 40 | static std::shared_ptr<std::vector<u8>> shared_font; | ||
| 41 | |||
| 42 | static u32 cpu_percent; ///< CPU time available to the running application | 34 | static u32 cpu_percent; ///< CPU time available to the running application |
| 43 | 35 | ||
| 44 | /// Parameter data to be returned in the next call to Glance/ReceiveParameter | 36 | /// Parameter data to be returned in the next call to Glance/ReceiveParameter |
| @@ -74,23 +66,25 @@ void Initialize(Service::Interface* self) { | |||
| 74 | void GetSharedFont(Service::Interface* self) { | 66 | void GetSharedFont(Service::Interface* self) { |
| 75 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 67 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 76 | 68 | ||
| 77 | if (shared_font != nullptr) { | 69 | // The shared font has to be relocated to the new address before being passed to the application. |
| 78 | // TODO(yuriks): This is a hack to keep this working right now even with our completely | 70 | VAddr target_address = Memory::PhysicalToVirtualAddress(shared_font_mem->linear_heap_phys_address); |
| 79 | // broken shared memory system. | 71 | // The shared font dumped by 3dsutils (https://github.com/citra-emu/3dsutils) uses this address as base, |
| 80 | shared_font_mem->fixed_address = SHARED_FONT_VADDR; | 72 | // so we relocate it from there to our real address. |
| 81 | Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->fixed_address, | 73 | // TODO(Subv): This address is wrong if the shared font is dumped from a n3DS, |
| 82 | shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared); | 74 | // we need a way to automatically calculate the original address of the font from the file. |
| 83 | 75 | static const VAddr SHARED_FONT_VADDR = 0x18000000; | |
| 84 | cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2); | 76 | if (!shared_font_relocated) { |
| 85 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | 77 | BCFNT::RelocateSharedFont(shared_font_mem, SHARED_FONT_VADDR, target_address); |
| 86 | cmd_buff[2] = SHARED_FONT_VADDR; | 78 | shared_font_relocated = true; |
| 87 | cmd_buff[3] = IPC::MoveHandleDesc(); | ||
| 88 | cmd_buff[4] = Kernel::g_handle_table.Create(shared_font_mem).MoveFrom(); | ||
| 89 | } else { | ||
| 90 | cmd_buff[0] = IPC::MakeHeader(0x44, 1, 0); | ||
| 91 | cmd_buff[1] = -1; // Generic error (not really possible to verify this on hardware) | ||
| 92 | LOG_ERROR(Kernel_SVC, "called, but %s has not been loaded!", SHARED_FONT); | ||
| 93 | } | 79 | } |
| 80 | cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2); | ||
| 81 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 82 | // Since the SharedMemory interface doesn't provide the address at which the memory was allocated, | ||
| 83 | // the real APT service calculates this address by scanning the entire address space (using svcQueryMemory) | ||
| 84 | // and searches for an allocation of the same size as the Shared Font. | ||
| 85 | cmd_buff[2] = target_address; | ||
| 86 | cmd_buff[3] = IPC::MoveHandleDesc(); | ||
| 87 | cmd_buff[4] = Kernel::g_handle_table.Create(shared_font_mem).MoveFrom(); | ||
| 94 | } | 88 | } |
| 95 | 89 | ||
| 96 | void NotifyToWait(Service::Interface* self) { | 90 | void NotifyToWait(Service::Interface* self) { |
| @@ -433,14 +427,12 @@ void Init() { | |||
| 433 | FileUtil::IOFile file(filepath, "rb"); | 427 | FileUtil::IOFile file(filepath, "rb"); |
| 434 | 428 | ||
| 435 | if (file.IsOpen()) { | 429 | if (file.IsOpen()) { |
| 436 | // Read shared font data | ||
| 437 | shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize()); | ||
| 438 | file.ReadBytes(shared_font->data(), shared_font->size()); | ||
| 439 | |||
| 440 | // Create shared font memory object | 430 | // Create shared font memory object |
| 441 | using Kernel::MemoryPermission; | 431 | using Kernel::MemoryPermission; |
| 442 | shared_font_mem = Kernel::SharedMemory::Create(3 * 1024 * 1024, // 3MB | 432 | shared_font_mem = Kernel::SharedMemory::Create(nullptr, 0x332000, // 3272 KB |
| 443 | MemoryPermission::ReadWrite, MemoryPermission::Read, "APT_U:shared_font_mem"); | 433 | MemoryPermission::ReadWrite, MemoryPermission::Read, 0, Kernel::MemoryRegion::SYSTEM, "APT:SharedFont"); |
| 434 | // Read shared font data | ||
| 435 | file.ReadBytes(shared_font_mem->GetPointer(), file.GetSize()); | ||
| 444 | } else { | 436 | } else { |
| 445 | LOG_WARNING(Service_APT, "Unable to load shared font: %s", filepath.c_str()); | 437 | LOG_WARNING(Service_APT, "Unable to load shared font: %s", filepath.c_str()); |
| 446 | shared_font_mem = nullptr; | 438 | shared_font_mem = nullptr; |
| @@ -459,8 +451,8 @@ void Init() { | |||
| 459 | } | 451 | } |
| 460 | 452 | ||
| 461 | void Shutdown() { | 453 | void Shutdown() { |
| 462 | shared_font = nullptr; | ||
| 463 | shared_font_mem = nullptr; | 454 | shared_font_mem = nullptr; |
| 455 | shared_font_relocated = false; | ||
| 464 | lock = nullptr; | 456 | lock = nullptr; |
| 465 | notification_event = nullptr; | 457 | notification_event = nullptr; |
| 466 | parameter_event = nullptr; | 458 | parameter_event = nullptr; |
diff --git a/src/core/hle/service/apt/bcfnt/bcfnt.cpp b/src/core/hle/service/apt/bcfnt/bcfnt.cpp new file mode 100644 index 000000000..b0d39d4a5 --- /dev/null +++ b/src/core/hle/service/apt/bcfnt/bcfnt.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/hle/service/apt/bcfnt/bcfnt.h" | ||
| 6 | #include "core/hle/service/service.h" | ||
| 7 | |||
| 8 | namespace Service { | ||
| 9 | namespace APT { | ||
| 10 | namespace BCFNT { | ||
| 11 | |||
| 12 | void RelocateSharedFont(Kernel::SharedPtr<Kernel::SharedMemory> shared_font, VAddr previous_address, VAddr new_address) { | ||
| 13 | static const u32 SharedFontStartOffset = 0x80; | ||
| 14 | u8* data = shared_font->GetPointer(SharedFontStartOffset); | ||
| 15 | |||
| 16 | CFNT cfnt; | ||
| 17 | memcpy(&cfnt, data, sizeof(cfnt)); | ||
| 18 | |||
| 19 | // Advance past the header | ||
| 20 | data = shared_font->GetPointer(SharedFontStartOffset + cfnt.header_size); | ||
| 21 | |||
| 22 | for (unsigned block = 0; block < cfnt.num_blocks; ++block) { | ||
| 23 | |||
| 24 | u32 section_size = 0; | ||
| 25 | if (memcmp(data, "FINF", 4) == 0) { | ||
| 26 | BCFNT::FINF finf; | ||
| 27 | memcpy(&finf, data, sizeof(finf)); | ||
| 28 | section_size = finf.section_size; | ||
| 29 | |||
| 30 | // Relocate the offsets in the FINF section | ||
| 31 | finf.cmap_offset += new_address - previous_address; | ||
| 32 | finf.cwdh_offset += new_address - previous_address; | ||
| 33 | finf.tglp_offset += new_address - previous_address; | ||
| 34 | |||
| 35 | memcpy(data, &finf, sizeof(finf)); | ||
| 36 | } else if (memcmp(data, "CMAP", 4) == 0) { | ||
| 37 | BCFNT::CMAP cmap; | ||
| 38 | memcpy(&cmap, data, sizeof(cmap)); | ||
| 39 | section_size = cmap.section_size; | ||
| 40 | |||
| 41 | // Relocate the offsets in the CMAP section | ||
| 42 | cmap.next_cmap_offset += new_address - previous_address; | ||
| 43 | |||
| 44 | memcpy(data, &cmap, sizeof(cmap)); | ||
| 45 | } else if (memcmp(data, "CWDH", 4) == 0) { | ||
| 46 | BCFNT::CWDH cwdh; | ||
| 47 | memcpy(&cwdh, data, sizeof(cwdh)); | ||
| 48 | section_size = cwdh.section_size; | ||
| 49 | |||
| 50 | // Relocate the offsets in the CWDH section | ||
| 51 | cwdh.next_cwdh_offset += new_address - previous_address; | ||
| 52 | |||
| 53 | memcpy(data, &cwdh, sizeof(cwdh)); | ||
| 54 | } else if (memcmp(data, "TGLP", 4) == 0) { | ||
| 55 | BCFNT::TGLP tglp; | ||
| 56 | memcpy(&tglp, data, sizeof(tglp)); | ||
| 57 | section_size = tglp.section_size; | ||
| 58 | |||
| 59 | // Relocate the offsets in the TGLP section | ||
| 60 | tglp.sheet_data_offset += new_address - previous_address; | ||
| 61 | |||
| 62 | memcpy(data, &tglp, sizeof(tglp)); | ||
| 63 | } | ||
| 64 | |||
| 65 | data += section_size; | ||
| 66 | } | ||
| 67 | } | ||
| 68 | |||
| 69 | } // namespace BCFNT | ||
| 70 | } // namespace APT | ||
| 71 | } // namespace Service \ No newline at end of file | ||
diff --git a/src/core/hle/service/apt/bcfnt/bcfnt.h b/src/core/hle/service/apt/bcfnt/bcfnt.h new file mode 100644 index 000000000..388c6bea0 --- /dev/null +++ b/src/core/hle/service/apt/bcfnt/bcfnt.h | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/swap.h" | ||
| 8 | |||
| 9 | #include "core/hle/kernel/shared_memory.h" | ||
| 10 | #include "core/hle/service/service.h" | ||
| 11 | |||
| 12 | namespace Service { | ||
| 13 | namespace APT { | ||
| 14 | namespace BCFNT { ///< BCFNT Shared Font file structures | ||
| 15 | |||
| 16 | struct CFNT { | ||
| 17 | u8 magic[4]; | ||
| 18 | u16_le endianness; | ||
| 19 | u16_le header_size; | ||
| 20 | u32_le version; | ||
| 21 | u32_le file_size; | ||
| 22 | u32_le num_blocks; | ||
| 23 | }; | ||
| 24 | |||
| 25 | struct FINF { /* Record tagged "FINF" (per its name); holds the offsets of the TGLP, CWDH and CMAP sections alongside font metrics */ | ||
| 26 | u8 magic[4]; /**< 4-byte tag; presumably "FINF" - TODO confirm */ | ||
| 27 | u32_le section_size; /**< NOTE(review): presumably used like TGLP/CWDH section_size to step to the next section - confirm */ | ||
| 28 | u8 font_type; | ||
| 29 | u8 line_feed; | ||
| 30 | u16_le alter_char_index; | ||
| 31 | u8 default_width[3]; | ||
| 32 | u8 encoding; | ||
| 33 | u32_le tglp_offset; /**< Offset of the TGLP section; presumably rebased by RelocateSharedFont - TODO confirm */ | ||
| 34 | u32_le cwdh_offset; /**< Offset of the first CWDH section; presumably rebased by RelocateSharedFont - TODO confirm */ | ||
| 35 | u32_le cmap_offset; /**< Offset of the first CMAP section; presumably rebased by RelocateSharedFont - TODO confirm */ | ||
| 36 | u8 height; | ||
| 37 | u8 width; | ||
| 38 | u8 ascent; | ||
| 39 | u8 reserved; | ||
| 40 | }; | ||
| 41 | |||
| 42 | struct TGLP { /* Header of a "TGLP" section; RelocateSharedFont copies it out of the font data, patches sheet_data_offset and copies it back */ | ||
| 43 | u8 magic[4]; /**< Section tag; RelocateSharedFont matches it with memcmp(data, "TGLP", 4) */ | ||
| 44 | u32_le section_size; /**< RelocateSharedFont advances its data pointer by this amount to reach the next section */ | ||
| 45 | u8 cell_width; | ||
| 46 | u8 cell_height; | ||
| 47 | u8 baseline_position; | ||
| 48 | u8 max_character_width; | ||
| 49 | u32_le sheet_size; | ||
| 50 | u16_le num_sheets; | ||
| 51 | u16_le sheet_image_format; | ||
| 52 | u16_le num_columns; | ||
| 53 | u16_le num_rows; | ||
| 54 | u16_le sheet_width; | ||
| 55 | u16_le sheet_height; | ||
| 56 | u32_le sheet_data_offset; /**< Address-based offset; RelocateSharedFont adds (new_address - previous_address) to it */ | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct CMAP { /* Header of a section tagged "CMAP" (per its name); describes a code range and links to a following CMAP section */ | ||
| 60 | u8 magic[4]; /**< 4-byte tag; presumably "CMAP" - TODO confirm */ | ||
| 61 | u32_le section_size; /**< NOTE(review): presumably used like TGLP/CWDH section_size to step to the next section - confirm */ | ||
| 62 | u16_le code_begin; | ||
| 63 | u16_le code_end; | ||
| 64 | u16_le mapping_method; | ||
| 65 | u16_le reserved; | ||
| 66 | u32_le next_cmap_offset; /**< Offset of the next CMAP section; presumably rebased by RelocateSharedFont - TODO confirm */ | ||
| 67 | }; | ||
| 68 | |||
| 69 | struct CWDH { /* Header of a CWDH section; RelocateSharedFont patches next_cwdh_offset and copies the header back over the font data */ | ||
| 70 | u8 magic[4]; /**< 4-byte tag; presumably "CWDH", matched the same way as the "TGLP" tag - TODO confirm */ | ||
| 71 | u32_le section_size; /**< RelocateSharedFont advances its data pointer by this amount to reach the next section */ | ||
| 72 | u16_le start_index; | ||
| 73 | u16_le end_index; | ||
| 74 | u32_le next_cwdh_offset; /**< Address-based offset; RelocateSharedFont adds (new_address - previous_address) to it */ | ||
| 75 | }; | ||
| 76 | |||
| 77 | /** | ||
| 78 | * Relocates the internal addresses of the BCFNT Shared Font to the new base: each section header is copied out, its address-based offsets (e.g. CWDH::next_cwdh_offset, TGLP::sheet_data_offset) have (new_address - previous_address) added, and the header is copied back over the data it was read from. | ||
| 79 | * @param shared_font SharedMemory object that contains the Shared Font | ||
| 80 | * @param previous_address Previous address at which the offsets in the structure were based. | ||
| 81 | * @param new_address New base for the offsets in the structure. | ||
| 82 | */ | ||
| 83 | void RelocateSharedFont(Kernel::SharedPtr<Kernel::SharedMemory> shared_font, VAddr previous_address, VAddr new_address); | ||
| 84 | |||
| 85 | } // namespace BCFNT | ||
| 86 | } // namespace APT | ||
| 87 | } // namespace Service | ||
diff --git a/src/core/hle/service/csnd_snd.cpp b/src/core/hle/service/csnd_snd.cpp index 6318bf2a7..d2bb8941c 100644 --- a/src/core/hle/service/csnd_snd.cpp +++ b/src/core/hle/service/csnd_snd.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include "common/alignment.h" | ||
| 6 | #include "core/hle/hle.h" | 7 | #include "core/hle/hle.h" |
| 7 | #include "core/hle/kernel/mutex.h" | 8 | #include "core/hle/kernel/mutex.h" |
| 8 | #include "core/hle/kernel/shared_memory.h" | 9 | #include "core/hle/kernel/shared_memory.h" |
| @@ -41,14 +42,16 @@ static Kernel::SharedPtr<Kernel::Mutex> mutex = nullptr; | |||
| 41 | void Initialize(Service::Interface* self) { | 42 | void Initialize(Service::Interface* self) { |
| 42 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 43 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 43 | 44 | ||
| 44 | shared_memory = Kernel::SharedMemory::Create(cmd_buff[1], | 45 | u32 size = Common::AlignUp(cmd_buff[1], Memory::PAGE_SIZE); |
| 45 | Kernel::MemoryPermission::ReadWrite, | 46 | using Kernel::MemoryPermission; |
| 46 | Kernel::MemoryPermission::ReadWrite, "CSNDSharedMem"); | 47 | shared_memory = Kernel::SharedMemory::Create(nullptr, size, |
| 48 | MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, | ||
| 49 | 0, Kernel::MemoryRegion::BASE, "CSND:SharedMemory"); | ||
| 47 | 50 | ||
| 48 | mutex = Kernel::Mutex::Create(false); | 51 | mutex = Kernel::Mutex::Create(false); |
| 49 | 52 | ||
| 50 | cmd_buff[1] = 0; | 53 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 51 | cmd_buff[2] = 0x4000000; | 54 | cmd_buff[2] = IPC::MoveHandleDesc(2); |
| 52 | cmd_buff[3] = Kernel::g_handle_table.Create(mutex).MoveFrom(); | 55 | cmd_buff[3] = Kernel::g_handle_table.Create(mutex).MoveFrom(); |
| 53 | cmd_buff[4] = Kernel::g_handle_table.Create(shared_memory).MoveFrom(); | 56 | cmd_buff[4] = Kernel::g_handle_table.Create(shared_memory).MoveFrom(); |
| 54 | } | 57 | } |
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp index 274fc751a..10730d7ac 100644 --- a/src/core/hle/service/dsp_dsp.cpp +++ b/src/core/hle/service/dsp_dsp.cpp | |||
| @@ -440,9 +440,9 @@ static void GetHeadphoneStatus(Service::Interface* self) { | |||
| 440 | 440 | ||
| 441 | cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0); | 441 | cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0); |
| 442 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | 442 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error |
| 443 | cmd_buff[2] = 0; // Not using headphones? | 443 | cmd_buff[2] = 0; // Not using headphones |
| 444 | 444 | ||
| 445 | LOG_WARNING(Service_DSP, "(STUBBED) called"); | 445 | LOG_DEBUG(Service_DSP, "called"); |
| 446 | } | 446 | } |
| 447 | 447 | ||
| 448 | /** | 448 | /** |
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index b4c146e08..8ded9b09b 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp | |||
| @@ -335,8 +335,9 @@ static void RegisterInterruptRelayQueue(Service::Interface* self) { | |||
| 335 | g_interrupt_event->name = "GSP_GPU::interrupt_event"; | 335 | g_interrupt_event->name = "GSP_GPU::interrupt_event"; |
| 336 | 336 | ||
| 337 | using Kernel::MemoryPermission; | 337 | using Kernel::MemoryPermission; |
| 338 | g_shared_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, | 338 | g_shared_memory = Kernel::SharedMemory::Create(nullptr, 0x1000, |
| 339 | MemoryPermission::ReadWrite, "GSPSharedMem"); | 339 | MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, |
| 340 | 0, Kernel::MemoryRegion::BASE, "GSP:SharedMemory"); | ||
| 340 | 341 | ||
| 341 | Handle shmem_handle = Kernel::g_handle_table.Create(g_shared_memory).MoveFrom(); | 342 | Handle shmem_handle = Kernel::g_handle_table.Create(g_shared_memory).MoveFrom(); |
| 342 | 343 | ||
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 1053d0f40..d216cecb4 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp | |||
| @@ -280,8 +280,9 @@ void Init() { | |||
| 280 | AddService(new HID_SPVR_Interface); | 280 | AddService(new HID_SPVR_Interface); |
| 281 | 281 | ||
| 282 | using Kernel::MemoryPermission; | 282 | using Kernel::MemoryPermission; |
| 283 | shared_mem = SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, | 283 | shared_mem = SharedMemory::Create(nullptr, 0x1000, |
| 284 | MemoryPermission::Read, "HID:SharedMem"); | 284 | MemoryPermission::ReadWrite, MemoryPermission::Read, |
| 285 | 0, Kernel::MemoryRegion::BASE, "HID:SharedMemory"); | ||
| 285 | 286 | ||
| 286 | next_pad_index = 0; | 287 | next_pad_index = 0; |
| 287 | next_touch_index = 0; | 288 | next_touch_index = 0; |
diff --git a/src/core/hle/service/ir/ir.cpp b/src/core/hle/service/ir/ir.cpp index 505c441c6..079a87e48 100644 --- a/src/core/hle/service/ir/ir.cpp +++ b/src/core/hle/service/ir/ir.cpp | |||
| @@ -94,8 +94,9 @@ void Init() { | |||
| 94 | AddService(new IR_User_Interface); | 94 | AddService(new IR_User_Interface); |
| 95 | 95 | ||
| 96 | using Kernel::MemoryPermission; | 96 | using Kernel::MemoryPermission; |
| 97 | shared_memory = SharedMemory::Create(0x1000, Kernel::MemoryPermission::ReadWrite, | 97 | shared_memory = SharedMemory::Create(nullptr, 0x1000, |
| 98 | Kernel::MemoryPermission::ReadWrite, "IR:SharedMemory"); | 98 | Kernel::MemoryPermission::ReadWrite, Kernel::MemoryPermission::ReadWrite, |
| 99 | 0, Kernel::MemoryRegion::BASE, "IR:SharedMemory"); | ||
| 99 | transfer_shared_memory = nullptr; | 100 | transfer_shared_memory = nullptr; |
| 100 | 101 | ||
| 101 | // Create event handle(s) | 102 | // Create event handle(s) |
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index 60c8747f3..3a53126c1 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp | |||
| @@ -99,6 +99,7 @@ static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 add | |||
| 99 | switch (operation & MEMOP_OPERATION_MASK) { | 99 | switch (operation & MEMOP_OPERATION_MASK) { |
| 100 | case MEMOP_FREE: | 100 | case MEMOP_FREE: |
| 101 | { | 101 | { |
| 102 | // TODO(Subv): What happens if an application tries to FREE a block of memory that has a SharedMemory pointing to it? | ||
| 102 | if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) { | 103 | if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) { |
| 103 | ResultCode result = process.HeapFree(addr0, size); | 104 | ResultCode result = process.HeapFree(addr0, size); |
| 104 | if (result.IsError()) return result; | 105 | if (result.IsError()) return result; |
| @@ -160,8 +161,6 @@ static ResultCode MapMemoryBlock(Handle handle, u32 addr, u32 permissions, u32 o | |||
| 160 | LOG_TRACE(Kernel_SVC, "called memblock=0x%08X, addr=0x%08X, mypermissions=0x%08X, otherpermission=%d", | 161 | LOG_TRACE(Kernel_SVC, "called memblock=0x%08X, addr=0x%08X, mypermissions=0x%08X, otherpermission=%d", |
| 161 | handle, addr, permissions, other_permissions); | 162 | handle, addr, permissions, other_permissions); |
| 162 | 163 | ||
| 163 | // TODO(Subv): The same process that created a SharedMemory object can not map it in its own address space | ||
| 164 | |||
| 165 | SharedPtr<SharedMemory> shared_memory = Kernel::g_handle_table.Get<SharedMemory>(handle); | 164 | SharedPtr<SharedMemory> shared_memory = Kernel::g_handle_table.Get<SharedMemory>(handle); |
| 166 | if (shared_memory == nullptr) | 165 | if (shared_memory == nullptr) |
| 167 | return ERR_INVALID_HANDLE; | 166 | return ERR_INVALID_HANDLE; |
| @@ -176,7 +175,7 @@ static ResultCode MapMemoryBlock(Handle handle, u32 addr, u32 permissions, u32 o | |||
| 176 | case MemoryPermission::WriteExecute: | 175 | case MemoryPermission::WriteExecute: |
| 177 | case MemoryPermission::ReadWriteExecute: | 176 | case MemoryPermission::ReadWriteExecute: |
| 178 | case MemoryPermission::DontCare: | 177 | case MemoryPermission::DontCare: |
| 179 | return shared_memory->Map(addr, permissions_type, | 178 | return shared_memory->Map(Kernel::g_current_process.get(), addr, permissions_type, |
| 180 | static_cast<MemoryPermission>(other_permissions)); | 179 | static_cast<MemoryPermission>(other_permissions)); |
| 181 | default: | 180 | default: |
| 182 | LOG_ERROR(Kernel_SVC, "unknown permissions=0x%08X", permissions); | 181 | LOG_ERROR(Kernel_SVC, "unknown permissions=0x%08X", permissions); |
| @@ -196,7 +195,7 @@ static ResultCode UnmapMemoryBlock(Handle handle, u32 addr) { | |||
| 196 | if (shared_memory == nullptr) | 195 | if (shared_memory == nullptr) |
| 197 | return ERR_INVALID_HANDLE; | 196 | return ERR_INVALID_HANDLE; |
| 198 | 197 | ||
| 199 | return shared_memory->Unmap(addr); | 198 | return shared_memory->Unmap(Kernel::g_current_process.get(), addr); |
| 200 | } | 199 | } |
| 201 | 200 | ||
| 202 | /// Connect to an OS service given the port name, returns the handle to the port to out | 201 | /// Connect to an OS service given the port name, returns the handle to the port to out |
| @@ -790,18 +789,44 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32 | |||
| 790 | if (size % Memory::PAGE_SIZE != 0) | 789 | if (size % Memory::PAGE_SIZE != 0) |
| 791 | return ResultCode(ErrorDescription::MisalignedSize, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); | 790 | return ResultCode(ErrorDescription::MisalignedSize, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); |
| 792 | 791 | ||
| 793 | // TODO(Subv): Return E0A01BF5 if the address is not in the application's heap | 792 | SharedPtr<SharedMemory> shared_memory = nullptr; |
| 794 | |||
| 795 | // TODO(Subv): Implement this function properly | ||
| 796 | 793 | ||
| 797 | using Kernel::MemoryPermission; | 794 | using Kernel::MemoryPermission; |
| 798 | SharedPtr<SharedMemory> shared_memory = SharedMemory::Create(size, | 795 | auto VerifyPermissions = [](MemoryPermission permission) { |
| 799 | (MemoryPermission)my_permission, (MemoryPermission)other_permission); | 796 | // SharedMemory blocks can not be created with Execute permissions |
| 800 | // Map the SharedMemory to the specified address | 797 | switch (permission) { |
| 801 | shared_memory->base_address = addr; | 798 | case MemoryPermission::None: |
| 799 | case MemoryPermission::Read: | ||
| 800 | case MemoryPermission::Write: | ||
| 801 | case MemoryPermission::ReadWrite: | ||
| 802 | case MemoryPermission::DontCare: | ||
| 803 | return true; | ||
| 804 | default: | ||
| 805 | return false; | ||
| 806 | } | ||
| 807 | }; | ||
| 808 | |||
| 809 | if (!VerifyPermissions(static_cast<MemoryPermission>(my_permission)) || | ||
| 810 | !VerifyPermissions(static_cast<MemoryPermission>(other_permission))) | ||
| 811 | return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, | ||
| 812 | ErrorSummary::InvalidArgument, ErrorLevel::Usage); | ||
| 813 | |||
| 814 | if (addr < Memory::PROCESS_IMAGE_VADDR || addr + size > Memory::SHARED_MEMORY_VADDR_END) { | ||
| 815 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); | ||
| 816 | } | ||
| 817 | |||
| 818 | // When trying to create a memory block with address = 0, | ||
| 819 | // if the process has the Shared Device Memory flag in the exheader, | ||
| 820 | // then we have to allocate from the same region as the caller process instead of the BASE region. | ||
| 821 | Kernel::MemoryRegion region = Kernel::MemoryRegion::BASE; | ||
| 822 | if (addr == 0 && Kernel::g_current_process->flags.shared_device_mem) | ||
| 823 | region = Kernel::g_current_process->flags.memory_region; | ||
| 824 | |||
| 825 | shared_memory = SharedMemory::Create(Kernel::g_current_process, size, | ||
| 826 | static_cast<MemoryPermission>(my_permission), static_cast<MemoryPermission>(other_permission), addr, region); | ||
| 802 | CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(shared_memory))); | 827 | CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(shared_memory))); |
| 803 | 828 | ||
| 804 | LOG_WARNING(Kernel_SVC, "(STUBBED) called addr=0x%08X", addr); | 829 | LOG_WARNING(Kernel_SVC, "called addr=0x%08X", addr); |
| 805 | return RESULT_SUCCESS; | 830 | return RESULT_SUCCESS; |
| 806 | } | 831 | } |
| 807 | 832 | ||
diff --git a/src/core/memory.h b/src/core/memory.h index 9caa3c3f5..126d60471 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -100,15 +100,9 @@ enum : VAddr { | |||
| 100 | SHARED_PAGE_SIZE = 0x00001000, | 100 | SHARED_PAGE_SIZE = 0x00001000, |
| 101 | SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, | 101 | SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, |
| 102 | 102 | ||
| 103 | // TODO(yuriks): The size of this area is dynamic, the kernel grows | ||
| 104 | // it as more and more threads are created. For now we'll just use a | ||
| 105 | // hardcoded value. | ||
| 106 | /// Area where TLS (Thread-Local Storage) buffers are allocated. | 103 | /// Area where TLS (Thread-Local Storage) buffers are allocated. |
| 107 | TLS_AREA_VADDR = 0x1FF82000, | 104 | TLS_AREA_VADDR = 0x1FF82000, |
| 108 | TLS_ENTRY_SIZE = 0x200, | 105 | TLS_ENTRY_SIZE = 0x200, |
| 109 | TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size | ||
| 110 | TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE, | ||
| 111 | |||
| 112 | 106 | ||
| 113 | /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. | 107 | /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. |
| 114 | NEW_LINEAR_HEAP_VADDR = 0x30000000, | 108 | NEW_LINEAR_HEAP_VADDR = 0x30000000, |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index e7dc5ddac..ad0da796e 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -128,7 +128,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 128 | 128 | ||
| 129 | // TODO: Verify that this actually modifies the register! | 129 | // TODO: Verify that this actually modifies the register! |
| 130 | if (setup.index < 15) { | 130 | if (setup.index < 15) { |
| 131 | g_state.vs.default_attributes[setup.index] = attribute; | 131 | g_state.vs_default_attributes[setup.index] = attribute; |
| 132 | setup.index++; | 132 | setup.index++; |
| 133 | } else { | 133 | } else { |
| 134 | // Put each attribute into an immediate input buffer. | 134 | // Put each attribute into an immediate input buffer. |
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 1059c6ae4..495174c25 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h | |||
| @@ -25,6 +25,8 @@ struct State { | |||
| 25 | Shader::ShaderSetup vs; | 25 | Shader::ShaderSetup vs; |
| 26 | Shader::ShaderSetup gs; | 26 | Shader::ShaderSetup gs; |
| 27 | 27 | ||
| 28 | std::array<Math::Vec4<float24>, 16> vs_default_attributes; | ||
| 29 | |||
| 28 | struct { | 30 | struct { |
| 29 | union LutEntry { | 31 | union LutEntry { |
| 30 | // Used for raw access | 32 | // Used for raw access |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4222945a4..bcd1ae78d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -104,7 +104,6 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { | |||
| 104 | 104 | ||
| 105 | // Sync fixed function OpenGL state | 105 | // Sync fixed function OpenGL state |
| 106 | SyncCullMode(); | 106 | SyncCullMode(); |
| 107 | SyncDepthModifiers(); | ||
| 108 | SyncBlendEnabled(); | 107 | SyncBlendEnabled(); |
| 109 | SyncBlendFuncs(); | 108 | SyncBlendFuncs(); |
| 110 | SyncBlendColor(); | 109 | SyncBlendColor(); |
| @@ -259,8 +258,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 259 | 258 | ||
| 260 | // Depth modifiers | 259 | // Depth modifiers |
| 261 | case PICA_REG_INDEX(viewport_depth_range): | 260 | case PICA_REG_INDEX(viewport_depth_range): |
| 261 | SyncDepthScale(); | ||
| 262 | break; | ||
| 262 | case PICA_REG_INDEX(viewport_depth_near_plane): | 263 | case PICA_REG_INDEX(viewport_depth_near_plane): |
| 263 | SyncDepthModifiers(); | 264 | SyncDepthOffset(); |
| 264 | break; | 265 | break; |
| 265 | 266 | ||
| 266 | // Depth buffering | 267 | // Depth buffering |
| @@ -880,6 +881,8 @@ void RasterizerOpenGL::SetShader() { | |||
| 880 | glUniformBlockBinding(current_shader->shader.handle, block_index, 0); | 881 | glUniformBlockBinding(current_shader->shader.handle, block_index, 0); |
| 881 | 882 | ||
| 882 | // Update uniforms | 883 | // Update uniforms |
| 884 | SyncDepthScale(); | ||
| 885 | SyncDepthOffset(); | ||
| 883 | SyncAlphaTest(); | 886 | SyncAlphaTest(); |
| 884 | SyncCombinerColor(); | 887 | SyncCombinerColor(); |
| 885 | auto& tev_stages = Pica::g_state.regs.GetTevStages(); | 888 | auto& tev_stages = Pica::g_state.regs.GetTevStages(); |
| @@ -922,13 +925,20 @@ void RasterizerOpenGL::SyncCullMode() { | |||
| 922 | } | 925 | } |
| 923 | } | 926 | } |
| 924 | 927 | ||
| 925 | void RasterizerOpenGL::SyncDepthModifiers() { | 928 | void RasterizerOpenGL::SyncDepthScale() { |
| 926 | float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); | 929 | float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); |
| 927 | float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); | 930 | if (depth_scale != uniform_block_data.data.depth_scale) { |
| 931 | uniform_block_data.data.depth_scale = depth_scale; | ||
| 932 | uniform_block_data.dirty = true; | ||
| 933 | } | ||
| 934 | } | ||
| 928 | 935 | ||
| 929 | uniform_block_data.data.depth_scale = depth_scale; | 936 | void RasterizerOpenGL::SyncDepthOffset() { |
| 930 | uniform_block_data.data.depth_offset = depth_offset; | 937 | float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); |
| 931 | uniform_block_data.dirty = true; | 938 | if (depth_offset != uniform_block_data.data.depth_offset) { |
| 939 | uniform_block_data.data.depth_offset = depth_offset; | ||
| 940 | uniform_block_data.dirty = true; | ||
| 941 | } | ||
| 932 | } | 942 | } |
| 933 | 943 | ||
| 934 | void RasterizerOpenGL::SyncBlendEnabled() { | 944 | void RasterizerOpenGL::SyncBlendEnabled() { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index eed00011a..d70369400 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -339,8 +339,11 @@ private: | |||
| 339 | /// Syncs the cull mode to match the PICA register | 339 | /// Syncs the cull mode to match the PICA register |
| 340 | void SyncCullMode(); | 340 | void SyncCullMode(); |
| 341 | 341 | ||
| 342 | /// Syncs the depth scale and offset to match the PICA registers | 342 | /// Syncs the depth scale to match the PICA register |
| 343 | void SyncDepthModifiers(); | 343 | void SyncDepthScale(); |
| 344 | |||
| 345 | /// Syncs the depth offset to match the PICA register | ||
| 346 | void SyncDepthOffset(); | ||
| 344 | 347 | ||
| 345 | /// Syncs the blend enabled status to match the PICA register | 348 | /// Syncs the blend enabled status to match the PICA register |
| 346 | void SyncBlendEnabled(); | 349 | void SyncBlendEnabled(); |
| @@ -413,7 +416,7 @@ private: | |||
| 413 | UniformData data; | 416 | UniformData data; |
| 414 | bool lut_dirty[6]; | 417 | bool lut_dirty[6]; |
| 415 | bool dirty; | 418 | bool dirty; |
| 416 | } uniform_block_data; | 419 | } uniform_block_data = {}; |
| 417 | 420 | ||
| 418 | std::array<SamplerInfo, 3> texture_samplers; | 421 | std::array<SamplerInfo, 3> texture_samplers; |
| 419 | OGLVertexArray vertex_array; | 422 | OGLVertexArray vertex_array; |
| @@ -422,5 +425,5 @@ private: | |||
| 422 | OGLFramebuffer framebuffer; | 425 | OGLFramebuffer framebuffer; |
| 423 | 426 | ||
| 424 | std::array<OGLTexture, 6> lighting_luts; | 427 | std::array<OGLTexture, 6> lighting_luts; |
| 425 | std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; | 428 | std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{}; |
| 426 | }; | 429 | }; |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 449fc703f..161097610 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -64,10 +64,10 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | |||
| 64 | 64 | ||
| 65 | OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | 65 | OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { |
| 66 | auto& config = g_state.regs.vs; | 66 | auto& config = g_state.regs.vs; |
| 67 | auto& setup = g_state.vs; | ||
| 67 | 68 | ||
| 68 | MICROPROFILE_SCOPE(GPU_Shader); | 69 | MICROPROFILE_SCOPE(GPU_Shader); |
| 69 | 70 | ||
| 70 | state.program_counter = config.main_offset; | ||
| 71 | state.debug.max_offset = 0; | 71 | state.debug.max_offset = 0; |
| 72 | state.debug.max_opdesc_id = 0; | 72 | state.debug.max_opdesc_id = 0; |
| 73 | 73 | ||
| @@ -82,11 +82,11 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, | |||
| 82 | 82 | ||
| 83 | #ifdef ARCHITECTURE_x86_64 | 83 | #ifdef ARCHITECTURE_x86_64 |
| 84 | if (VideoCore::g_shader_jit_enabled) | 84 | if (VideoCore::g_shader_jit_enabled) |
| 85 | jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); | 85 | jit_shader->Run(setup, state, config.main_offset); |
| 86 | else | 86 | else |
| 87 | RunInterpreter(state); | 87 | RunInterpreter(setup, state, config.main_offset); |
| 88 | #else | 88 | #else |
| 89 | RunInterpreter(state); | 89 | RunInterpreter(setup, state, config.main_offset); |
| 90 | #endif // ARCHITECTURE_x86_64 | 90 | #endif // ARCHITECTURE_x86_64 |
| 91 | 91 | ||
| 92 | // Setup output data | 92 | // Setup output data |
| @@ -143,7 +143,6 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, | |||
| 143 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { | 143 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { |
| 144 | UnitState<true> state; | 144 | UnitState<true> state; |
| 145 | 145 | ||
| 146 | state.program_counter = config.main_offset; | ||
| 147 | state.debug.max_offset = 0; | 146 | state.debug.max_offset = 0; |
| 148 | state.debug.max_opdesc_id = 0; | 147 | state.debug.max_opdesc_id = 0; |
| 149 | 148 | ||
| @@ -158,7 +157,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ | |||
| 158 | state.conditional_code[0] = false; | 157 | state.conditional_code[0] = false; |
| 159 | state.conditional_code[1] = false; | 158 | state.conditional_code[1] = false; |
| 160 | 159 | ||
| 161 | RunInterpreter(state); | 160 | RunInterpreter(setup, state, config.main_offset); |
| 162 | return state.debug; | 161 | return state.debug; |
| 163 | } | 162 | } |
| 164 | 163 | ||
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 7f417675a..84898f21c 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -272,38 +272,21 @@ struct UnitState { | |||
| 272 | } registers; | 272 | } registers; |
| 273 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); | 273 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); |
| 274 | 274 | ||
| 275 | u32 program_counter; | ||
| 276 | bool conditional_code[2]; | 275 | bool conditional_code[2]; |
| 277 | 276 | ||
| 278 | // Two Address registers and one loop counter | 277 | // Two Address registers and one loop counter |
| 279 | // TODO: How many bits do these actually have? | 278 | // TODO: How many bits do these actually have? |
| 280 | s32 address_registers[3]; | 279 | s32 address_registers[3]; |
| 281 | 280 | ||
| 282 | enum { | ||
| 283 | INVALID_ADDRESS = 0xFFFFFFFF | ||
| 284 | }; | ||
| 285 | |||
| 286 | struct CallStackElement { | ||
| 287 | u32 final_address; // Address upon which we jump to return_address | ||
| 288 | u32 return_address; // Where to jump when leaving scope | ||
| 289 | u8 repeat_counter; // How often to repeat until this call stack element is removed | ||
| 290 | u8 loop_increment; // Which value to add to the loop counter after an iteration | ||
| 291 | // TODO: Should this be a signed value? Does it even matter? | ||
| 292 | u32 loop_address; // The address where we'll return to after each loop iteration | ||
| 293 | }; | ||
| 294 | |||
| 295 | // TODO: Is there a maximal size for this? | ||
| 296 | boost::container::static_vector<CallStackElement, 16> call_stack; | ||
| 297 | |||
| 298 | DebugData<Debug> debug; | 281 | DebugData<Debug> debug; |
| 299 | 282 | ||
| 300 | static size_t InputOffset(const SourceRegister& reg) { | 283 | static size_t InputOffset(const SourceRegister& reg) { |
| 301 | switch (reg.GetRegisterType()) { | 284 | switch (reg.GetRegisterType()) { |
| 302 | case RegisterType::Input: | 285 | case RegisterType::Input: |
| 303 | return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 286 | return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 304 | 287 | ||
| 305 | case RegisterType::Temporary: | 288 | case RegisterType::Temporary: |
| 306 | return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 289 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 307 | 290 | ||
| 308 | default: | 291 | default: |
| 309 | UNREACHABLE(); | 292 | UNREACHABLE(); |
| @@ -314,10 +297,10 @@ struct UnitState { | |||
| 314 | static size_t OutputOffset(const DestRegister& reg) { | 297 | static size_t OutputOffset(const DestRegister& reg) { |
| 315 | switch (reg.GetRegisterType()) { | 298 | switch (reg.GetRegisterType()) { |
| 316 | case RegisterType::Output: | 299 | case RegisterType::Output: |
| 317 | return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 300 | return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 318 | 301 | ||
| 319 | case RegisterType::Temporary: | 302 | case RegisterType::Temporary: |
| 320 | return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 303 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 321 | 304 | ||
| 322 | default: | 305 | default: |
| 323 | UNREACHABLE(); | 306 | UNREACHABLE(); |
| @@ -340,7 +323,22 @@ struct ShaderSetup { | |||
| 340 | std::array<Math::Vec4<u8>, 4> i; | 323 | std::array<Math::Vec4<u8>, 4> i; |
| 341 | } uniforms; | 324 | } uniforms; |
| 342 | 325 | ||
| 343 | Math::Vec4<float24> default_attributes[16]; | 326 | static size_t UniformOffset(RegisterType type, unsigned index) { |
| 327 | switch (type) { | ||
| 328 | case RegisterType::FloatUniform: | ||
| 329 | return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); | ||
| 330 | |||
| 331 | case RegisterType::BoolUniform: | ||
| 332 | return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); | ||
| 333 | |||
| 334 | case RegisterType::IntUniform: | ||
| 335 | return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); | ||
| 336 | |||
| 337 | default: | ||
| 338 | UNREACHABLE(); | ||
| 339 | return 0; | ||
| 340 | } | ||
| 341 | } | ||
| 344 | 342 | ||
| 345 | std::array<u32, 1024> program_code; | 343 | std::array<u32, 1024> program_code; |
| 346 | std::array<u32, 1024> swizzle_data; | 344 | std::array<u32, 1024> swizzle_data; |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 7710f7fbc..714e8bfd5 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -29,8 +29,24 @@ namespace Pica { | |||
| 29 | 29 | ||
| 30 | namespace Shader { | 30 | namespace Shader { |
| 31 | 31 | ||
| 32 | constexpr u32 INVALID_ADDRESS = 0xFFFFFFFF; | ||
| 33 | |||
| 34 | struct CallStackElement { /* One entry of the call_stack that RunInterpreter keeps locally (static_vector with capacity 16); examined whenever the program counter reaches final_address */ | ||
| 35 | u32 final_address; // Address upon which we jump to return_address (or to loop_address while repeats remain) | ||
| 36 | u32 return_address; // Where to jump when leaving scope; the element is popped at that point | ||
| 37 | u8 repeat_counter; // How often to repeat until this call stack element is removed; tested against 0 and then decremented each time final_address is reached | ||
| 38 | u8 loop_increment; // Which value to add to the loop counter (address_registers[2]) after an iteration | ||
| 39 | // TODO: Should this be a signed value? Does it even matter? | ||
| 40 | u32 loop_address; // The address where we'll return to after each loop iteration | ||
| 41 | }; | ||
| 42 | |||
| 32 | template<bool Debug> | 43 | template<bool Debug> |
| 33 | void RunInterpreter(UnitState<Debug>& state) { | 44 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { |
| 45 | // TODO: Is there a maximal size for this? | ||
| 46 | boost::container::static_vector<CallStackElement, 16> call_stack; | ||
| 47 | |||
| 48 | u32 program_counter = offset; | ||
| 49 | |||
| 34 | const auto& uniforms = g_state.vs.uniforms; | 50 | const auto& uniforms = g_state.vs.uniforms; |
| 35 | const auto& swizzle_data = g_state.vs.swizzle_data; | 51 | const auto& swizzle_data = g_state.vs.swizzle_data; |
| 36 | const auto& program_code = g_state.vs.program_code; | 52 | const auto& program_code = g_state.vs.program_code; |
| @@ -41,16 +57,16 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 41 | unsigned iteration = 0; | 57 | unsigned iteration = 0; |
| 42 | bool exit_loop = false; | 58 | bool exit_loop = false; |
| 43 | while (!exit_loop) { | 59 | while (!exit_loop) { |
| 44 | if (!state.call_stack.empty()) { | 60 | if (!call_stack.empty()) { |
| 45 | auto& top = state.call_stack.back(); | 61 | auto& top = call_stack.back(); |
| 46 | if (state.program_counter == top.final_address) { | 62 | if (program_counter == top.final_address) { |
| 47 | state.address_registers[2] += top.loop_increment; | 63 | state.address_registers[2] += top.loop_increment; |
| 48 | 64 | ||
| 49 | if (top.repeat_counter-- == 0) { | 65 | if (top.repeat_counter-- == 0) { |
| 50 | state.program_counter = top.return_address; | 66 | program_counter = top.return_address; |
| 51 | state.call_stack.pop_back(); | 67 | call_stack.pop_back(); |
| 52 | } else { | 68 | } else { |
| 53 | state.program_counter = top.loop_address; | 69 | program_counter = top.loop_address; |
| 54 | } | 70 | } |
| 55 | 71 | ||
| 56 | // TODO: Is "trying again" accurate to hardware? | 72 | // TODO: Is "trying again" accurate to hardware? |
| @@ -58,20 +74,20 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 58 | } | 74 | } |
| 59 | } | 75 | } |
| 60 | 76 | ||
| 61 | const Instruction instr = { program_code[state.program_counter] }; | 77 | const Instruction instr = { program_code[program_counter] }; |
| 62 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | 78 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; |
| 63 | 79 | ||
| 64 | static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, | 80 | auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, |
| 65 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 81 | u32 return_offset, u8 repeat_count, u8 loop_increment) { |
| 66 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 82 | program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 67 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); | 83 | ASSERT(call_stack.size() < call_stack.capacity()); |
| 68 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 84 | call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); |
| 69 | }; | 85 | }; // Must not be static: it captures this invocation's locals by reference, and a static closure would keep dangling references once the first RunInterpreter call returns |
| 70 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); | 86 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); |
| 71 | if (iteration > 0) | 87 | if (iteration > 0) |
| 72 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); | 88 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter); |
| 73 | 89 | ||
| 74 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); | 90 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter); |
| 75 | 91 | ||
| 76 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | 92 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| 77 | switch (source_reg.GetRegisterType()) { | 93 | switch (source_reg.GetRegisterType()) { |
| @@ -519,7 +535,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 519 | case OpCode::Id::JMPC: | 535 | case OpCode::Id::JMPC: |
| 520 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 536 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); |
| 521 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 537 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 522 | state.program_counter = instr.flow_control.dest_offset - 1; | 538 | program_counter = instr.flow_control.dest_offset - 1; |
| 523 | } | 539 | } |
| 524 | break; | 540 | break; |
| 525 | 541 | ||
| @@ -527,7 +543,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 527 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 543 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); |
| 528 | 544 | ||
| 529 | if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { | 545 | if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { |
| 530 | state.program_counter = instr.flow_control.dest_offset - 1; | 546 | program_counter = instr.flow_control.dest_offset - 1; |
| 531 | } | 547 | } |
| 532 | break; | 548 | break; |
| 533 | 549 | ||
| @@ -535,7 +551,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 535 | call(state, | 551 | call(state, |
| 536 | instr.flow_control.dest_offset, | 552 | instr.flow_control.dest_offset, |
| 537 | instr.flow_control.num_instructions, | 553 | instr.flow_control.num_instructions, |
| 538 | state.program_counter + 1, 0, 0); | 554 | program_counter + 1, 0, 0); |
| 539 | break; | 555 | break; |
| 540 | 556 | ||
| 541 | case OpCode::Id::CALLU: | 557 | case OpCode::Id::CALLU: |
| @@ -544,7 +560,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 544 | call(state, | 560 | call(state, |
| 545 | instr.flow_control.dest_offset, | 561 | instr.flow_control.dest_offset, |
| 546 | instr.flow_control.num_instructions, | 562 | instr.flow_control.num_instructions, |
| 547 | state.program_counter + 1, 0, 0); | 563 | program_counter + 1, 0, 0); |
| 548 | } | 564 | } |
| 549 | break; | 565 | break; |
| 550 | 566 | ||
| @@ -554,7 +570,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 554 | call(state, | 570 | call(state, |
| 555 | instr.flow_control.dest_offset, | 571 | instr.flow_control.dest_offset, |
| 556 | instr.flow_control.num_instructions, | 572 | instr.flow_control.num_instructions, |
| 557 | state.program_counter + 1, 0, 0); | 573 | program_counter + 1, 0, 0); |
| 558 | } | 574 | } |
| 559 | break; | 575 | break; |
| 560 | 576 | ||
| @@ -565,8 +581,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 565 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 581 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); |
| 566 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 582 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 567 | call(state, | 583 | call(state, |
| 568 | state.program_counter + 1, | 584 | program_counter + 1, |
| 569 | instr.flow_control.dest_offset - state.program_counter - 1, | 585 | instr.flow_control.dest_offset - program_counter - 1, |
| 570 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 586 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 571 | } else { | 587 | } else { |
| 572 | call(state, | 588 | call(state, |
| @@ -584,8 +600,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 584 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 600 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); |
| 585 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 601 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 586 | call(state, | 602 | call(state, |
| 587 | state.program_counter + 1, | 603 | program_counter + 1, |
| 588 | instr.flow_control.dest_offset - state.program_counter - 1, | 604 | instr.flow_control.dest_offset - program_counter - 1, |
| 589 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 605 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 590 | } else { | 606 | } else { |
| 591 | call(state, | 607 | call(state, |
| @@ -607,8 +623,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 607 | 623 | ||
| 608 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | 624 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); |
| 609 | call(state, | 625 | call(state, |
| 610 | state.program_counter + 1, | 626 | program_counter + 1, |
| 611 | instr.flow_control.dest_offset - state.program_counter + 1, | 627 | instr.flow_control.dest_offset - program_counter + 1, |
| 612 | instr.flow_control.dest_offset + 1, | 628 | instr.flow_control.dest_offset + 1, |
| 613 | loop_param.x, | 629 | loop_param.x, |
| 614 | loop_param.z); | 630 | loop_param.z); |
| @@ -625,14 +641,14 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 625 | } | 641 | } |
| 626 | } | 642 | } |
| 627 | 643 | ||
| 628 | ++state.program_counter; | 644 | ++program_counter; |
| 629 | ++iteration; | 645 | ++iteration; |
| 630 | } | 646 | } |
| 631 | } | 647 | } |
| 632 | 648 | ||
| 633 | // Explicit instantiation | 649 | // Explicit instantiation |
| 634 | template void RunInterpreter(UnitState<false>& state); | 650 | template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset); |
| 635 | template void RunInterpreter(UnitState<true>& state); | 651 | template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset); |
| 636 | 652 | ||
| 637 | } // namespace | 653 | } // namespace |
| 638 | 654 | ||
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 6048cdf3a..bb3ce1c6e 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -11,7 +11,7 @@ namespace Shader { | |||
| 11 | template <bool Debug> struct UnitState; | 11 | template <bool Debug> struct UnitState; |
| 12 | 12 | ||
| 13 | template<bool Debug> | 13 | template<bool Debug> |
| 14 | void RunInterpreter(UnitState<Debug>& state); | 14 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); |
| 15 | 15 | ||
| 16 | } // namespace | 16 | } // namespace |
| 17 | 17 | ||
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 99f6c51eb..43e7e6b4c 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -102,7 +102,7 @@ const JitFunction instr_table[64] = { | |||
| 102 | // purposes, as documented below: | 102 | // purposes, as documented below: |
| 103 | 103 | ||
| 104 | /// Pointer to the uniform memory | 104 | /// Pointer to the ShaderSetup; uniforms are addressed relative to it via ShaderSetup::UniformOffset |
| 105 | static const X64Reg UNIFORMS = R9; | 105 | static const X64Reg SETUP = R9; |
| 106 | /// The two 32-bit VS address offset registers set by the MOVA instruction | 106 | /// The two 32-bit VS address offset registers set by the MOVA instruction |
| 107 | static const X64Reg ADDROFFS_REG_0 = R10; | 107 | static const X64Reg ADDROFFS_REG_0 = R10; |
| 108 | static const X64Reg ADDROFFS_REG_1 = R11; | 108 | static const X64Reg ADDROFFS_REG_1 = R11; |
| @@ -117,7 +117,7 @@ static const X64Reg COND0 = R13; | |||
| 117 | /// Result of the previous CMP instruction for the Y-component comparison | 117 | /// Result of the previous CMP instruction for the Y-component comparison |
| 118 | static const X64Reg COND1 = R14; | 118 | static const X64Reg COND1 = R14; |
| 119 | /// Pointer to the UnitState instance for the current VS unit | 119 | /// Pointer to the UnitState instance for the current VS unit |
| 120 | static const X64Reg REGISTERS = R15; | 120 | static const X64Reg STATE = R15; |
| 121 | /// SIMD scratch register | 121 | /// SIMD scratch register |
| 122 | static const X64Reg SCRATCH = XMM0; | 122 | static const X64Reg SCRATCH = XMM0; |
| 123 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register | 123 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register |
| @@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15; | |||
| 136 | // State registers that must not be modified by external functions calls | 136 | // State registers that must not be modified by external function calls |
| 137 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed | 137 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed |
| 138 | static const BitSet32 persistent_regs = { | 138 | static const BitSet32 persistent_regs = { |
| 139 | UNIFORMS, REGISTERS, // Pointers to register blocks | 139 | SETUP, STATE, // Pointers to the shader setup and the unit state |
| 140 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers | 140 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers |
| 141 | ONE+16, NEGBIT+16, // Constants | 141 | ONE+16, NEGBIT+16, // Constants |
| 142 | }; | 142 | }; |
| @@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
| 177 | size_t src_offset; | 177 | size_t src_offset; |
| 178 | 178 | ||
| 179 | if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { | 179 | if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { |
| 180 | src_ptr = UNIFORMS; | 180 | src_ptr = SETUP; |
| 181 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | 181 | src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex()); |
| 182 | } else { | 182 | } else { |
| 183 | src_ptr = REGISTERS; | 183 | src_ptr = STATE; |
| 184 | src_offset = UnitState<false>::InputOffset(src_reg); | 184 | src_offset = UnitState<false>::InputOffset(src_reg); |
| 185 | } | 185 | } |
| 186 | 186 | ||
| @@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 264 | // If all components are enabled, write the result to the destination register | 264 | // If all components are enabled, write the result to the destination register |
| 265 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | 265 | if (swiz.dest_mask == NO_DEST_REG_MASK) { |
| 266 | // Store dest back to memory | 266 | // Store dest back to memory |
| 267 | MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); | 267 | MOVAPS(MDisp(STATE, dest_offset_disp), src); |
| 268 | 268 | ||
| 269 | } else { | 269 | } else { |
| 270 | // Not all components are enabled, so mask the result when storing to the destination register... | 270 | // Not all components are enabled, so mask the result when storing to the destination register... |
| 271 | MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); | 271 | MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); |
| 272 | 272 | ||
| 273 | if (Common::GetCPUCaps().sse4_1) { | 273 | if (Common::GetCPUCaps().sse4_1) { |
| 274 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | 274 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |
| @@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 287 | } | 287 | } |
| 288 | 288 | ||
| 289 | // Store dest back to memory | 289 | // Store dest back to memory |
| 290 | MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); | 290 | MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH); |
| 291 | } | 291 | } |
| 292 | } | 292 | } |
| 293 | 293 | ||
| @@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { | |||
| 336 | } | 336 | } |
| 337 | 337 | ||
| 338 | void JitShader::Compile_UniformCondition(Instruction instr) { | 338 | void JitShader::Compile_UniformCondition(Instruction instr) { |
| 339 | int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); | 339 | int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); |
| 340 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); | 340 | CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); |
| 341 | } | 341 | } |
| 342 | 342 | ||
| 343 | BitSet32 JitShader::PersistentCallerSavedRegs() { | 343 | BitSet32 JitShader::PersistentCallerSavedRegs() { |
| @@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) { | |||
| 714 | 714 | ||
| 715 | looping = true; | 715 | looping = true; |
| 716 | 716 | ||
| 717 | int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); | 717 | int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); |
| 718 | MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); | 718 | MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); |
| 719 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); | 719 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); |
| 720 | SHR(32, R(LOOPCOUNT_REG), Imm8(8)); | 720 | SHR(32, R(LOOPCOUNT_REG), Imm8(8)); |
| 721 | AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start | 721 | AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start |
| @@ -826,8 +826,8 @@ void JitShader::Compile() { | |||
| 826 | // The stack pointer is 8 modulo 16 at the entry of a procedure | 826 | // The stack pointer is 8 modulo 16 at the entry of a procedure |
| 827 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | 827 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| 828 | 828 | ||
| 829 | MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); | 829 | MOV(PTRBITS, R(SETUP), R(ABI_PARAM1)); |
| 830 | MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); | 830 | MOV(PTRBITS, R(STATE), R(ABI_PARAM2)); |
| 831 | 831 | ||
| 832 | // Zero address/loop registers | 832 | // Zero address/loop registers |
| 833 | XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); | 833 | XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); |
| @@ -845,7 +845,7 @@ void JitShader::Compile() { | |||
| 845 | MOVAPS(NEGBIT, MatR(RAX)); | 845 | MOVAPS(NEGBIT, MatR(RAX)); |
| 846 | 846 | ||
| 847 | // Jump to start of the shader program | 847 | // Jump to start of the shader program |
| 848 | JMPptr(R(ABI_PARAM2)); | 848 | JMPptr(R(ABI_PARAM3)); |
| 849 | 849 | ||
| 850 | // Compile entire program | 850 | // Compile entire program |
| 851 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | 851 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 30aa7ff30..5468459d4 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock { | |||
| 36 | public: | 36 | public: |
| 37 | JitShader(); | 37 | JitShader(); |
| 38 | 38 | ||
| 39 | void Run(void* registers, unsigned offset) const { | 39 | void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const { |
| 40 | program(registers, code_ptr[offset]); | 40 | program(&setup, &state, code_ptr[offset]); |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | void Compile(); | 43 | void Compile(); |
| @@ -117,7 +117,7 @@ private: | |||
| 117 | /// Branches that need to be fixed up once the entire shader program is compiled | 117 | /// Branches that need to be fixed up once the entire shader program is compiled |
| 118 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; | 118 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; |
| 119 | 119 | ||
| 120 | using CompiledShader = void(void* registers, const u8* start_addr); | 120 | using CompiledShader = void(const void* setup, void* state, const u8* start_addr); |
| 121 | CompiledShader* program = nullptr; | 121 | CompiledShader* program = nullptr; |
| 122 | }; | 122 | }; |
| 123 | 123 | ||
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 21ae52949..83896814f 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp | |||
| @@ -124,7 +124,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I | |||
| 124 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | 124 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); |
| 125 | } else if (vertex_attribute_is_default[i]) { | 125 | } else if (vertex_attribute_is_default[i]) { |
| 126 | // Load the default attribute if we're configured to do so | 126 | // Load the default attribute if we're configured to do so |
| 127 | input.attr[i] = g_state.vs.default_attributes[i]; | 127 | input.attr[i] = g_state.vs_default_attributes[i]; |
| 128 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | 128 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", |
| 129 | i, vertex, index, | 129 | i, vertex, index, |
| 130 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | 130 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), |