summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt1
-rw-r--r--src/audio_core/CMakeLists.txt4
-rw-r--r--src/audio_core/hle/dsp.cpp75
-rw-r--r--src/audio_core/hle/dsp.h2
-rw-r--r--src/audio_core/hle/mixers.cpp201
-rw-r--r--src/audio_core/hle/mixers.h63
-rw-r--r--src/audio_core/hle/source.cpp4
-rw-r--r--src/audio_core/time_stretch.cpp144
-rw-r--r--src/audio_core/time_stretch.h57
-rw-r--r--src/citra_qt/CMakeLists.txt3
-rw-r--r--src/citra_qt/configure.ui11
-rw-r--r--src/citra_qt/configure_audio.cpp44
-rw-r--r--src/citra_qt/configure_audio.h27
-rw-r--r--src/citra_qt/configure_audio.ui48
-rw-r--r--src/citra_qt/configure_dialog.cpp1
-rw-r--r--src/citra_qt/debugger/graphics_tracing.cpp2
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.cpp2
-rw-r--r--src/citra_qt/debugger/profiler.cpp16
-rw-r--r--src/common/swap.h68
-rw-r--r--src/core/CMakeLists.txt4
-rw-r--r--src/core/arm/arm_interface.h1
-rw-r--r--src/core/arm/dyncom/arm_dyncom.cpp2
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp12
-rw-r--r--src/core/gdbstub/gdbstub.cpp6
-rw-r--r--src/core/hle/applets/applet.h1
-rw-r--r--src/core/hle/applets/mii_selector.cpp9
-rw-r--r--src/core/hle/applets/swkbd.cpp8
-rw-r--r--src/core/hle/function_wrappers.h3
-rw-r--r--src/core/hle/kernel/memory.cpp5
-rw-r--r--src/core/hle/kernel/process.cpp2
-rw-r--r--src/core/hle/kernel/process.h7
-rw-r--r--src/core/hle/kernel/shared_memory.cpp177
-rw-r--r--src/core/hle/kernel/shared_memory.h48
-rw-r--r--src/core/hle/kernel/thread.cpp86
-rw-r--r--src/core/hle/kernel/thread.h4
-rw-r--r--src/core/hle/result.h1
-rw-r--r--src/core/hle/service/act_a.cpp26
-rw-r--r--src/core/hle/service/act_a.h23
-rw-r--r--src/core/hle/service/act_u.cpp3
-rw-r--r--src/core/hle/service/apt/apt.cpp58
-rw-r--r--src/core/hle/service/apt/bcfnt/bcfnt.cpp71
-rw-r--r--src/core/hle/service/apt/bcfnt/bcfnt.h87
-rw-r--r--src/core/hle/service/csnd_snd.cpp13
-rw-r--r--src/core/hle/service/dsp_dsp.cpp4
-rw-r--r--src/core/hle/service/gsp_gpu.cpp5
-rw-r--r--src/core/hle/service/hid/hid.cpp5
-rw-r--r--src/core/hle/service/ir/ir.cpp5
-rw-r--r--src/core/hle/service/service.cpp2
-rw-r--r--src/core/hle/svc.cpp57
-rw-r--r--src/core/memory.h6
-rw-r--r--src/tests/CMakeLists.txt16
-rw-r--r--src/tests/tests.cpp9
-rw-r--r--src/video_core/clipper.cpp4
-rw-r--r--src/video_core/command_processor.cpp10
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp199
-rw-r--r--src/video_core/debug_utils/debug_utils.h6
-rw-r--r--src/video_core/pica.cpp2
-rw-r--r--src/video_core/pica.h30
-rw-r--r--src/video_core/pica_state.h2
-rw-r--r--src/video_core/rasterizer.cpp55
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp43
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h229
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp177
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_state.h2
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h20
-rw-r--r--src/video_core/shader/shader.cpp33
-rw-r--r--src/video_core/shader/shader.h126
-rw-r--r--src/video_core/shader/shader_interpreter.cpp74
-rw-r--r--src/video_core/shader/shader_interpreter.h2
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp32
-rw-r--r--src/video_core/shader/shader_jit_x64.h6
-rw-r--r--src/video_core/vertex_loader.cpp2
75 files changed, 1961 insertions, 642 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index de4fe716a..1e1245160 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -5,6 +5,7 @@ add_subdirectory(common)
5add_subdirectory(core) 5add_subdirectory(core)
6add_subdirectory(video_core) 6add_subdirectory(video_core)
7add_subdirectory(audio_core) 7add_subdirectory(audio_core)
8add_subdirectory(tests)
8if (ENABLE_SDL2) 9if (ENABLE_SDL2)
9 add_subdirectory(citra) 10 add_subdirectory(citra)
10endif() 11endif()
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 13b5e400e..a72a907ef 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -3,10 +3,12 @@ set(SRCS
3 codec.cpp 3 codec.cpp
4 hle/dsp.cpp 4 hle/dsp.cpp
5 hle/filter.cpp 5 hle/filter.cpp
6 hle/mixers.cpp
6 hle/pipe.cpp 7 hle/pipe.cpp
7 hle/source.cpp 8 hle/source.cpp
8 interpolate.cpp 9 interpolate.cpp
9 sink_details.cpp 10 sink_details.cpp
11 time_stretch.cpp
10 ) 12 )
11 13
12set(HEADERS 14set(HEADERS
@@ -15,12 +17,14 @@ set(HEADERS
15 hle/common.h 17 hle/common.h
16 hle/dsp.h 18 hle/dsp.h
17 hle/filter.h 19 hle/filter.h
20 hle/mixers.h
18 hle/pipe.h 21 hle/pipe.h
19 hle/source.h 22 hle/source.h
20 interpolate.h 23 interpolate.h
21 null_sink.h 24 null_sink.h
22 sink.h 25 sink.h
23 sink_details.h 26 sink_details.h
27 time_stretch.h
24 ) 28 )
25 29
26include_directories(../../externals/soundtouch/include) 30include_directories(../../externals/soundtouch/include)
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
index 0cdbdb06a..0640e1eff 100644
--- a/src/audio_core/hle/dsp.cpp
+++ b/src/audio_core/hle/dsp.cpp
@@ -6,13 +6,17 @@
6#include <memory> 6#include <memory>
7 7
8#include "audio_core/hle/dsp.h" 8#include "audio_core/hle/dsp.h"
9#include "audio_core/hle/mixers.h"
9#include "audio_core/hle/pipe.h" 10#include "audio_core/hle/pipe.h"
10#include "audio_core/hle/source.h" 11#include "audio_core/hle/source.h"
11#include "audio_core/sink.h" 12#include "audio_core/sink.h"
13#include "audio_core/time_stretch.h"
12 14
13namespace DSP { 15namespace DSP {
14namespace HLE { 16namespace HLE {
15 17
18// Region management
19
16std::array<SharedMemory, 2> g_regions; 20std::array<SharedMemory, 2> g_regions;
17 21
18static size_t CurrentRegionIndex() { 22static size_t CurrentRegionIndex() {
@@ -40,43 +44,96 @@ static SharedMemory& WriteRegion() {
40 return g_regions[1 - CurrentRegionIndex()]; 44 return g_regions[1 - CurrentRegionIndex()];
41} 45}
42 46
47// Audio processing and mixing
48
43static std::array<Source, num_sources> sources = { 49static std::array<Source, num_sources> sources = {
44 Source(0), Source(1), Source(2), Source(3), Source(4), Source(5), 50 Source(0), Source(1), Source(2), Source(3), Source(4), Source(5),
45 Source(6), Source(7), Source(8), Source(9), Source(10), Source(11), 51 Source(6), Source(7), Source(8), Source(9), Source(10), Source(11),
46 Source(12), Source(13), Source(14), Source(15), Source(16), Source(17), 52 Source(12), Source(13), Source(14), Source(15), Source(16), Source(17),
47 Source(18), Source(19), Source(20), Source(21), Source(22), Source(23) 53 Source(18), Source(19), Source(20), Source(21), Source(22), Source(23)
48}; 54};
55static Mixers mixers;
56
57static StereoFrame16 GenerateCurrentFrame() {
58 SharedMemory& read = ReadRegion();
59 SharedMemory& write = WriteRegion();
60
61 std::array<QuadFrame32, 3> intermediate_mixes = {};
62
63 // Generate intermediate mixes
64 for (size_t i = 0; i < num_sources; i++) {
65 write.source_statuses.status[i] = sources[i].Tick(read.source_configurations.config[i], read.adpcm_coefficients.coeff[i]);
66 for (size_t mix = 0; mix < 3; mix++) {
67 sources[i].MixInto(intermediate_mixes[mix], mix);
68 }
69 }
70
71 // Generate final mix
72 write.dsp_status = mixers.Tick(read.dsp_configuration, read.intermediate_mix_samples, write.intermediate_mix_samples, intermediate_mixes);
73
74 StereoFrame16 output_frame = mixers.GetOutput();
75
76 // Write current output frame to the shared memory region
77 for (size_t samplei = 0; samplei < output_frame.size(); samplei++) {
78 for (size_t channeli = 0; channeli < output_frame[0].size(); channeli++) {
79 write.final_samples.pcm16[samplei][channeli] = s16_le(output_frame[samplei][channeli]);
80 }
81 }
82
83 return output_frame;
84}
85
86// Audio output
49 87
50static std::unique_ptr<AudioCore::Sink> sink; 88static std::unique_ptr<AudioCore::Sink> sink;
89static AudioCore::TimeStretcher time_stretcher;
90
91static void OutputCurrentFrame(const StereoFrame16& frame) {
92 time_stretcher.AddSamples(&frame[0][0], frame.size());
93 sink->EnqueueSamples(time_stretcher.Process(sink->SamplesInQueue()));
94}
95
96// Public Interface
51 97
52void Init() { 98void Init() {
53 DSP::HLE::ResetPipes(); 99 DSP::HLE::ResetPipes();
100
54 for (auto& source : sources) { 101 for (auto& source : sources) {
55 source.Reset(); 102 source.Reset();
56 } 103 }
104
105 mixers.Reset();
106
107 time_stretcher.Reset();
108 if (sink) {
109 time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate());
110 }
57} 111}
58 112
59void Shutdown() { 113void Shutdown() {
114 time_stretcher.Flush();
115 while (true) {
116 std::vector<s16> residual_audio = time_stretcher.Process(sink->SamplesInQueue());
117 if (residual_audio.empty())
118 break;
119 sink->EnqueueSamples(residual_audio);
120 }
60} 121}
61 122
62bool Tick() { 123bool Tick() {
63 SharedMemory& read = ReadRegion(); 124 StereoFrame16 current_frame = {};
64 SharedMemory& write = WriteRegion();
65 125
66 std::array<QuadFrame32, 3> intermediate_mixes = {}; 126 // TODO: Check dsp::DSP semaphore (which indicates emulated application has finished writing to shared memory region)
127 current_frame = GenerateCurrentFrame();
67 128
68 for (size_t i = 0; i < num_sources; i++) { 129 OutputCurrentFrame(current_frame);
69 write.source_statuses.status[i] = sources[i].Tick(read.source_configurations.config[i], read.adpcm_coefficients.coeff[i]);
70 for (size_t mix = 0; mix < 3; mix++) {
71 sources[i].MixInto(intermediate_mixes[mix], mix);
72 }
73 }
74 130
75 return true; 131 return true;
76} 132}
77 133
78void SetSink(std::unique_ptr<AudioCore::Sink> sink_) { 134void SetSink(std::unique_ptr<AudioCore::Sink> sink_) {
79 sink = std::move(sink_); 135 sink = std::move(sink_);
136 time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate());
80} 137}
81 138
82} // namespace HLE 139} // namespace HLE
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h
index f6e53f68f..9275cd7de 100644
--- a/src/audio_core/hle/dsp.h
+++ b/src/audio_core/hle/dsp.h
@@ -428,7 +428,7 @@ ASSERT_DSP_STRUCT(DspStatus, 32);
428/// Final mixed output in PCM16 stereo format, what you hear out of the speakers. 428/// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
429/// When the application writes to this region it has no effect. 429/// When the application writes to this region it has no effect.
430struct FinalMixSamples { 430struct FinalMixSamples {
431 s16_le pcm16[2 * samples_per_frame]; 431 s16_le pcm16[samples_per_frame][2];
432}; 432};
433ASSERT_DSP_STRUCT(FinalMixSamples, 640); 433ASSERT_DSP_STRUCT(FinalMixSamples, 640);
434 434
diff --git a/src/audio_core/hle/mixers.cpp b/src/audio_core/hle/mixers.cpp
new file mode 100644
index 000000000..18335f7f0
--- /dev/null
+++ b/src/audio_core/hle/mixers.cpp
@@ -0,0 +1,201 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstddef>
6
7#include "audio_core/hle/common.h"
8#include "audio_core/hle/dsp.h"
9#include "audio_core/hle/mixers.h"
10
11#include "common/assert.h"
12#include "common/logging/log.h"
13#include "common/math_util.h"
14
15namespace DSP {
16namespace HLE {
17
18void Mixers::Reset() {
19 current_frame.fill({});
20 state = {};
21}
22
23DspStatus Mixers::Tick(DspConfiguration& config,
24 const IntermediateMixSamples& read_samples,
25 IntermediateMixSamples& write_samples,
26 const std::array<QuadFrame32, 3>& input)
27{
28 ParseConfig(config);
29
30 AuxReturn(read_samples);
31 AuxSend(write_samples, input);
32
33 MixCurrentFrame();
34
35 return GetCurrentStatus();
36}
37
38void Mixers::ParseConfig(DspConfiguration& config) {
39 if (!config.dirty_raw) {
40 return;
41 }
42
43 if (config.mixer1_enabled_dirty) {
44 config.mixer1_enabled_dirty.Assign(0);
45 state.mixer1_enabled = config.mixer1_enabled != 0;
46 LOG_TRACE(Audio_DSP, "mixers mixer1_enabled = %hu", config.mixer1_enabled);
47 }
48
49 if (config.mixer2_enabled_dirty) {
50 config.mixer2_enabled_dirty.Assign(0);
51 state.mixer2_enabled = config.mixer2_enabled != 0;
52 LOG_TRACE(Audio_DSP, "mixers mixer2_enabled = %hu", config.mixer2_enabled);
53 }
54
55 if (config.volume_0_dirty) {
56 config.volume_0_dirty.Assign(0);
57 state.intermediate_mixer_volume[0] = config.volume[0];
58 LOG_TRACE(Audio_DSP, "mixers volume[0] = %f", config.volume[0]);
59 }
60
61 if (config.volume_1_dirty) {
62 config.volume_1_dirty.Assign(0);
63 state.intermediate_mixer_volume[1] = config.volume[1];
64 LOG_TRACE(Audio_DSP, "mixers volume[1] = %f", config.volume[1]);
65 }
66
67 if (config.volume_2_dirty) {
68 config.volume_2_dirty.Assign(0);
69 state.intermediate_mixer_volume[2] = config.volume[2];
70 LOG_TRACE(Audio_DSP, "mixers volume[2] = %f", config.volume[2]);
71 }
72
73 if (config.output_format_dirty) {
74 config.output_format_dirty.Assign(0);
75 state.output_format = config.output_format;
76 LOG_TRACE(Audio_DSP, "mixers output_format = %zu", static_cast<size_t>(config.output_format));
77 }
78
79 if (config.headphones_connected_dirty) {
80 config.headphones_connected_dirty.Assign(0);
81 // Do nothing.
82 // (Note: Whether headphones are connected does affect coefficients used for surround sound.)
83 LOG_TRACE(Audio_DSP, "mixers headphones_connected=%hu", config.headphones_connected);
84 }
85
86 if (config.dirty_raw) {
87 LOG_DEBUG(Audio_DSP, "mixers remaining_dirty=%x", config.dirty_raw);
88 }
89
90 config.dirty_raw = 0;
91}
92
93static s16 ClampToS16(s32 value) {
94 return static_cast<s16>(MathUtil::Clamp(value, -32768, 32767));
95}
96
97static std::array<s16, 2> AddAndClampToS16(const std::array<s16, 2>& a, const std::array<s16, 2>& b) {
98 return {
99 ClampToS16(static_cast<s32>(a[0]) + static_cast<s32>(b[0])),
100 ClampToS16(static_cast<s32>(a[1]) + static_cast<s32>(b[1]))
101 };
102}
103
104void Mixers::DownmixAndMixIntoCurrentFrame(float gain, const QuadFrame32& samples) {
105 // TODO(merry): Limiter. (Currently we're performing final mixing assuming a disabled limiter.)
106
107 switch (state.output_format) {
108 case OutputFormat::Mono:
109 std::transform(current_frame.begin(), current_frame.end(), samples.begin(), current_frame.begin(),
110 [gain](const std::array<s16, 2>& accumulator, const std::array<s32, 4>& sample) -> std::array<s16, 2> {
111 // Downmix to mono
112 s16 mono = ClampToS16(static_cast<s32>((gain * sample[0] + gain * sample[1] + gain * sample[2] + gain * sample[3]) / 2));
113 // Mix into current frame
114 return AddAndClampToS16(accumulator, { mono, mono });
115 });
116 return;
117
118 case OutputFormat::Surround:
119 // TODO(merry): Implement surround sound.
120 // fallthrough
121
122 case OutputFormat::Stereo:
123 std::transform(current_frame.begin(), current_frame.end(), samples.begin(), current_frame.begin(),
124 [gain](const std::array<s16, 2>& accumulator, const std::array<s32, 4>& sample) -> std::array<s16, 2> {
125 // Downmix to stereo
126 s16 left = ClampToS16(static_cast<s32>(gain * sample[0] + gain * sample[2]));
127 s16 right = ClampToS16(static_cast<s32>(gain * sample[1] + gain * sample[3]));
128 // Mix into current frame
129 return AddAndClampToS16(accumulator, { left, right });
130 });
131 return;
132 }
133
134 UNREACHABLE_MSG("Invalid output_format %zu", static_cast<size_t>(state.output_format));
135}
136
137void Mixers::AuxReturn(const IntermediateMixSamples& read_samples) {
138 // NOTE: read_samples.mix{1,2}.pcm32 annoyingly have their dimensions in reverse order to QuadFrame32.
139
140 if (state.mixer1_enabled) {
141 for (size_t sample = 0; sample < samples_per_frame; sample++) {
142 for (size_t channel = 0; channel < 4; channel++) {
143 state.intermediate_mix_buffer[1][sample][channel] = read_samples.mix1.pcm32[channel][sample];
144 }
145 }
146 }
147
148 if (state.mixer2_enabled) {
149 for (size_t sample = 0; sample < samples_per_frame; sample++) {
150 for (size_t channel = 0; channel < 4; channel++) {
151 state.intermediate_mix_buffer[2][sample][channel] = read_samples.mix2.pcm32[channel][sample];
152 }
153 }
154 }
155}
156
157void Mixers::AuxSend(IntermediateMixSamples& write_samples, const std::array<QuadFrame32, 3>& input) {
158 // NOTE: write_samples.mix{1,2}.pcm32 annoyingly have their dimensions in reverse order to QuadFrame32.
159
160 state.intermediate_mix_buffer[0] = input[0];
161
162 if (state.mixer1_enabled) {
163 for (size_t sample = 0; sample < samples_per_frame; sample++) {
164 for (size_t channel = 0; channel < 4; channel++) {
165 write_samples.mix1.pcm32[channel][sample] = input[1][sample][channel];
166 }
167 }
168 } else {
169 state.intermediate_mix_buffer[1] = input[1];
170 }
171
172 if (state.mixer2_enabled) {
173 for (size_t sample = 0; sample < samples_per_frame; sample++) {
174 for (size_t channel = 0; channel < 4; channel++) {
175 write_samples.mix2.pcm32[channel][sample] = input[2][sample][channel];
176 }
177 }
178 } else {
179 state.intermediate_mix_buffer[2] = input[2];
180 }
181}
182
183void Mixers::MixCurrentFrame() {
184 current_frame.fill({});
185
186 for (size_t mix = 0; mix < 3; mix++) {
187 DownmixAndMixIntoCurrentFrame(state.intermediate_mixer_volume[mix], state.intermediate_mix_buffer[mix]);
188 }
189
190 // TODO(merry): Compressor. (We currently assume a disabled compressor.)
191}
192
193DspStatus Mixers::GetCurrentStatus() const {
194 DspStatus status;
195 status.unknown = 0;
196 status.dropped_frames = 0;
197 return status;
198}
199
200} // namespace HLE
201} // namespace DSP
diff --git a/src/audio_core/hle/mixers.h b/src/audio_core/hle/mixers.h
new file mode 100644
index 000000000..b52952eb5
--- /dev/null
+++ b/src/audio_core/hle/mixers.h
@@ -0,0 +1,63 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "audio_core/hle/common.h"
10#include "audio_core/hle/dsp.h"
11
12namespace DSP {
13namespace HLE {
14
15class Mixers final {
16public:
17 Mixers() {
18 Reset();
19 }
20
21 void Reset();
22
23 DspStatus Tick(DspConfiguration& config,
24 const IntermediateMixSamples& read_samples,
25 IntermediateMixSamples& write_samples,
26 const std::array<QuadFrame32, 3>& input);
27
28 StereoFrame16 GetOutput() const {
29 return current_frame;
30 }
31
32private:
33 StereoFrame16 current_frame = {};
34
35 using OutputFormat = DspConfiguration::OutputFormat;
36
37 struct {
38 std::array<float, 3> intermediate_mixer_volume = {};
39
40 bool mixer1_enabled = false;
41 bool mixer2_enabled = false;
42 std::array<QuadFrame32, 3> intermediate_mix_buffer = {};
43
44 OutputFormat output_format = OutputFormat::Stereo;
45
46 } state;
47
48 /// INTERNAL: Update our internal state based on the current config.
49 void ParseConfig(DspConfiguration& config);
50 /// INTERNAL: Read samples from shared memory that have been modified by the ARM11.
51 void AuxReturn(const IntermediateMixSamples& read_samples);
52 /// INTERNAL: Write samples to shared memory for the ARM11 to modify.
53 void AuxSend(IntermediateMixSamples& write_samples, const std::array<QuadFrame32, 3>& input);
54 /// INTERNAL: Mix current_frame.
55 void MixCurrentFrame();
56 /// INTERNAL: Downmix from quadraphonic to stereo based on state.output_format and accumulate into current_frame.
57 void DownmixAndMixIntoCurrentFrame(float gain, const QuadFrame32& samples);
58 /// INTERNAL: Generate DspStatus based on internal state.
59 DspStatus GetCurrentStatus() const;
60};
61
62} // namespace HLE
63} // namespace DSP
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp
index daaf6e3f3..30552fe26 100644
--- a/src/audio_core/hle/source.cpp
+++ b/src/audio_core/hle/source.cpp
@@ -126,13 +126,13 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config, const s16_l
126 if (config.simple_filter_dirty) { 126 if (config.simple_filter_dirty) {
127 config.simple_filter_dirty.Assign(0); 127 config.simple_filter_dirty.Assign(0);
128 state.filters.Configure(config.simple_filter); 128 state.filters.Configure(config.simple_filter);
129 LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update"); 129 LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update", source_id);
130 } 130 }
131 131
132 if (config.biquad_filter_dirty) { 132 if (config.biquad_filter_dirty) {
133 config.biquad_filter_dirty.Assign(0); 133 config.biquad_filter_dirty.Assign(0);
134 state.filters.Configure(config.biquad_filter); 134 state.filters.Configure(config.biquad_filter);
135 LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update"); 135 LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update", source_id);
136 } 136 }
137 137
138 if (config.interpolation_dirty) { 138 if (config.interpolation_dirty) {
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
new file mode 100644
index 000000000..ea38f40d0
--- /dev/null
+++ b/src/audio_core/time_stretch.cpp
@@ -0,0 +1,144 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <chrono>
6#include <cmath>
7#include <vector>
8
9#include <SoundTouch.h>
10
11#include "audio_core/audio_core.h"
12#include "audio_core/time_stretch.h"
13
14#include "common/common_types.h"
15#include "common/logging/log.h"
16#include "common/math_util.h"
17
18using steady_clock = std::chrono::steady_clock;
19
20namespace AudioCore {
21
22constexpr double MIN_RATIO = 0.1;
23constexpr double MAX_RATIO = 100.0;
24
25static double ClampRatio(double ratio) {
26 return MathUtil::Clamp(ratio, MIN_RATIO, MAX_RATIO);
27}
28
29constexpr double MIN_DELAY_TIME = 0.05; // Units: seconds
30constexpr double MAX_DELAY_TIME = 0.25; // Units: seconds
31constexpr size_t DROP_FRAMES_SAMPLE_DELAY = 16000; // Units: samples
32
33constexpr double SMOOTHING_FACTOR = 0.007;
34
35struct TimeStretcher::Impl {
36 soundtouch::SoundTouch soundtouch;
37
38 steady_clock::time_point frame_timer = steady_clock::now();
39 size_t samples_queued = 0;
40
41 double smoothed_ratio = 1.0;
42
43 double sample_rate = static_cast<double>(native_sample_rate);
44};
45
46std::vector<s16> TimeStretcher::Process(size_t samples_in_queue) {
47 // This is a very simple algorithm without any fancy control theory. It works and is stable.
48
49 double ratio = CalculateCurrentRatio();
50 ratio = CorrectForUnderAndOverflow(ratio, samples_in_queue);
51 impl->smoothed_ratio = (1.0 - SMOOTHING_FACTOR) * impl->smoothed_ratio + SMOOTHING_FACTOR * ratio;
52 impl->smoothed_ratio = ClampRatio(impl->smoothed_ratio);
53
54 // SoundTouch's tempo definition is the inverse of our ratio definition.
55 impl->soundtouch.setTempo(1.0 / impl->smoothed_ratio);
56
57 std::vector<s16> samples = GetSamples();
58 if (samples_in_queue >= DROP_FRAMES_SAMPLE_DELAY) {
59 samples.clear();
60 LOG_DEBUG(Audio, "Dropping frames!");
61 }
62 return samples;
63}
64
65TimeStretcher::TimeStretcher() : impl(std::make_unique<Impl>()) {
66 impl->soundtouch.setPitch(1.0);
67 impl->soundtouch.setChannels(2);
68 impl->soundtouch.setSampleRate(native_sample_rate);
69 Reset();
70}
71
72TimeStretcher::~TimeStretcher() {
73 impl->soundtouch.clear();
74}
75
76void TimeStretcher::SetOutputSampleRate(unsigned int sample_rate) {
77 impl->sample_rate = static_cast<double>(sample_rate);
78 impl->soundtouch.setRate(static_cast<double>(native_sample_rate) / impl->sample_rate);
79}
80
81void TimeStretcher::AddSamples(const s16* buffer, size_t num_samples) {
82 impl->soundtouch.putSamples(buffer, static_cast<uint>(num_samples));
83 impl->samples_queued += num_samples;
84}
85
86void TimeStretcher::Flush() {
87 impl->soundtouch.flush();
88}
89
90void TimeStretcher::Reset() {
91 impl->soundtouch.setTempo(1.0);
92 impl->soundtouch.clear();
93 impl->smoothed_ratio = 1.0;
94 impl->frame_timer = steady_clock::now();
95 impl->samples_queued = 0;
96 SetOutputSampleRate(native_sample_rate);
97}
98
99double TimeStretcher::CalculateCurrentRatio() {
100 const steady_clock::time_point now = steady_clock::now();
101 const std::chrono::duration<double> duration = now - impl->frame_timer;
102
103 const double expected_time = static_cast<double>(impl->samples_queued) / static_cast<double>(native_sample_rate);
104 const double actual_time = duration.count();
105
106 double ratio;
107 if (expected_time != 0) {
108 ratio = ClampRatio(actual_time / expected_time);
109 } else {
110 ratio = impl->smoothed_ratio;
111 }
112
113 impl->frame_timer = now;
114 impl->samples_queued = 0;
115
116 return ratio;
117}
118
119double TimeStretcher::CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const {
120 const size_t min_sample_delay = static_cast<size_t>(MIN_DELAY_TIME * impl->sample_rate);
121 const size_t max_sample_delay = static_cast<size_t>(MAX_DELAY_TIME * impl->sample_rate);
122
123 if (sample_delay < min_sample_delay) {
124 // Make the ratio bigger.
125 ratio = ratio > 1.0 ? ratio * ratio : sqrt(ratio);
126 } else if (sample_delay > max_sample_delay) {
127 // Make the ratio smaller.
128 ratio = ratio > 1.0 ? sqrt(ratio) : ratio * ratio;
129 }
130
131 return ClampRatio(ratio);
132}
133
134std::vector<s16> TimeStretcher::GetSamples() {
135 uint available = impl->soundtouch.numSamples();
136
137 std::vector<s16> output(static_cast<size_t>(available) * 2);
138
139 impl->soundtouch.receiveSamples(output.data(), available);
140
141 return output;
142}
143
144} // namespace AudioCore
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
new file mode 100644
index 000000000..1fde3f72a
--- /dev/null
+++ b/src/audio_core/time_stretch.h
@@ -0,0 +1,57 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstddef>
6#include <memory>
7#include <vector>
8
9#include "common/common_types.h"
10
11namespace AudioCore {
12
13class TimeStretcher final {
14public:
15 TimeStretcher();
16 ~TimeStretcher();
17
18 /**
19 * Set sample rate for the samples that Process returns.
20 * @param sample_rate The sample rate.
21 */
22 void SetOutputSampleRate(unsigned int sample_rate);
23
24 /**
25 * Add samples to be processed.
26 * @param sample_buffer Buffer of samples in interleaved stereo PCM16 format.
27 * @param num_sample Number of samples.
28 */
29 void AddSamples(const s16* sample_buffer, size_t num_samples);
30
31 /// Flush audio remaining in internal buffers.
32 void Flush();
33
34 /// Resets internal state and clears buffers.
35 void Reset();
36
37 /**
38 * Does audio stretching and produces the time-stretched samples.
39 * Timer calculations use sample_delay to determine how much of a margin we have.
40 * @param sample_delay How many samples are buffered downstream of this module and haven't been played yet.
41 * @return Samples to play in interleaved stereo PCM16 format.
42 */
43 std::vector<s16> Process(size_t sample_delay);
44
45private:
46 struct Impl;
47 std::unique_ptr<Impl> impl;
48
49 /// INTERNAL: ratio = wallclock time / emulated time
50 double CalculateCurrentRatio();
51 /// INTERNAL: If we have too many or too few samples downstream, nudge ratio in the appropriate direction.
52 double CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const;
53 /// INTERNAL: Gets the time-stretched samples from SoundTouch.
54 std::vector<s16> GetSamples();
55};
56
57} // namespace AudioCore
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 3f0099200..0a5d4624b 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -20,6 +20,7 @@ set(SRCS
20 util/spinbox.cpp 20 util/spinbox.cpp
21 util/util.cpp 21 util/util.cpp
22 bootmanager.cpp 22 bootmanager.cpp
23 configure_audio.cpp
23 configure_debug.cpp 24 configure_debug.cpp
24 configure_dialog.cpp 25 configure_dialog.cpp
25 configure_general.cpp 26 configure_general.cpp
@@ -51,6 +52,7 @@ set(HEADERS
51 util/spinbox.h 52 util/spinbox.h
52 util/util.h 53 util/util.h
53 bootmanager.h 54 bootmanager.h
55 configure_audio.h
54 configure_debug.h 56 configure_debug.h
55 configure_dialog.h 57 configure_dialog.h
56 configure_general.h 58 configure_general.h
@@ -69,6 +71,7 @@ set(UIS
69 debugger/profiler.ui 71 debugger/profiler.ui
70 debugger/registers.ui 72 debugger/registers.ui
71 configure.ui 73 configure.ui
74 configure_audio.ui
72 configure_debug.ui 75 configure_debug.ui
73 configure_general.ui 76 configure_general.ui
74 hotkeys.ui 77 hotkeys.ui
diff --git a/src/citra_qt/configure.ui b/src/citra_qt/configure.ui
index 6ae056ff9..e1624bbef 100644
--- a/src/citra_qt/configure.ui
+++ b/src/citra_qt/configure.ui
@@ -29,6 +29,11 @@
29 <string>Input</string> 29 <string>Input</string>
30 </attribute> 30 </attribute>
31 </widget> 31 </widget>
32 <widget class="ConfigureAudio" name="audioTab">
33 <attribute name="title">
34 <string>Audio</string>
35 </attribute>
36 </widget>
32 <widget class="ConfigureDebug" name="debugTab"> 37 <widget class="ConfigureDebug" name="debugTab">
33 <attribute name="title"> 38 <attribute name="title">
34 <string>Debug</string> 39 <string>Debug</string>
@@ -53,6 +58,12 @@
53 <container>1</container> 58 <container>1</container>
54 </customwidget> 59 </customwidget>
55 <customwidget> 60 <customwidget>
61 <class>ConfigureAudio</class>
62 <extends>QWidget</extends>
63 <header>configure_audio.h</header>
64 <container>1</container>
65 </customwidget>
66 <customwidget>
56 <class>ConfigureDebug</class> 67 <class>ConfigureDebug</class>
57 <extends>QWidget</extends> 68 <extends>QWidget</extends>
58 <header>configure_debug.h</header> 69 <header>configure_debug.h</header>
diff --git a/src/citra_qt/configure_audio.cpp b/src/citra_qt/configure_audio.cpp
new file mode 100644
index 000000000..cedfa2f2a
--- /dev/null
+++ b/src/citra_qt/configure_audio.cpp
@@ -0,0 +1,44 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "audio_core/sink_details.h"
6
7#include "citra_qt/configure_audio.h"
8#include "ui_configure_audio.h"
9
10#include "core/settings.h"
11
12ConfigureAudio::ConfigureAudio(QWidget* parent) :
13 QWidget(parent),
14 ui(std::make_unique<Ui::ConfigureAudio>())
15{
16 ui->setupUi(this);
17
18 ui->output_sink_combo_box->clear();
19 ui->output_sink_combo_box->addItem("auto");
20 for (const auto& sink_detail : AudioCore::g_sink_details) {
21 ui->output_sink_combo_box->addItem(sink_detail.id);
22 }
23
24 this->setConfiguration();
25}
26
27ConfigureAudio::~ConfigureAudio() {
28}
29
30void ConfigureAudio::setConfiguration() {
31 int new_sink_index = 0;
32 for (int index = 0; index < ui->output_sink_combo_box->count(); index++) {
33 if (ui->output_sink_combo_box->itemText(index).toStdString() == Settings::values.sink_id) {
34 new_sink_index = index;
35 break;
36 }
37 }
38 ui->output_sink_combo_box->setCurrentIndex(new_sink_index);
39}
40
41void ConfigureAudio::applyConfiguration() {
42 Settings::values.sink_id = ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex()).toStdString();
43 Settings::Apply();
44}
diff --git a/src/citra_qt/configure_audio.h b/src/citra_qt/configure_audio.h
new file mode 100644
index 000000000..51df2e27b
--- /dev/null
+++ b/src/citra_qt/configure_audio.h
@@ -0,0 +1,27 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <QWidget>
9
10namespace Ui {
11class ConfigureAudio;
12}
13
14class ConfigureAudio : public QWidget {
15 Q_OBJECT
16
17public:
18 explicit ConfigureAudio(QWidget* parent = nullptr);
19 ~ConfigureAudio();
20
21 void applyConfiguration();
22
23private:
24 void setConfiguration();
25
26 std::unique_ptr<Ui::ConfigureAudio> ui;
27};
diff --git a/src/citra_qt/configure_audio.ui b/src/citra_qt/configure_audio.ui
new file mode 100644
index 000000000..d7f6946ca
--- /dev/null
+++ b/src/citra_qt/configure_audio.ui
@@ -0,0 +1,48 @@
1<?xml version="1.0" encoding="utf-8"?>
2
3<ui version="4.0">
4 <class>ConfigureAudio</class>
5 <widget class="QWidget" name="ConfigureAudio">
6 <layout class="QVBoxLayout">
7 <item>
8 <widget class="QGroupBox">
9 <property name="title">
10 <string>Audio</string>
11 </property>
12 <layout class="QVBoxLayout">
13 <item>
14 <layout class="QHBoxLayout">
15 <item>
16 <widget class="QLabel">
17 <property name="text">
18 <string>Output Engine:</string>
19 </property>
20 </widget>
21 </item>
22 <item>
23 <widget class="QComboBox" name="output_sink_combo_box">
24 </widget>
25 </item>
26 </layout>
27 </item>
28 </layout>
29 </widget>
30 </item>
31 <item>
32 <spacer>
33 <property name="orientation">
34 <enum>Qt::Vertical</enum>
35 </property>
36 <property name="sizeHint" stdset="0">
37 <size>
38 <width>20</width>
39 <height>40</height>
40 </size>
41 </property>
42 </spacer>
43 </item>
44 </layout>
45 </widget>
46 <resources />
47 <connections />
48</ui>
diff --git a/src/citra_qt/configure_dialog.cpp b/src/citra_qt/configure_dialog.cpp
index 87c26c715..2f0317fe0 100644
--- a/src/citra_qt/configure_dialog.cpp
+++ b/src/citra_qt/configure_dialog.cpp
@@ -25,5 +25,6 @@ void ConfigureDialog::setConfiguration() {
25 25
26void ConfigureDialog::applyConfiguration() { 26void ConfigureDialog::applyConfiguration() {
27 ui->generalTab->applyConfiguration(); 27 ui->generalTab->applyConfiguration();
28 ui->audioTab->applyConfiguration();
28 ui->debugTab->applyConfiguration(); 29 ui->debugTab->applyConfiguration();
29} 30}
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp
index 1402f8e79..9c80f7ec9 100644
--- a/src/citra_qt/debugger/graphics_tracing.cpp
+++ b/src/citra_qt/debugger/graphics_tracing.cpp
@@ -74,7 +74,7 @@ void GraphicsTracingWidget::StartRecording() {
74 std::array<u32, 4 * 16> default_attributes; 74 std::array<u32, 4 * 16> default_attributes;
75 for (unsigned i = 0; i < 16; ++i) { 75 for (unsigned i = 0; i < 16; ++i) {
76 for (unsigned comp = 0; comp < 3; ++comp) { 76 for (unsigned comp = 0; comp < 3; ++comp) {
77 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32()); 77 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32());
78 } 78 }
79 } 79 }
80 80
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index 854f6ff16..391666d35 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -501,7 +501,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
501 info.labels.insert({ entry_point, "main" }); 501 info.labels.insert({ entry_point, "main" });
502 502
503 // Generate debug information 503 // Generate debug information
504 debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); 504 debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup);
505 505
506 // Reload widget state 506 // Reload widget state
507 for (int attr = 0; attr < num_attributes; ++attr) { 507 for (int attr = 0; attr < num_attributes; ++attr) {
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index 7bb010f77..585ac049a 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -151,6 +151,8 @@ private:
151 /// This timer is used to redraw the widget's contents continuously. To save resources, it only 151 /// This timer is used to redraw the widget's contents continuously. To save resources, it only
152 /// runs while the widget is visible. 152 /// runs while the widget is visible.
153 QTimer update_timer; 153 QTimer update_timer;
154 /// Scale the coordinate system appropriately when physical DPI != logical DPI.
155 qreal x_scale, y_scale;
154}; 156};
155 157
156#endif 158#endif
@@ -220,11 +222,17 @@ MicroProfileWidget::MicroProfileWidget(QWidget* parent) : QWidget(parent) {
220 MicroProfileInitUI(); 222 MicroProfileInitUI();
221 223
222 connect(&update_timer, SIGNAL(timeout()), SLOT(update())); 224 connect(&update_timer, SIGNAL(timeout()), SLOT(update()));
225
226 QPainter painter(this);
227 x_scale = qreal(painter.device()->physicalDpiX()) / qreal(painter.device()->logicalDpiX());
228 y_scale = qreal(painter.device()->physicalDpiY()) / qreal(painter.device()->logicalDpiY());
223} 229}
224 230
225void MicroProfileWidget::paintEvent(QPaintEvent* ev) { 231void MicroProfileWidget::paintEvent(QPaintEvent* ev) {
226 QPainter painter(this); 232 QPainter painter(this);
227 233
234 painter.scale(x_scale, y_scale);
235
228 painter.setBackground(Qt::black); 236 painter.setBackground(Qt::black);
229 painter.eraseRect(rect()); 237 painter.eraseRect(rect());
230 238
@@ -248,24 +256,24 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) {
248} 256}
249 257
250void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) { 258void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) {
251 MicroProfileMousePosition(ev->x(), ev->y(), 0); 259 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
252 ev->accept(); 260 ev->accept();
253} 261}
254 262
255void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) { 263void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) {
256 MicroProfileMousePosition(ev->x(), ev->y(), 0); 264 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
257 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); 265 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
258 ev->accept(); 266 ev->accept();
259} 267}
260 268
261void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { 269void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) {
262 MicroProfileMousePosition(ev->x(), ev->y(), 0); 270 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
263 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); 271 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
264 ev->accept(); 272 ev->accept();
265} 273}
266 274
267void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { 275void MicroProfileWidget::wheelEvent(QWheelEvent* ev) {
268 MicroProfileMousePosition(ev->x(), ev->y(), ev->delta() / 120); 276 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120);
269 ev->accept(); 277 ev->accept();
270} 278}
271 279
diff --git a/src/common/swap.h b/src/common/swap.h
index a7c37bc44..1749bd7a4 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -25,6 +25,8 @@
25 #include <sys/endian.h> 25 #include <sys/endian.h>
26#endif 26#endif
27 27
28#include <cstring>
29
28#include "common/common_types.h" 30#include "common/common_types.h"
29 31
30// GCC 4.6+ 32// GCC 4.6+
@@ -58,9 +60,6 @@
58 60
59namespace Common { 61namespace Common {
60 62
61inline u8 swap8(u8 _data) {return _data;}
62inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];}
63
64#ifdef _MSC_VER 63#ifdef _MSC_VER
65inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} 64inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);}
66inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} 65inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);}
@@ -92,52 +91,29 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3
92#endif 91#endif
93 92
94inline float swapf(float f) { 93inline float swapf(float f) {
95 union { 94 static_assert(sizeof(u32) == sizeof(float),
96 float f; 95 "float must be the same size as uint32_t.");
97 unsigned int u32;
98 } dat1, dat2;
99
100 dat1.f = f;
101 dat2.u32 = swap32(dat1.u32);
102 96
103 return dat2.f; 97 u32 value;
104} 98 std::memcpy(&value, &f, sizeof(u32));
105
106inline double swapd(double f) {
107 union {
108 double f;
109 unsigned long long u64;
110 } dat1, dat2;
111 99
112 dat1.f = f; 100 value = swap32(value);
113 dat2.u64 = swap64(dat1.u64); 101 std::memcpy(&f, &value, sizeof(u32));
114 102
115 return dat2.f; 103 return f;
116} 104}
117 105
118inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} 106inline double swapd(double f) {
119inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} 107 static_assert(sizeof(u64) == sizeof(double),
120inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} 108 "double must be the same size as uint64_t.");
121
122template <int count>
123void swap(u8*);
124 109
125template <> 110 u64 value;
126inline void swap<1>(u8* data) { } 111 std::memcpy(&value, &f, sizeof(u64));
127 112
128template <> 113 value = swap64(value);
129inline void swap<2>(u8* data) { 114 std::memcpy(&f, &value, sizeof(u64));
130 *reinterpret_cast<u16*>(data) = swap16(data);
131}
132
133template <>
134inline void swap<4>(u8* data) {
135 *reinterpret_cast<u32*>(data) = swap32(data);
136}
137 115
138template <> 116 return f;
139inline void swap<8>(u8* data) {
140 *reinterpret_cast<u64*>(data) = swap64(data);
141} 117}
142 118
143} // Namespace Common 119} // Namespace Common
@@ -534,35 +510,35 @@ bool operator==(const S &p, const swap_struct_t<T, F> v) {
534template <typename T> 510template <typename T>
535struct swap_64_t { 511struct swap_64_t {
536 static T swap(T x) { 512 static T swap(T x) {
537 return (T)Common::swap64(*(u64 *)&x); 513 return static_cast<T>(Common::swap64(x));
538 } 514 }
539}; 515};
540 516
541template <typename T> 517template <typename T>
542struct swap_32_t { 518struct swap_32_t {
543 static T swap(T x) { 519 static T swap(T x) {
544 return (T)Common::swap32(*(u32 *)&x); 520 return static_cast<T>(Common::swap32(x));
545 } 521 }
546}; 522};
547 523
548template <typename T> 524template <typename T>
549struct swap_16_t { 525struct swap_16_t {
550 static T swap(T x) { 526 static T swap(T x) {
551 return (T)Common::swap16(*(u16 *)&x); 527 return static_cast<T>(Common::swap16(x));
552 } 528 }
553}; 529};
554 530
555template <typename T> 531template <typename T>
556struct swap_float_t { 532struct swap_float_t {
557 static T swap(T x) { 533 static T swap(T x) {
558 return (T)Common::swapf(*(float *)&x); 534 return static_cast<T>(Common::swapf(x));
559 } 535 }
560}; 536};
561 537
562template <typename T> 538template <typename T>
563struct swap_double_t { 539struct swap_double_t {
564 static T swap(T x) { 540 static T swap(T x) {
565 return (T)Common::swapd(*(double *)&x); 541 return static_cast<T>(Common::swapd(x));
566 } 542 }
567}; 543};
568 544
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index a8d891689..12080a802 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -42,6 +42,7 @@ set(SRCS
42 hle/kernel/timer.cpp 42 hle/kernel/timer.cpp
43 hle/kernel/vm_manager.cpp 43 hle/kernel/vm_manager.cpp
44 hle/service/ac_u.cpp 44 hle/service/ac_u.cpp
45 hle/service/act_a.cpp
45 hle/service/act_u.cpp 46 hle/service/act_u.cpp
46 hle/service/am/am.cpp 47 hle/service/am/am.cpp
47 hle/service/am/am_app.cpp 48 hle/service/am/am_app.cpp
@@ -52,6 +53,7 @@ set(SRCS
52 hle/service/apt/apt_a.cpp 53 hle/service/apt/apt_a.cpp
53 hle/service/apt/apt_s.cpp 54 hle/service/apt/apt_s.cpp
54 hle/service/apt/apt_u.cpp 55 hle/service/apt/apt_u.cpp
56 hle/service/apt/bcfnt/bcfnt.cpp
55 hle/service/boss/boss.cpp 57 hle/service/boss/boss.cpp
56 hle/service/boss/boss_p.cpp 58 hle/service/boss/boss_p.cpp
57 hle/service/boss/boss_u.cpp 59 hle/service/boss/boss_u.cpp
@@ -175,6 +177,7 @@ set(HEADERS
175 hle/kernel/vm_manager.h 177 hle/kernel/vm_manager.h
176 hle/result.h 178 hle/result.h
177 hle/service/ac_u.h 179 hle/service/ac_u.h
180 hle/service/act_a.h
178 hle/service/act_u.h 181 hle/service/act_u.h
179 hle/service/am/am.h 182 hle/service/am/am.h
180 hle/service/am/am_app.h 183 hle/service/am/am_app.h
@@ -185,6 +188,7 @@ set(HEADERS
185 hle/service/apt/apt_a.h 188 hle/service/apt/apt_a.h
186 hle/service/apt/apt_s.h 189 hle/service/apt/apt_s.h
187 hle/service/apt/apt_u.h 190 hle/service/apt/apt_u.h
191 hle/service/apt/bcfnt/bcfnt.h
188 hle/service/boss/boss.h 192 hle/service/boss/boss.h
189 hle/service/boss/boss_p.h 193 hle/service/boss/boss_p.h
190 hle/service/boss/boss_u.h 194 hle/service/boss/boss_u.h
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 533067d4f..d8abe5aeb 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -6,6 +6,7 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "core/arm/skyeye_common/arm_regformat.h" 8#include "core/arm/skyeye_common/arm_regformat.h"
9#include "core/arm/skyeye_common/vfp/asm_vfp.h"
9 10
10namespace Core { 11namespace Core {
11 struct ThreadContext; 12 struct ThreadContext;
diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp
index a3581132c..13492a08b 100644
--- a/src/core/arm/dyncom/arm_dyncom.cpp
+++ b/src/core/arm/dyncom/arm_dyncom.cpp
@@ -93,7 +93,7 @@ void ARM_DynCom::ResetContext(Core::ThreadContext& context, u32 stack_top, u32 e
93 context.cpu_registers[0] = arg; 93 context.cpu_registers[0] = arg;
94 context.pc = entry_point; 94 context.pc = entry_point;
95 context.sp = stack_top; 95 context.sp = stack_top;
96 context.cpsr = 0x1F | ((entry_point & 1) << 5); // Usermode and THUMB mode 96 context.cpsr = USER32MODE | ((entry_point & 1) << 5); // Usermode and THUMB mode
97} 97}
98 98
99void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { 99void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) {
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 8d4b26815..cfc67287f 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -5527,28 +5527,32 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
5527 5527
5528 // SMUAD and SMLAD 5528 // SMUAD and SMLAD
5529 if (BIT(op2, 1) == 0) { 5529 if (BIT(op2, 1) == 0) {
5530 RD = (product1 + product2); 5530 u32 rd_val = (product1 + product2);
5531 5531
5532 if (inst_cream->Ra != 15) { 5532 if (inst_cream->Ra != 15) {
5533 RD += cpu->Reg[inst_cream->Ra]; 5533 rd_val += cpu->Reg[inst_cream->Ra];
5534 5534
5535 if (ARMul_AddOverflowQ(product1 + product2, cpu->Reg[inst_cream->Ra])) 5535 if (ARMul_AddOverflowQ(product1 + product2, cpu->Reg[inst_cream->Ra]))
5536 cpu->Cpsr |= (1 << 27); 5536 cpu->Cpsr |= (1 << 27);
5537 } 5537 }
5538 5538
5539 RD = rd_val;
5540
5539 if (ARMul_AddOverflowQ(product1, product2)) 5541 if (ARMul_AddOverflowQ(product1, product2))
5540 cpu->Cpsr |= (1 << 27); 5542 cpu->Cpsr |= (1 << 27);
5541 } 5543 }
5542 // SMUSD and SMLSD 5544 // SMUSD and SMLSD
5543 else { 5545 else {
5544 RD = (product1 - product2); 5546 u32 rd_val = (product1 - product2);
5545 5547
5546 if (inst_cream->Ra != 15) { 5548 if (inst_cream->Ra != 15) {
5547 RD += cpu->Reg[inst_cream->Ra]; 5549 rd_val += cpu->Reg[inst_cream->Ra];
5548 5550
5549 if (ARMul_AddOverflowQ(product1 - product2, cpu->Reg[inst_cream->Ra])) 5551 if (ARMul_AddOverflowQ(product1 - product2, cpu->Reg[inst_cream->Ra]))
5550 cpu->Cpsr |= (1 << 27); 5552 cpu->Cpsr |= (1 << 27);
5551 } 5553 }
5554
5555 RD = rd_val;
5552 } 5556 }
5553 } 5557 }
5554 5558
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 1360ee845..820b19e1a 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -437,7 +437,7 @@ static void HandleSetThread() {
437 * 437 *
438 * @param signal Signal to be sent to client. 438 * @param signal Signal to be sent to client.
439 */ 439 */
440void SendSignal(u32 signal) { 440static void SendSignal(u32 signal) {
441 if (gdbserver_socket == -1) { 441 if (gdbserver_socket == -1) {
442 return; 442 return;
443 } 443 }
@@ -713,7 +713,7 @@ static void Continue() {
713 * @param addr Address of breakpoint. 713 * @param addr Address of breakpoint.
714 * @param len Length of breakpoint. 714 * @param len Length of breakpoint.
715 */ 715 */
716bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { 716static bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) {
717 std::map<u32, Breakpoint>& p = GetBreakpointList(type); 717 std::map<u32, Breakpoint>& p = GetBreakpointList(type);
718 718
719 Breakpoint breakpoint; 719 Breakpoint breakpoint;
@@ -907,7 +907,7 @@ void ToggleServer(bool status) {
907 } 907 }
908} 908}
909 909
910void Init(u16 port) { 910static void Init(u16 port) {
911 if (!g_server_enabled) { 911 if (!g_server_enabled) {
912 // Set the halt loop to false in case the user enabled the gdbstub mid-execution. 912 // Set the halt loop to false in case the user enabled the gdbstub mid-execution.
913 // This way the CPU can still execute normally. 913 // This way the CPU can still execute normally.
diff --git a/src/core/hle/applets/applet.h b/src/core/hle/applets/applet.h
index af442f81d..754c6f7db 100644
--- a/src/core/hle/applets/applet.h
+++ b/src/core/hle/applets/applet.h
@@ -65,6 +65,7 @@ protected:
65 virtual ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) = 0; 65 virtual ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) = 0;
66 66
67 Service::APT::AppletId id; ///< Id of this Applet 67 Service::APT::AppletId id; ///< Id of this Applet
68 std::shared_ptr<std::vector<u8>> heap_memory; ///< Heap memory for this Applet
68}; 69};
69 70
70/// Returns whether a library applet is currently running 71/// Returns whether a library applet is currently running
diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp
index b4456ca90..bf39eca22 100644
--- a/src/core/hle/applets/mii_selector.cpp
+++ b/src/core/hle/applets/mii_selector.cpp
@@ -35,9 +35,14 @@ ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& p
35 ASSERT(sizeof(capture_info) == parameter.buffer_size); 35 ASSERT(sizeof(capture_info) == parameter.buffer_size);
36 36
37 memcpy(&capture_info, parameter.data, sizeof(capture_info)); 37 memcpy(&capture_info, parameter.data, sizeof(capture_info));
38
38 using Kernel::MemoryPermission; 39 using Kernel::MemoryPermission;
39 framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, 40 // Allocate a heap block of the required size for this applet.
40 MemoryPermission::ReadWrite, "MiiSelector Memory"); 41 heap_memory = std::make_shared<std::vector<u8>>(capture_info.size);
42 // Create a SharedMemory that directly points to this heap block.
43 framebuffer_memory = Kernel::SharedMemory::CreateForApplet(heap_memory, 0, heap_memory->size(),
44 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
45 "MiiSelector Memory");
41 46
42 // Send the response message with the newly created SharedMemory 47 // Send the response message with the newly created SharedMemory
43 Service::APT::MessageParameter result; 48 Service::APT::MessageParameter result;
diff --git a/src/core/hle/applets/swkbd.cpp b/src/core/hle/applets/swkbd.cpp
index 87238aa1c..90c6adc65 100644
--- a/src/core/hle/applets/swkbd.cpp
+++ b/src/core/hle/applets/swkbd.cpp
@@ -40,8 +40,12 @@ ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter con
40 memcpy(&capture_info, parameter.data, sizeof(capture_info)); 40 memcpy(&capture_info, parameter.data, sizeof(capture_info));
41 41
42 using Kernel::MemoryPermission; 42 using Kernel::MemoryPermission;
43 framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, 43 // Allocate a heap block of the required size for this applet.
44 MemoryPermission::ReadWrite, "SoftwareKeyboard Memory"); 44 heap_memory = std::make_shared<std::vector<u8>>(capture_info.size);
45 // Create a SharedMemory that directly points to this heap block.
46 framebuffer_memory = Kernel::SharedMemory::CreateForApplet(heap_memory, 0, heap_memory->size(),
47 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
48 "SoftwareKeyboard Memory");
45 49
46 // Send the response message with the newly created SharedMemory 50 // Send the response message with the newly created SharedMemory
47 Service::APT::MessageParameter result; 51 Service::APT::MessageParameter result;
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h
index 4d718b681..bf7f875b6 100644
--- a/src/core/hle/function_wrappers.h
+++ b/src/core/hle/function_wrappers.h
@@ -170,7 +170,8 @@ template<ResultCode func(s64*, u32, s32)> void Wrap() {
170 170
171template<ResultCode func(u32*, u32, u32, u32, u32)> void Wrap() { 171template<ResultCode func(u32*, u32, u32, u32, u32)> void Wrap() {
172 u32 param_1 = 0; 172 u32 param_1 = 0;
173 u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3), PARAM(4)).raw; 173 // The last parameter is passed in R0 instead of R4
174 u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3), PARAM(0)).raw;
174 Core::g_app_core->SetReg(1, param_1); 175 Core::g_app_core->SetReg(1, param_1);
175 FuncReturn(retval); 176 FuncReturn(retval);
176} 177}
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
index 862643448..17ae87aef 100644
--- a/src/core/hle/kernel/memory.cpp
+++ b/src/core/hle/kernel/memory.cpp
@@ -55,6 +55,9 @@ void MemoryInit(u32 mem_type) {
55 memory_regions[i].size = memory_region_sizes[mem_type][i]; 55 memory_regions[i].size = memory_region_sizes[mem_type][i];
56 memory_regions[i].used = 0; 56 memory_regions[i].used = 0;
57 memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>(); 57 memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>();
58 // Reserve enough space for this region of FCRAM.
59 // We do not want this block of memory to be relocated when allocating from it.
60 memory_regions[i].linear_heap_memory->reserve(memory_regions[i].size);
58 61
59 base += memory_regions[i].size; 62 base += memory_regions[i].size;
60 } 63 }
@@ -107,9 +110,7 @@ struct MemoryArea {
107 110
108// We don't declare the IO regions in here since its handled by other means. 111// We don't declare the IO regions in here since its handled by other means.
109static MemoryArea memory_areas[] = { 112static MemoryArea memory_areas[] = {
110 {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
111 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) 113 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
112 {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
113}; 114};
114 115
115} 116}
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 0546f6e16..69302cc82 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -209,7 +209,7 @@ ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission p
209 return ERR_INVALID_ADDRESS; 209 return ERR_INVALID_ADDRESS;
210 } 210 }
211 211
212 // Expansion of the linear heap is only allowed if you do an allocation immediatelly at its 212 // Expansion of the linear heap is only allowed if you do an allocation immediately at its
213 // end. It's possible to free gaps in the middle of the heap and then reallocate them later, 213 // end. It's possible to free gaps in the middle of the heap and then reallocate them later,
214 // but expansions are only allowed at the end. 214 // but expansions are only allowed at the end.
215 if (target == heap_end) { 215 if (target == heap_end) {
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index a06afef2b..d781ef32c 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -142,8 +142,11 @@ public:
142 142
143 MemoryRegionInfo* memory_region = nullptr; 143 MemoryRegionInfo* memory_region = nullptr;
144 144
145 /// Bitmask of the used TLS slots 145 /// The Thread Local Storage area is allocated as processes create threads,
146 std::bitset<300> used_tls_slots; 146 /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
147 /// holds the TLS for a specific thread. This vector contains which parts are in use for each page as a bitmask.
148 /// This vector will grow as more pages are allocated for new threads.
149 std::vector<std::bitset<8>> tls_slots;
147 150
148 VAddr GetLinearHeapAreaAddress() const; 151 VAddr GetLinearHeapAreaAddress() const;
149 VAddr GetLinearHeapBase() const; 152 VAddr GetLinearHeapBase() const;
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index d90f0f00f..6a22c8986 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -7,6 +7,7 @@
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8 8
9#include "core/memory.h" 9#include "core/memory.h"
10#include "core/hle/kernel/memory.h"
10#include "core/hle/kernel/shared_memory.h" 11#include "core/hle/kernel/shared_memory.h"
11 12
12namespace Kernel { 13namespace Kernel {
@@ -14,93 +15,157 @@ namespace Kernel {
14SharedMemory::SharedMemory() {} 15SharedMemory::SharedMemory() {}
15SharedMemory::~SharedMemory() {} 16SharedMemory::~SharedMemory() {}
16 17
17SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissions, 18SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u32 size, MemoryPermission permissions,
18 MemoryPermission other_permissions, std::string name) { 19 MemoryPermission other_permissions, VAddr address, MemoryRegion region, std::string name) {
19 SharedPtr<SharedMemory> shared_memory(new SharedMemory); 20 SharedPtr<SharedMemory> shared_memory(new SharedMemory);
20 21
22 shared_memory->owner_process = owner_process;
21 shared_memory->name = std::move(name); 23 shared_memory->name = std::move(name);
22 shared_memory->base_address = 0x0;
23 shared_memory->fixed_address = 0x0;
24 shared_memory->size = size; 24 shared_memory->size = size;
25 shared_memory->permissions = permissions; 25 shared_memory->permissions = permissions;
26 shared_memory->other_permissions = other_permissions; 26 shared_memory->other_permissions = other_permissions;
27 27
28 if (address == 0) {
29 // We need to allocate a block from the Linear Heap ourselves.
30 // We'll manually allocate some memory from the linear heap in the specified region.
31 MemoryRegionInfo* memory_region = GetMemoryRegion(region);
32 auto& linheap_memory = memory_region->linear_heap_memory;
33
34 ASSERT_MSG(linheap_memory->size() + size <= memory_region->size, "Not enough space in region to allocate shared memory!");
35
36 shared_memory->backing_block = linheap_memory;
37 shared_memory->backing_block_offset = linheap_memory->size();
38 // Allocate some memory from the end of the linear heap for this region.
39 linheap_memory->insert(linheap_memory->end(), size, 0);
40 memory_region->used += size;
41
42 shared_memory->linear_heap_phys_address = Memory::FCRAM_PADDR + memory_region->base + shared_memory->backing_block_offset;
43
44 // Increase the amount of used linear heap memory for the owner process.
45 if (shared_memory->owner_process != nullptr) {
46 shared_memory->owner_process->linear_heap_used += size;
47 }
48
49 // Refresh the address mappings for the current process.
50 if (Kernel::g_current_process != nullptr) {
51 Kernel::g_current_process->vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
52 }
53 } else {
54 // TODO(Subv): What happens if an application tries to create multiple memory blocks pointing to the same address?
55 auto& vm_manager = shared_memory->owner_process->vm_manager;
56 // The memory is already available and mapped in the owner process.
57 auto vma = vm_manager.FindVMA(address)->second;
58 // Copy it over to our own storage
59 shared_memory->backing_block = std::make_shared<std::vector<u8>>(vma.backing_block->data() + vma.offset,
60 vma.backing_block->data() + vma.offset + size);
61 shared_memory->backing_block_offset = 0;
62 // Unmap the existing pages
63 vm_manager.UnmapRange(address, size);
64 // Map our own block into the address space
65 vm_manager.MapMemoryBlock(address, shared_memory->backing_block, 0, size, MemoryState::Shared);
66 // Reprotect the block with the new permissions
67 vm_manager.ReprotectRange(address, size, ConvertPermissions(permissions));
68 }
69
70 shared_memory->base_address = address;
28 return shared_memory; 71 return shared_memory;
29} 72}
30 73
31ResultCode SharedMemory::Map(VAddr address, MemoryPermission permissions, 74SharedPtr<SharedMemory> SharedMemory::CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block, u32 offset, u32 size,
32 MemoryPermission other_permissions) { 75 MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
76 SharedPtr<SharedMemory> shared_memory(new SharedMemory);
33 77
34 if (base_address != 0) { 78 shared_memory->owner_process = nullptr;
35 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: already mapped at 0x%08X!", 79 shared_memory->name = std::move(name);
36 GetObjectId(), address, name.c_str(), base_address); 80 shared_memory->size = size;
37 // TODO: Verify error code with hardware 81 shared_memory->permissions = permissions;
38 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, 82 shared_memory->other_permissions = other_permissions;
39 ErrorSummary::InvalidArgument, ErrorLevel::Permanent); 83 shared_memory->backing_block = heap_block;
40 } 84 shared_memory->backing_block_offset = offset;
85 shared_memory->base_address = Memory::HEAP_VADDR + offset;
41 86
42 // TODO(Subv): Return E0E01BEE when permissions and other_permissions don't 87 return shared_memory;
43 // match what was specified when the memory block was created. 88}
44 89
45 // TODO(Subv): Return E0E01BEE when address should be 0. 90ResultCode SharedMemory::Map(Process* target_process, VAddr address, MemoryPermission permissions,
46 // Note: Find out when that's the case. 91 MemoryPermission other_permissions) {
47 92
48 if (fixed_address != 0) { 93 MemoryPermission own_other_permissions = target_process == owner_process ? this->permissions : this->other_permissions;
49 if (address != 0 && address != fixed_address) {
50 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: fixed_addres is 0x%08X!",
51 GetObjectId(), address, name.c_str(), fixed_address);
52 // TODO: Verify error code with hardware
53 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
54 ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
55 }
56 94
57 // HACK(yuriks): This is only here to support the APT shared font mapping right now. 95 // Automatically allocated memory blocks can only be mapped with other_permissions = DontCare
58 // Later, this should actually map the memory block onto the address space. 96 if (base_address == 0 && other_permissions != MemoryPermission::DontCare) {
59 return RESULT_SUCCESS; 97 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
60 } 98 }
61 99
62 if (address < Memory::SHARED_MEMORY_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) { 100 // Error out if the requested permissions don't match what the creator process allows.
63 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s outside of shared mem bounds!", 101 if (static_cast<u32>(permissions) & ~static_cast<u32>(own_other_permissions)) {
64 GetObjectId(), address, name.c_str()); 102 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match",
65 // TODO: Verify error code with hardware 103 GetObjectId(), address, name.c_str());
66 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, 104 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
67 ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
68 } 105 }
69 106
70 // TODO: Test permissions 107 // Heap-backed memory blocks can not be mapped with other_permissions = DontCare
108 if (base_address != 0 && other_permissions == MemoryPermission::DontCare) {
109 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match",
110 GetObjectId(), address, name.c_str());
111 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
112 }
71 113
72 // HACK: Since there's no way to write to the memory block without mapping it onto the game 114 // Error out if the provided permissions are not compatible with what the creator process needs.
73 // process yet, at least initialize memory the first time it's mapped. 115 if (other_permissions != MemoryPermission::DontCare &&
74 if (address != this->base_address) { 116 static_cast<u32>(this->permissions) & ~static_cast<u32>(other_permissions)) {
75 std::memset(Memory::GetPointer(address), 0, size); 117 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match",
118 GetObjectId(), address, name.c_str());
119 return ResultCode(ErrorDescription::WrongPermission, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent);
76 } 120 }
77 121
78 this->base_address = address; 122 // TODO(Subv): Check for the Shared Device Mem flag in the creator process.
123 /*if (was_created_with_shared_device_mem && address != 0) {
124 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
125 }*/
79 126
80 return RESULT_SUCCESS; 127 // TODO(Subv): The same process that created a SharedMemory object
81} 128 // can not map it in its own address space unless it was created with addr=0, result 0xD900182C.
82 129
83ResultCode SharedMemory::Unmap(VAddr address) { 130 if (address != 0) {
84 if (base_address == 0) { 131 if (address < Memory::HEAP_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) {
85 // TODO(Subv): Verify what actually happens when you want to unmap a memory block that 132 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, invalid address",
86 // was originally mapped with address = 0 133 GetObjectId(), address, name.c_str());
87 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); 134 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS,
135 ErrorSummary::InvalidArgument, ErrorLevel::Usage);
136 }
88 } 137 }
89 138
90 if (base_address != address) 139 VAddr target_address = address;
91 return ResultCode(ErrorDescription::WrongAddress, ErrorModule::OS, ErrorSummary::InvalidState, ErrorLevel::Usage);
92 140
93 base_address = 0; 141 if (base_address == 0 && target_address == 0) {
142 // Calculate the address at which to map the memory block.
143 target_address = Memory::PhysicalToVirtualAddress(linear_heap_phys_address);
144 }
145
146 // Map the memory block into the target process
147 auto result = target_process->vm_manager.MapMemoryBlock(target_address, backing_block, backing_block_offset, size, MemoryState::Shared);
148 if (result.Failed()) {
149 LOG_ERROR(Kernel, "cannot map id=%u, target_address=0x%08X name=%s, error mapping to virtual memory",
150 GetObjectId(), target_address, name.c_str());
151 return result.Code();
152 }
94 153
95 return RESULT_SUCCESS; 154 return target_process->vm_manager.ReprotectRange(target_address, size, ConvertPermissions(permissions));
96} 155}
97 156
98u8* SharedMemory::GetPointer(u32 offset) { 157ResultCode SharedMemory::Unmap(Process* target_process, VAddr address) {
99 if (base_address != 0) 158 // TODO(Subv): Verify what happens if the application tries to unmap an address that is not mapped to a SharedMemory.
100 return Memory::GetPointer(base_address + offset); 159 return target_process->vm_manager.UnmapRange(address, size);
160}
161
162VMAPermission SharedMemory::ConvertPermissions(MemoryPermission permission) {
163 u32 masked_permissions = static_cast<u32>(permission) & static_cast<u32>(MemoryPermission::ReadWriteExecute);
164 return static_cast<VMAPermission>(masked_permissions);
165};
101 166
102 LOG_ERROR(Kernel_SVC, "memory block id=%u not mapped!", GetObjectId()); 167u8* SharedMemory::GetPointer(u32 offset) {
103 return nullptr; 168 return backing_block->data() + backing_block_offset + offset;
104} 169}
105 170
106} // namespace 171} // namespace
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index b51049ad0..0c404a9f8 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10 10
11#include "core/hle/kernel/kernel.h" 11#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/process.h"
12#include "core/hle/result.h" 13#include "core/hle/result.h"
13 14
14namespace Kernel { 15namespace Kernel {
@@ -29,14 +30,29 @@ enum class MemoryPermission : u32 {
29class SharedMemory final : public Object { 30class SharedMemory final : public Object {
30public: 31public:
31 /** 32 /**
32 * Creates a shared memory object 33 * Creates a shared memory object.
34 * @param owner_process Process that created this shared memory object.
33 * @param size Size of the memory block. Must be page-aligned. 35 * @param size Size of the memory block. Must be page-aligned.
34 * @param permissions Permission restrictions applied to the process which created the block. 36 * @param permissions Permission restrictions applied to the process which created the block.
35 * @param other_permissions Permission restrictions applied to other processes mapping the block. 37 * @param other_permissions Permission restrictions applied to other processes mapping the block.
38 * @param address The address from which to map the Shared Memory.
39 * @param region If the address is 0, the shared memory will be allocated in this region of the linear heap.
36 * @param name Optional object name, used for debugging purposes. 40 * @param name Optional object name, used for debugging purposes.
37 */ 41 */
38 static SharedPtr<SharedMemory> Create(u32 size, MemoryPermission permissions, 42 static SharedPtr<SharedMemory> Create(SharedPtr<Process> owner_process, u32 size, MemoryPermission permissions,
39 MemoryPermission other_permissions, std::string name = "Unknown"); 43 MemoryPermission other_permissions, VAddr address = 0, MemoryRegion region = MemoryRegion::BASE, std::string name = "Unknown");
44
45 /**
46 * Creates a shared memory object from a block of memory managed by an HLE applet.
47 * @param heap_block Heap block of the HLE applet.
48 * @param offset The offset into the heap block that the SharedMemory will map.
49 * @param size Size of the memory block. Must be page-aligned.
50 * @param permissions Permission restrictions applied to the process which created the block.
51 * @param other_permissions Permission restrictions applied to other processes mapping the block.
52 * @param name Optional object name, used for debugging purposes.
53 */
54 static SharedPtr<SharedMemory> CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block, u32 offset, u32 size,
55 MemoryPermission permissions, MemoryPermission other_permissions, std::string name = "Unknown Applet");
40 56
41 std::string GetTypeName() const override { return "SharedMemory"; } 57 std::string GetTypeName() const override { return "SharedMemory"; }
42 std::string GetName() const override { return name; } 58 std::string GetName() const override { return name; }
@@ -45,19 +61,27 @@ public:
45 HandleType GetHandleType() const override { return HANDLE_TYPE; } 61 HandleType GetHandleType() const override { return HANDLE_TYPE; }
46 62
47 /** 63 /**
48 * Maps a shared memory block to an address in system memory 64 * Converts the specified MemoryPermission into the equivalent VMAPermission.
65 * @param permission The MemoryPermission to convert.
66 */
67 static VMAPermission ConvertPermissions(MemoryPermission permission);
68
69 /**
70 * Maps a shared memory block to an address in the target process' address space
71 * @param target_process Process on which to map the memory block.
49 * @param address Address in system memory to map shared memory block to 72 * @param address Address in system memory to map shared memory block to
50 * @param permissions Memory block map permissions (specified by SVC field) 73 * @param permissions Memory block map permissions (specified by SVC field)
51 * @param other_permissions Memory block map other permissions (specified by SVC field) 74 * @param other_permissions Memory block map other permissions (specified by SVC field)
52 */ 75 */
53 ResultCode Map(VAddr address, MemoryPermission permissions, MemoryPermission other_permissions); 76 ResultCode Map(Process* target_process, VAddr address, MemoryPermission permissions, MemoryPermission other_permissions);
54 77
55 /** 78 /**
56 * Unmaps a shared memory block from the specified address in system memory 79 * Unmaps a shared memory block from the specified address in system memory
80 * @param target_process Process from which to unmap the memory block.
57 * @param address Address in system memory where the shared memory block is mapped 81 * @param address Address in system memory where the shared memory block is mapped
58 * @return Result code of the unmap operation 82 * @return Result code of the unmap operation
59 */ 83 */
60 ResultCode Unmap(VAddr address); 84 ResultCode Unmap(Process* target_process, VAddr address);
61 85
62 /** 86 /**
63 * Gets a pointer to the shared memory block 87 * Gets a pointer to the shared memory block
@@ -66,10 +90,16 @@ public:
66 */ 90 */
67 u8* GetPointer(u32 offset = 0); 91 u8* GetPointer(u32 offset = 0);
68 92
69 /// Address of shared memory block in the process. 93 /// Process that created this shared memory block.
94 SharedPtr<Process> owner_process;
95 /// Address of shared memory block in the owner process if specified.
70 VAddr base_address; 96 VAddr base_address;
71 /// Fixed address to allow mapping to. Used for blocks created from the linear heap. 97 /// Physical address of the shared memory block in the linear heap if no address was specified during creation.
72 VAddr fixed_address; 98 PAddr linear_heap_phys_address;
99 /// Backing memory for this shared memory block.
100 std::shared_ptr<std::vector<u8>> backing_block;
101 /// Offset into the backing block for this shared memory.
102 u32 backing_block_offset;
73 /// Size of the memory block. Page-aligned. 103 /// Size of the memory block. Page-aligned.
74 u32 size; 104 u32 size;
75 /// Permission restrictions applied to the process which created the block. 105 /// Permission restrictions applied to the process which created the block.
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 6dc95d0f1..43def6146 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -117,9 +117,10 @@ void Thread::Stop() {
117 } 117 }
118 wait_objects.clear(); 118 wait_objects.clear();
119 119
120 Kernel::g_current_process->used_tls_slots[tls_index] = false; 120 // Mark the TLS slot in the thread's page as free.
121 g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE; 121 u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE;
122 g_current_process->memory_region->used -= Memory::TLS_ENTRY_SIZE; 122 u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
123 Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot);
123 124
124 HLE::Reschedule(__func__); 125 HLE::Reschedule(__func__);
125} 126}
@@ -366,6 +367,31 @@ static void DebugThreadQueue() {
366 } 367 }
367} 368}
368 369
370/**
371 * Finds a free location for the TLS section of a thread.
372 * @param tls_slots The TLS page array of the thread's owner process.
373 * Returns a tuple of (page, slot, alloc_needed) where:
374 * page: The index of the first allocated TLS page that has free slots.
375 * slot: The index of the first free slot in the indicated page.
376 * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full).
377 */
378std::tuple<u32, u32, bool> GetFreeThreadLocalSlot(std::vector<std::bitset<8>>& tls_slots) {
379 // Iterate over all the allocated pages, and try to find one where not all slots are used.
380 for (unsigned page = 0; page < tls_slots.size(); ++page) {
381 const auto& page_tls_slots = tls_slots[page];
382 if (!page_tls_slots.all()) {
383 // We found a page with at least one free slot, find which slot it is
384 for (unsigned slot = 0; slot < page_tls_slots.size(); ++slot) {
385 if (!page_tls_slots.test(slot)) {
386 return std::make_tuple(page, slot, false);
387 }
388 }
389 }
390 }
391
392 return std::make_tuple(0, 0, true);
393}
394
369ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, 395ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority,
370 u32 arg, s32 processor_id, VAddr stack_top) { 396 u32 arg, s32 processor_id, VAddr stack_top) {
371 if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { 397 if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) {
@@ -403,22 +429,50 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
403 thread->name = std::move(name); 429 thread->name = std::move(name);
404 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); 430 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
405 thread->owner_process = g_current_process; 431 thread->owner_process = g_current_process;
406 thread->tls_index = -1;
407 thread->waitsynch_waited = false; 432 thread->waitsynch_waited = false;
408 433
409 // Find the next available TLS index, and mark it as used 434 // Find the next available TLS index, and mark it as used
410 auto& used_tls_slots = Kernel::g_current_process->used_tls_slots; 435 auto& tls_slots = Kernel::g_current_process->tls_slots;
411 for (unsigned int i = 0; i < used_tls_slots.size(); ++i) { 436 bool needs_allocation = true;
412 if (used_tls_slots[i] == false) { 437 u32 available_page; // Which allocated page has free space
413 thread->tls_index = i; 438 u32 available_slot; // Which slot within the page is free
414 used_tls_slots[i] = true; 439
415 break; 440 std::tie(available_page, available_slot, needs_allocation) = GetFreeThreadLocalSlot(tls_slots);
441
442 if (needs_allocation) {
443 // There are no already-allocated pages with free slots, let's allocate a new one.
444 // TLS pages are allocated from the BASE region in the linear heap.
445 MemoryRegionInfo* memory_region = GetMemoryRegion(MemoryRegion::BASE);
446 auto& linheap_memory = memory_region->linear_heap_memory;
447
448 if (linheap_memory->size() + Memory::PAGE_SIZE > memory_region->size) {
449 LOG_ERROR(Kernel_SVC, "Not enough space in region to allocate a new TLS page for thread");
450 return ResultCode(ErrorDescription::OutOfMemory, ErrorModule::Kernel, ErrorSummary::OutOfResource, ErrorLevel::Permanent);
416 } 451 }
452
453 u32 offset = linheap_memory->size();
454
455 // Allocate some memory from the end of the linear heap for this region.
456 linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0);
457 memory_region->used += Memory::PAGE_SIZE;
458 Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE;
459
460 tls_slots.emplace_back(0); // The page is completely available at the start
461 available_page = tls_slots.size() - 1;
462 available_slot = 0; // Use the first slot in the new page
463
464 auto& vm_manager = Kernel::g_current_process->vm_manager;
465 vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
466
467 // Map the page to the current process' address space.
468 // TODO(Subv): Find the correct MemoryState for this region.
469 vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE,
470 linheap_memory, offset, Memory::PAGE_SIZE, MemoryState::Private);
417 } 471 }
418 472
419 ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); 473 // Mark the slot as used
420 g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE; 474 tls_slots[available_page].set(available_slot);
421 g_current_process->memory_region->used += Memory::TLS_ENTRY_SIZE; 475 thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
422 476
423 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 477 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
424 // to initialize the context 478 // to initialize the context
@@ -472,6 +526,8 @@ SharedPtr<Thread> SetupMainThread(u32 entry_point, s32 priority) {
472 526
473 SharedPtr<Thread> thread = thread_res.MoveFrom(); 527 SharedPtr<Thread> thread = thread_res.MoveFrom();
474 528
529 thread->context.fpscr = FPSCR_DEFAULT_NAN | FPSCR_FLUSH_TO_ZERO | FPSCR_ROUND_TOZERO | FPSCR_IXC; // 0x03C00010
530
475 // Run new "main" thread 531 // Run new "main" thread
476 SwitchContext(thread.get()); 532 SwitchContext(thread.get());
477 533
@@ -509,10 +565,6 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
509 context.cpu_registers[1] = output; 565 context.cpu_registers[1] = output;
510} 566}
511 567
512VAddr Thread::GetTLSAddress() const {
513 return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;
514}
515
516//////////////////////////////////////////////////////////////////////////////////////////////////// 568////////////////////////////////////////////////////////////////////////////////////////////////////
517 569
518void ThreadingInit() { 570void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 97ba57fc5..deab5d5a6 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -127,7 +127,7 @@ public:
127 * Returns the Thread Local Storage address of the current thread 127 * Returns the Thread Local Storage address of the current thread
128 * @returns VAddr of the thread's TLS 128 * @returns VAddr of the thread's TLS
129 */ 129 */
130 VAddr GetTLSAddress() const; 130 VAddr GetTLSAddress() const { return tls_address; }
131 131
132 Core::ThreadContext context; 132 Core::ThreadContext context;
133 133
@@ -144,7 +144,7 @@ public:
144 144
145 s32 processor_id; 145 s32 processor_id;
146 146
147 s32 tls_index; ///< Index of the Thread Local Storage of the thread 147 VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread
148 148
149 bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait 149 bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait
150 150
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 3fc1ab4ee..bfb3327ce 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -17,6 +17,7 @@
17/// Detailed description of the error. This listing is likely incomplete. 17/// Detailed description of the error. This listing is likely incomplete.
18enum class ErrorDescription : u32 { 18enum class ErrorDescription : u32 {
19 Success = 0, 19 Success = 0,
20 WrongPermission = 46,
20 OS_InvalidBufferDescriptor = 48, 21 OS_InvalidBufferDescriptor = 48,
21 WrongAddress = 53, 22 WrongAddress = 53,
22 FS_NotFound = 120, 23 FS_NotFound = 120,
diff --git a/src/core/hle/service/act_a.cpp b/src/core/hle/service/act_a.cpp
new file mode 100644
index 000000000..3a775fa90
--- /dev/null
+++ b/src/core/hle/service/act_a.cpp
@@ -0,0 +1,26 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/service/act_a.h"
6
7////////////////////////////////////////////////////////////////////////////////////////////////////
8// Namespace ACT_A
9
10namespace ACT_A {
11
12const Interface::FunctionInfo FunctionTable[] = {
13 {0x041300C2, nullptr, "UpdateMiiImage"},
14 {0x041B0142, nullptr, "AgreeEula"},
15 {0x04210042, nullptr, "UploadMii"},
16 {0x04230082, nullptr, "ValidateMailAddress"},
17};
18
19////////////////////////////////////////////////////////////////////////////////////////////////////
20// Interface class
21
22Interface::Interface() {
23 Register(FunctionTable);
24}
25
26} // namespace
diff --git a/src/core/hle/service/act_a.h b/src/core/hle/service/act_a.h
new file mode 100644
index 000000000..765cae644
--- /dev/null
+++ b/src/core/hle/service/act_a.h
@@ -0,0 +1,23 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/service/service.h"
8
9////////////////////////////////////////////////////////////////////////////////////////////////////
10// Namespace ACT_A
11
12namespace ACT_A {
13
14class Interface : public Service::Interface {
15public:
16 Interface();
17
18 std::string GetPortName() const override {
19 return "act:a";
20 }
21};
22
23} // namespace
diff --git a/src/core/hle/service/act_u.cpp b/src/core/hle/service/act_u.cpp
index b23d17fba..05de4d002 100644
--- a/src/core/hle/service/act_u.cpp
+++ b/src/core/hle/service/act_u.cpp
@@ -10,7 +10,10 @@
10namespace ACT_U { 10namespace ACT_U {
11 11
12const Interface::FunctionInfo FunctionTable[] = { 12const Interface::FunctionInfo FunctionTable[] = {
13 {0x00010084, nullptr, "Initialize"},
14 {0x00020040, nullptr, "GetErrorCode"},
13 {0x000600C2, nullptr, "GetAccountDataBlock"}, 15 {0x000600C2, nullptr, "GetAccountDataBlock"},
16 {0x000D0040, nullptr, "GenerateUuid"},
14}; 17};
15 18
16//////////////////////////////////////////////////////////////////////////////////////////////////// 19////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 6d72e8188..73fce6079 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -12,6 +12,7 @@
12#include "core/hle/service/apt/apt_a.h" 12#include "core/hle/service/apt/apt_a.h"
13#include "core/hle/service/apt/apt_s.h" 13#include "core/hle/service/apt/apt_s.h"
14#include "core/hle/service/apt/apt_u.h" 14#include "core/hle/service/apt/apt_u.h"
15#include "core/hle/service/apt/bcfnt/bcfnt.h"
15#include "core/hle/service/fs/archive.h" 16#include "core/hle/service/fs/archive.h"
16 17
17#include "core/hle/kernel/event.h" 18#include "core/hle/kernel/event.h"
@@ -22,23 +23,14 @@
22namespace Service { 23namespace Service {
23namespace APT { 24namespace APT {
24 25
25// Address used for shared font (as observed on HW)
26// TODO(bunnei): This is the hard-coded address where we currently dump the shared font from via
27// https://github.com/citra-emu/3dsutils. This is technically a hack, and will not work at any
28// address other than 0x18000000 due to internal pointers in the shared font dump that would need to
29// be relocated. This might be fixed by dumping the shared font @ address 0x00000000 and then
30// correctly mapping it in Citra, however we still do not understand how the mapping is determined.
31static const VAddr SHARED_FONT_VADDR = 0x18000000;
32
33/// Handle to shared memory region designated to for shared system font 26/// Handle to shared memory region designated to for shared system font
34static Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; 27static Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
28static bool shared_font_relocated = false;
35 29
36static Kernel::SharedPtr<Kernel::Mutex> lock; 30static Kernel::SharedPtr<Kernel::Mutex> lock;
37static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event 31static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event
38static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event 32static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event
39 33
40static std::shared_ptr<std::vector<u8>> shared_font;
41
42static u32 cpu_percent; ///< CPU time available to the running application 34static u32 cpu_percent; ///< CPU time available to the running application
43 35
44/// Parameter data to be returned in the next call to Glance/ReceiveParameter 36/// Parameter data to be returned in the next call to Glance/ReceiveParameter
@@ -74,23 +66,25 @@ void Initialize(Service::Interface* self) {
74void GetSharedFont(Service::Interface* self) { 66void GetSharedFont(Service::Interface* self) {
75 u32* cmd_buff = Kernel::GetCommandBuffer(); 67 u32* cmd_buff = Kernel::GetCommandBuffer();
76 68
77 if (shared_font != nullptr) { 69 // The shared font has to be relocated to the new address before being passed to the application.
78 // TODO(yuriks): This is a hack to keep this working right now even with our completely 70 VAddr target_address = Memory::PhysicalToVirtualAddress(shared_font_mem->linear_heap_phys_address);
79 // broken shared memory system. 71 // The shared font dumped by 3dsutils (https://github.com/citra-emu/3dsutils) uses this address as base,
80 shared_font_mem->fixed_address = SHARED_FONT_VADDR; 72 // so we relocate it from there to our real address.
81 Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->fixed_address, 73 // TODO(Subv): This address is wrong if the shared font is dumped from a n3DS,
82 shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared); 74 // we need a way to automatically calculate the original address of the font from the file.
83 75 static const VAddr SHARED_FONT_VADDR = 0x18000000;
84 cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2); 76 if (!shared_font_relocated) {
85 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 77 BCFNT::RelocateSharedFont(shared_font_mem, SHARED_FONT_VADDR, target_address);
86 cmd_buff[2] = SHARED_FONT_VADDR; 78 shared_font_relocated = true;
87 cmd_buff[3] = IPC::MoveHandleDesc();
88 cmd_buff[4] = Kernel::g_handle_table.Create(shared_font_mem).MoveFrom();
89 } else {
90 cmd_buff[0] = IPC::MakeHeader(0x44, 1, 0);
91 cmd_buff[1] = -1; // Generic error (not really possible to verify this on hardware)
92 LOG_ERROR(Kernel_SVC, "called, but %s has not been loaded!", SHARED_FONT);
93 } 79 }
80 cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2);
81 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
82 // Since the SharedMemory interface doesn't provide the address at which the memory was allocated,
83 // the real APT service calculates this address by scanning the entire address space (using svcQueryMemory)
84 // and searches for an allocation of the same size as the Shared Font.
85 cmd_buff[2] = target_address;
86 cmd_buff[3] = IPC::MoveHandleDesc();
87 cmd_buff[4] = Kernel::g_handle_table.Create(shared_font_mem).MoveFrom();
94} 88}
95 89
96void NotifyToWait(Service::Interface* self) { 90void NotifyToWait(Service::Interface* self) {
@@ -433,14 +427,12 @@ void Init() {
433 FileUtil::IOFile file(filepath, "rb"); 427 FileUtil::IOFile file(filepath, "rb");
434 428
435 if (file.IsOpen()) { 429 if (file.IsOpen()) {
436 // Read shared font data
437 shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize());
438 file.ReadBytes(shared_font->data(), shared_font->size());
439
440 // Create shared font memory object 430 // Create shared font memory object
441 using Kernel::MemoryPermission; 431 using Kernel::MemoryPermission;
442 shared_font_mem = Kernel::SharedMemory::Create(3 * 1024 * 1024, // 3MB 432 shared_font_mem = Kernel::SharedMemory::Create(nullptr, 0x332000, // 3272 KB
443 MemoryPermission::ReadWrite, MemoryPermission::Read, "APT_U:shared_font_mem"); 433 MemoryPermission::ReadWrite, MemoryPermission::Read, 0, Kernel::MemoryRegion::SYSTEM, "APT:SharedFont");
434 // Read shared font data
435 file.ReadBytes(shared_font_mem->GetPointer(), file.GetSize());
444 } else { 436 } else {
445 LOG_WARNING(Service_APT, "Unable to load shared font: %s", filepath.c_str()); 437 LOG_WARNING(Service_APT, "Unable to load shared font: %s", filepath.c_str());
446 shared_font_mem = nullptr; 438 shared_font_mem = nullptr;
@@ -459,8 +451,8 @@ void Init() {
459} 451}
460 452
461void Shutdown() { 453void Shutdown() {
462 shared_font = nullptr;
463 shared_font_mem = nullptr; 454 shared_font_mem = nullptr;
455 shared_font_relocated = false;
464 lock = nullptr; 456 lock = nullptr;
465 notification_event = nullptr; 457 notification_event = nullptr;
466 parameter_event = nullptr; 458 parameter_event = nullptr;
diff --git a/src/core/hle/service/apt/bcfnt/bcfnt.cpp b/src/core/hle/service/apt/bcfnt/bcfnt.cpp
new file mode 100644
index 000000000..b0d39d4a5
--- /dev/null
+++ b/src/core/hle/service/apt/bcfnt/bcfnt.cpp
@@ -0,0 +1,71 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/service/apt/bcfnt/bcfnt.h"
6#include "core/hle/service/service.h"
7
8namespace Service {
9namespace APT {
10namespace BCFNT {
11
12void RelocateSharedFont(Kernel::SharedPtr<Kernel::SharedMemory> shared_font, VAddr previous_address, VAddr new_address) {
13 static const u32 SharedFontStartOffset = 0x80;
14 u8* data = shared_font->GetPointer(SharedFontStartOffset);
15
16 CFNT cfnt;
17 memcpy(&cfnt, data, sizeof(cfnt));
18
19 // Advance past the header
20 data = shared_font->GetPointer(SharedFontStartOffset + cfnt.header_size);
21
22 for (unsigned block = 0; block < cfnt.num_blocks; ++block) {
23
24 u32 section_size = 0;
25 if (memcmp(data, "FINF", 4) == 0) {
26 BCFNT::FINF finf;
27 memcpy(&finf, data, sizeof(finf));
28 section_size = finf.section_size;
29
30 // Relocate the offsets in the FINF section
31 finf.cmap_offset += new_address - previous_address;
32 finf.cwdh_offset += new_address - previous_address;
33 finf.tglp_offset += new_address - previous_address;
34
35 memcpy(data, &finf, sizeof(finf));
36 } else if (memcmp(data, "CMAP", 4) == 0) {
37 BCFNT::CMAP cmap;
38 memcpy(&cmap, data, sizeof(cmap));
39 section_size = cmap.section_size;
40
41 // Relocate the offsets in the CMAP section
42 cmap.next_cmap_offset += new_address - previous_address;
43
44 memcpy(data, &cmap, sizeof(cmap));
45 } else if (memcmp(data, "CWDH", 4) == 0) {
46 BCFNT::CWDH cwdh;
47 memcpy(&cwdh, data, sizeof(cwdh));
48 section_size = cwdh.section_size;
49
50 // Relocate the offsets in the CWDH section
51 cwdh.next_cwdh_offset += new_address - previous_address;
52
53 memcpy(data, &cwdh, sizeof(cwdh));
54 } else if (memcmp(data, "TGLP", 4) == 0) {
55 BCFNT::TGLP tglp;
56 memcpy(&tglp, data, sizeof(tglp));
57 section_size = tglp.section_size;
58
59 // Relocate the offsets in the TGLP section
60 tglp.sheet_data_offset += new_address - previous_address;
61
62 memcpy(data, &tglp, sizeof(tglp));
63 }
64
65 data += section_size;
66 }
67}
68
69} // namespace BCFNT
70} // namespace APT
71} // namespace Service \ No newline at end of file
diff --git a/src/core/hle/service/apt/bcfnt/bcfnt.h b/src/core/hle/service/apt/bcfnt/bcfnt.h
new file mode 100644
index 000000000..388c6bea0
--- /dev/null
+++ b/src/core/hle/service/apt/bcfnt/bcfnt.h
@@ -0,0 +1,87 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/swap.h"
8
9#include "core/hle/kernel/shared_memory.h"
10#include "core/hle/service/service.h"
11
12namespace Service {
13namespace APT {
14namespace BCFNT { ///< BCFNT Shared Font file structures
15
16struct CFNT {
17 u8 magic[4];
18 u16_le endianness;
19 u16_le header_size;
20 u32_le version;
21 u32_le file_size;
22 u32_le num_blocks;
23};
24
25struct FINF {
26 u8 magic[4];
27 u32_le section_size;
28 u8 font_type;
29 u8 line_feed;
30 u16_le alter_char_index;
31 u8 default_width[3];
32 u8 encoding;
33 u32_le tglp_offset;
34 u32_le cwdh_offset;
35 u32_le cmap_offset;
36 u8 height;
37 u8 width;
38 u8 ascent;
39 u8 reserved;
40};
41
42struct TGLP {
43 u8 magic[4];
44 u32_le section_size;
45 u8 cell_width;
46 u8 cell_height;
47 u8 baseline_position;
48 u8 max_character_width;
49 u32_le sheet_size;
50 u16_le num_sheets;
51 u16_le sheet_image_format;
52 u16_le num_columns;
53 u16_le num_rows;
54 u16_le sheet_width;
55 u16_le sheet_height;
56 u32_le sheet_data_offset;
57};
58
59struct CMAP {
60 u8 magic[4];
61 u32_le section_size;
62 u16_le code_begin;
63 u16_le code_end;
64 u16_le mapping_method;
65 u16_le reserved;
66 u32_le next_cmap_offset;
67};
68
69struct CWDH {
70 u8 magic[4];
71 u32_le section_size;
72 u16_le start_index;
73 u16_le end_index;
74 u32_le next_cwdh_offset;
75};
76
77/**
78 * Relocates the internal addresses of the BCFNT Shared Font to the new base.
79 * @param shared_font SharedMemory object that contains the Shared Font
80 * @param previous_address Previous address at which the offsets in the structure were based.
81 * @param new_address New base for the offsets in the structure.
82 */
83void RelocateSharedFont(Kernel::SharedPtr<Kernel::SharedMemory> shared_font, VAddr previous_address, VAddr new_address);
84
85} // namespace BCFNT
86} // namespace APT
87} // namespace Service
diff --git a/src/core/hle/service/csnd_snd.cpp b/src/core/hle/service/csnd_snd.cpp
index 6318bf2a7..d2bb8941c 100644
--- a/src/core/hle/service/csnd_snd.cpp
+++ b/src/core/hle/service/csnd_snd.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include "common/alignment.h"
6#include "core/hle/hle.h" 7#include "core/hle/hle.h"
7#include "core/hle/kernel/mutex.h" 8#include "core/hle/kernel/mutex.h"
8#include "core/hle/kernel/shared_memory.h" 9#include "core/hle/kernel/shared_memory.h"
@@ -41,14 +42,16 @@ static Kernel::SharedPtr<Kernel::Mutex> mutex = nullptr;
41void Initialize(Service::Interface* self) { 42void Initialize(Service::Interface* self) {
42 u32* cmd_buff = Kernel::GetCommandBuffer(); 43 u32* cmd_buff = Kernel::GetCommandBuffer();
43 44
44 shared_memory = Kernel::SharedMemory::Create(cmd_buff[1], 45 u32 size = Common::AlignUp(cmd_buff[1], Memory::PAGE_SIZE);
45 Kernel::MemoryPermission::ReadWrite, 46 using Kernel::MemoryPermission;
46 Kernel::MemoryPermission::ReadWrite, "CSNDSharedMem"); 47 shared_memory = Kernel::SharedMemory::Create(nullptr, size,
48 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
49 0, Kernel::MemoryRegion::BASE, "CSND:SharedMemory");
47 50
48 mutex = Kernel::Mutex::Create(false); 51 mutex = Kernel::Mutex::Create(false);
49 52
50 cmd_buff[1] = 0; 53 cmd_buff[1] = RESULT_SUCCESS.raw;
51 cmd_buff[2] = 0x4000000; 54 cmd_buff[2] = IPC::MoveHandleDesc(2);
52 cmd_buff[3] = Kernel::g_handle_table.Create(mutex).MoveFrom(); 55 cmd_buff[3] = Kernel::g_handle_table.Create(mutex).MoveFrom();
53 cmd_buff[4] = Kernel::g_handle_table.Create(shared_memory).MoveFrom(); 56 cmd_buff[4] = Kernel::g_handle_table.Create(shared_memory).MoveFrom();
54} 57}
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 274fc751a..10730d7ac 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -440,9 +440,9 @@ static void GetHeadphoneStatus(Service::Interface* self) {
440 440
441 cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0); 441 cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0);
442 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 442 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
443 cmd_buff[2] = 0; // Not using headphones? 443 cmd_buff[2] = 0; // Not using headphones
444 444
445 LOG_WARNING(Service_DSP, "(STUBBED) called"); 445 LOG_DEBUG(Service_DSP, "called");
446} 446}
447 447
448/** 448/**
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index b4c146e08..8ded9b09b 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -335,8 +335,9 @@ static void RegisterInterruptRelayQueue(Service::Interface* self) {
335 g_interrupt_event->name = "GSP_GPU::interrupt_event"; 335 g_interrupt_event->name = "GSP_GPU::interrupt_event";
336 336
337 using Kernel::MemoryPermission; 337 using Kernel::MemoryPermission;
338 g_shared_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, 338 g_shared_memory = Kernel::SharedMemory::Create(nullptr, 0x1000,
339 MemoryPermission::ReadWrite, "GSPSharedMem"); 339 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
340 0, Kernel::MemoryRegion::BASE, "GSP:SharedMemory");
340 341
341 Handle shmem_handle = Kernel::g_handle_table.Create(g_shared_memory).MoveFrom(); 342 Handle shmem_handle = Kernel::g_handle_table.Create(g_shared_memory).MoveFrom();
342 343
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 1053d0f40..d216cecb4 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -280,8 +280,9 @@ void Init() {
280 AddService(new HID_SPVR_Interface); 280 AddService(new HID_SPVR_Interface);
281 281
282 using Kernel::MemoryPermission; 282 using Kernel::MemoryPermission;
283 shared_mem = SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, 283 shared_mem = SharedMemory::Create(nullptr, 0x1000,
284 MemoryPermission::Read, "HID:SharedMem"); 284 MemoryPermission::ReadWrite, MemoryPermission::Read,
285 0, Kernel::MemoryRegion::BASE, "HID:SharedMemory");
285 286
286 next_pad_index = 0; 287 next_pad_index = 0;
287 next_touch_index = 0; 288 next_touch_index = 0;
diff --git a/src/core/hle/service/ir/ir.cpp b/src/core/hle/service/ir/ir.cpp
index 505c441c6..079a87e48 100644
--- a/src/core/hle/service/ir/ir.cpp
+++ b/src/core/hle/service/ir/ir.cpp
@@ -94,8 +94,9 @@ void Init() {
94 AddService(new IR_User_Interface); 94 AddService(new IR_User_Interface);
95 95
96 using Kernel::MemoryPermission; 96 using Kernel::MemoryPermission;
97 shared_memory = SharedMemory::Create(0x1000, Kernel::MemoryPermission::ReadWrite, 97 shared_memory = SharedMemory::Create(nullptr, 0x1000,
98 Kernel::MemoryPermission::ReadWrite, "IR:SharedMemory"); 98 Kernel::MemoryPermission::ReadWrite, Kernel::MemoryPermission::ReadWrite,
99 0, Kernel::MemoryRegion::BASE, "IR:SharedMemory");
99 transfer_shared_memory = nullptr; 100 transfer_shared_memory = nullptr;
100 101
101 // Create event handle(s) 102 // Create event handle(s)
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 0fe3a4d7a..d7e7d4fe3 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -7,6 +7,7 @@
7 7
8#include "core/hle/service/service.h" 8#include "core/hle/service/service.h"
9#include "core/hle/service/ac_u.h" 9#include "core/hle/service/ac_u.h"
10#include "core/hle/service/act_a.h"
10#include "core/hle/service/act_u.h" 11#include "core/hle/service/act_u.h"
11#include "core/hle/service/csnd_snd.h" 12#include "core/hle/service/csnd_snd.h"
12#include "core/hle/service/dlp_srvr.h" 13#include "core/hle/service/dlp_srvr.h"
@@ -119,6 +120,7 @@ void Init() {
119 Service::PTM::Init(); 120 Service::PTM::Init();
120 121
121 AddService(new AC_U::Interface); 122 AddService(new AC_U::Interface);
123 AddService(new ACT_A::Interface);
122 AddService(new ACT_U::Interface); 124 AddService(new ACT_U::Interface);
123 AddService(new CSND_SND::Interface); 125 AddService(new CSND_SND::Interface);
124 AddService(new DLP_SRVR::Interface); 126 AddService(new DLP_SRVR::Interface);
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index 60c8747f3..0ce72de87 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -6,6 +6,7 @@
6 6
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/microprofile.h" 8#include "common/microprofile.h"
9#include "common/scope_exit.h"
9#include "common/string_util.h" 10#include "common/string_util.h"
10#include "common/symbols.h" 11#include "common/symbols.h"
11 12
@@ -99,6 +100,7 @@ static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 add
99 switch (operation & MEMOP_OPERATION_MASK) { 100 switch (operation & MEMOP_OPERATION_MASK) {
100 case MEMOP_FREE: 101 case MEMOP_FREE:
101 { 102 {
103 // TODO(Subv): What happens if an application tries to FREE a block of memory that has a SharedMemory pointing to it?
102 if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) { 104 if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) {
103 ResultCode result = process.HeapFree(addr0, size); 105 ResultCode result = process.HeapFree(addr0, size);
104 if (result.IsError()) return result; 106 if (result.IsError()) return result;
@@ -160,8 +162,6 @@ static ResultCode MapMemoryBlock(Handle handle, u32 addr, u32 permissions, u32 o
160 LOG_TRACE(Kernel_SVC, "called memblock=0x%08X, addr=0x%08X, mypermissions=0x%08X, otherpermission=%d", 162 LOG_TRACE(Kernel_SVC, "called memblock=0x%08X, addr=0x%08X, mypermissions=0x%08X, otherpermission=%d",
161 handle, addr, permissions, other_permissions); 163 handle, addr, permissions, other_permissions);
162 164
163 // TODO(Subv): The same process that created a SharedMemory object can not map it in its own address space
164
165 SharedPtr<SharedMemory> shared_memory = Kernel::g_handle_table.Get<SharedMemory>(handle); 165 SharedPtr<SharedMemory> shared_memory = Kernel::g_handle_table.Get<SharedMemory>(handle);
166 if (shared_memory == nullptr) 166 if (shared_memory == nullptr)
167 return ERR_INVALID_HANDLE; 167 return ERR_INVALID_HANDLE;
@@ -176,7 +176,7 @@ static ResultCode MapMemoryBlock(Handle handle, u32 addr, u32 permissions, u32 o
176 case MemoryPermission::WriteExecute: 176 case MemoryPermission::WriteExecute:
177 case MemoryPermission::ReadWriteExecute: 177 case MemoryPermission::ReadWriteExecute:
178 case MemoryPermission::DontCare: 178 case MemoryPermission::DontCare:
179 return shared_memory->Map(addr, permissions_type, 179 return shared_memory->Map(Kernel::g_current_process.get(), addr, permissions_type,
180 static_cast<MemoryPermission>(other_permissions)); 180 static_cast<MemoryPermission>(other_permissions));
181 default: 181 default:
182 LOG_ERROR(Kernel_SVC, "unknown permissions=0x%08X", permissions); 182 LOG_ERROR(Kernel_SVC, "unknown permissions=0x%08X", permissions);
@@ -196,7 +196,7 @@ static ResultCode UnmapMemoryBlock(Handle handle, u32 addr) {
196 if (shared_memory == nullptr) 196 if (shared_memory == nullptr)
197 return ERR_INVALID_HANDLE; 197 return ERR_INVALID_HANDLE;
198 198
199 return shared_memory->Unmap(addr); 199 return shared_memory->Unmap(Kernel::g_current_process.get(), addr);
200} 200}
201 201
202/// Connect to an OS service given the port name, returns the handle to the port to out 202/// Connect to an OS service given the port name, returns the handle to the port to out
@@ -327,9 +327,9 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou
327 } 327 }
328 } 328 }
329 329
330 HLE::Reschedule(__func__); 330 SCOPE_EXIT({HLE::Reschedule("WaitSynchronizationN");}); // Reschedule after putting the threads to sleep.
331 331
332 // If thread should wait, then set its state to waiting and then reschedule... 332 // If thread should wait, then set its state to waiting
333 if (wait_thread) { 333 if (wait_thread) {
334 334
335 // Actually wait the current thread on each object if we decided to wait... 335 // Actually wait the current thread on each object if we decided to wait...
@@ -503,6 +503,9 @@ static ResultCode CreateThread(Handle* out_handle, s32 priority, u32 entry_point
503 503
504 CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create( 504 CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create(
505 name, entry_point, priority, arg, processor_id, stack_top)); 505 name, entry_point, priority, arg, processor_id, stack_top));
506
507 thread->context.fpscr = FPSCR_DEFAULT_NAN | FPSCR_FLUSH_TO_ZERO | FPSCR_ROUND_TOZERO; // 0x03C00000
508
506 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread))); 509 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread)));
507 510
508 LOG_TRACE(Kernel_SVC, "called entrypoint=0x%08X (%s), arg=0x%08X, stacktop=0x%08X, " 511 LOG_TRACE(Kernel_SVC, "called entrypoint=0x%08X (%s), arg=0x%08X, stacktop=0x%08X, "
@@ -790,18 +793,44 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32
790 if (size % Memory::PAGE_SIZE != 0) 793 if (size % Memory::PAGE_SIZE != 0)
791 return ResultCode(ErrorDescription::MisalignedSize, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); 794 return ResultCode(ErrorDescription::MisalignedSize, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
792 795
793 // TODO(Subv): Return E0A01BF5 if the address is not in the application's heap 796 SharedPtr<SharedMemory> shared_memory = nullptr;
794
795 // TODO(Subv): Implement this function properly
796 797
797 using Kernel::MemoryPermission; 798 using Kernel::MemoryPermission;
798 SharedPtr<SharedMemory> shared_memory = SharedMemory::Create(size, 799 auto VerifyPermissions = [](MemoryPermission permission) {
799 (MemoryPermission)my_permission, (MemoryPermission)other_permission); 800 // SharedMemory blocks can not be created with Execute permissions
800 // Map the SharedMemory to the specified address 801 switch (permission) {
801 shared_memory->base_address = addr; 802 case MemoryPermission::None:
803 case MemoryPermission::Read:
804 case MemoryPermission::Write:
805 case MemoryPermission::ReadWrite:
806 case MemoryPermission::DontCare:
807 return true;
808 default:
809 return false;
810 }
811 };
812
813 if (!VerifyPermissions(static_cast<MemoryPermission>(my_permission)) ||
814 !VerifyPermissions(static_cast<MemoryPermission>(other_permission)))
815 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS,
816 ErrorSummary::InvalidArgument, ErrorLevel::Usage);
817
818 if (addr < Memory::PROCESS_IMAGE_VADDR || addr + size > Memory::SHARED_MEMORY_VADDR_END) {
819 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
820 }
821
822 // When trying to create a memory block with address = 0,
823 // if the process has the Shared Device Memory flag in the exheader,
824 // then we have to allocate from the same region as the caller process instead of the BASE region.
825 Kernel::MemoryRegion region = Kernel::MemoryRegion::BASE;
826 if (addr == 0 && Kernel::g_current_process->flags.shared_device_mem)
827 region = Kernel::g_current_process->flags.memory_region;
828
829 shared_memory = SharedMemory::Create(Kernel::g_current_process, size,
830 static_cast<MemoryPermission>(my_permission), static_cast<MemoryPermission>(other_permission), addr, region);
802 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(shared_memory))); 831 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(shared_memory)));
803 832
804 LOG_WARNING(Kernel_SVC, "(STUBBED) called addr=0x%08X", addr); 833 LOG_WARNING(Kernel_SVC, "called addr=0x%08X", addr);
805 return RESULT_SUCCESS; 834 return RESULT_SUCCESS;
806} 835}
807 836
diff --git a/src/core/memory.h b/src/core/memory.h
index 9caa3c3f5..126d60471 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -100,15 +100,9 @@ enum : VAddr {
100 SHARED_PAGE_SIZE = 0x00001000, 100 SHARED_PAGE_SIZE = 0x00001000,
101 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, 101 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
102 102
103 // TODO(yuriks): The size of this area is dynamic, the kernel grows
104 // it as more and more threads are created. For now we'll just use a
105 // hardcoded value.
106 /// Area where TLS (Thread-Local Storage) buffers are allocated. 103 /// Area where TLS (Thread-Local Storage) buffers are allocated.
107 TLS_AREA_VADDR = 0x1FF82000, 104 TLS_AREA_VADDR = 0x1FF82000,
108 TLS_ENTRY_SIZE = 0x200, 105 TLS_ENTRY_SIZE = 0x200,
109 TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size
110 TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
111
112 106
113 /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. 107 /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
114 NEW_LINEAR_HEAP_VADDR = 0x30000000, 108 NEW_LINEAR_HEAP_VADDR = 0x30000000,
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
new file mode 100644
index 000000000..457c55571
--- /dev/null
+++ b/src/tests/CMakeLists.txt
@@ -0,0 +1,16 @@
1set(SRCS
2 tests.cpp
3 )
4
5set(HEADERS
6 )
7
8create_directory_groups(${SRCS} ${HEADERS})
9
10include_directories(../../externals/catch/single_include/)
11
12add_executable(tests ${SRCS} ${HEADERS})
13target_link_libraries(tests core video_core audio_core common)
14target_link_libraries(tests ${PLATFORM_LIBRARIES})
15
16add_test(NAME tests COMMAND $<TARGET_FILE:tests>)
diff --git a/src/tests/tests.cpp b/src/tests/tests.cpp
new file mode 100644
index 000000000..73978676f
--- /dev/null
+++ b/src/tests/tests.cpp
@@ -0,0 +1,9 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#define CATCH_CONFIG_MAIN
6#include <catch.hpp>
7
8// Catch provides the main function since we've given it the
9// CATCH_CONFIG_MAIN preprocessor directive.
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 2bc747102..db99ce666 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -75,8 +75,6 @@ static void InitScreenCoordinates(OutputVertex& vtx)
75 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); 75 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
76 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); 76 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
77 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); 77 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
78 viewport.zscale = float24::FromRaw(regs.viewport_depth_range);
79 viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane);
80 78
81 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; 79 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
82 vtx.color *= inv_w; 80 vtx.color *= inv_w;
@@ -89,7 +87,7 @@ static void InitScreenCoordinates(OutputVertex& vtx)
89 87
90 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; 88 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
91 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; 89 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
92 vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; 90 vtx.screenpos[2] = vtx.pos.z * inv_w;
93} 91}
94 92
95void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { 93void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) {
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 941c5af9f..bf4664f9e 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -128,7 +128,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
128 128
129 // TODO: Verify that this actually modifies the register! 129 // TODO: Verify that this actually modifies the register!
130 if (setup.index < 15) { 130 if (setup.index < 15) {
131 g_state.vs.default_attributes[setup.index] = attribute; 131 g_state.vs_default_attributes[setup.index] = attribute;
132 setup.index++; 132 setup.index++;
133 } else { 133 } else {
134 // Put each attribute into an immediate input buffer. 134 // Put each attribute into an immediate input buffer.
@@ -144,12 +144,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
144 immediate_attribute_id = 0; 144 immediate_attribute_id = 0;
145 145
146 Shader::UnitState<false> shader_unit; 146 Shader::UnitState<false> shader_unit;
147 Shader::Setup(); 147 g_state.vs.Setup();
148 148
149 // Send to vertex shader 149 // Send to vertex shader
150 if (g_debug_context) 150 if (g_debug_context)
151 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input)); 151 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input));
152 Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); 152 Shader::OutputVertex output = g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
153 153
154 // Send to renderer 154 // Send to renderer
155 using Pica::Shader::OutputVertex; 155 using Pica::Shader::OutputVertex;
@@ -236,7 +236,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
236 vertex_cache_ids.fill(-1); 236 vertex_cache_ids.fill(-1);
237 237
238 Shader::UnitState<false> shader_unit; 238 Shader::UnitState<false> shader_unit;
239 Shader::Setup(); 239 g_state.vs.Setup();
240 240
241 for (unsigned int index = 0; index < regs.num_vertices; ++index) 241 for (unsigned int index = 0; index < regs.num_vertices; ++index)
242 { 242 {
@@ -273,7 +273,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
273 // Send to vertex shader 273 // Send to vertex shader
274 if (g_debug_context) 274 if (g_debug_context)
275 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); 275 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input);
276 output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); 276 output = g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
277 277
278 if (is_indexed) { 278 if (is_indexed) {
279 vertex_cache[vertex_cache_pos] = output; 279 vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 2f645b441..871368323 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -696,106 +696,125 @@ finalise:
696#endif 696#endif
697} 697}
698 698
699void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages) 699static std::string ReplacePattern(const std::string& input, const std::string& pattern, const std::string& replacement) {
700{ 700 size_t start = input.find(pattern);
701 if (start == std::string::npos)
702 return input;
703
704 std::string ret = input;
705 ret.replace(start, pattern.length(), replacement);
706 return ret;
707}
708
709static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) {
701 using Source = Pica::Regs::TevStageConfig::Source; 710 using Source = Pica::Regs::TevStageConfig::Source;
711 static const std::map<Source, std::string> source_map = {
712 { Source::PrimaryColor, "PrimaryColor" },
713 { Source::PrimaryFragmentColor, "PrimaryFragmentColor" },
714 { Source::SecondaryFragmentColor, "SecondaryFragmentColor" },
715 { Source::Texture0, "Texture0" },
716 { Source::Texture1, "Texture1" },
717 { Source::Texture2, "Texture2" },
718 { Source::Texture3, "Texture3" },
719 { Source::PreviousBuffer, "PreviousBuffer" },
720 { Source::Constant, "Constant" },
721 { Source::Previous, "Previous" },
722 };
723
724 const auto src_it = source_map.find(source);
725 if (src_it == source_map.end())
726 return "Unknown";
727
728 return src_it->second;
729}
730
731static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::ColorModifier modifier) {
702 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; 732 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier;
733 static const std::map<ColorModifier, std::string> color_modifier_map = {
734 { ColorModifier::SourceColor, "%source.rgb" },
735 { ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)" },
736 { ColorModifier::SourceAlpha, "%source.aaa" },
737 { ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)" },
738 { ColorModifier::SourceRed, "%source.rrr" },
739 { ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)" },
740 { ColorModifier::SourceGreen, "%source.ggg" },
741 { ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)" },
742 { ColorModifier::SourceBlue, "%source.bbb" },
743 { ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)" },
744 };
745
746 auto src_str = GetTevStageConfigSourceString(source);
747 auto modifier_it = color_modifier_map.find(modifier);
748 std::string modifier_str = "%source.????";
749 if (modifier_it != color_modifier_map.end())
750 modifier_str = modifier_it->second;
751
752 return ReplacePattern(modifier_str, "%source", src_str);
753}
754
755static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::AlphaModifier modifier) {
703 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; 756 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier;
757 static const std::map<AlphaModifier, std::string> alpha_modifier_map = {
758 { AlphaModifier::SourceAlpha, "%source.a" },
759 { AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)" },
760 { AlphaModifier::SourceRed, "%source.r" },
761 { AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)" },
762 { AlphaModifier::SourceGreen, "%source.g" },
763 { AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)" },
764 { AlphaModifier::SourceBlue, "%source.b" },
765 { AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)" },
766 };
767
768 auto src_str = GetTevStageConfigSourceString(source);
769 auto modifier_it = alpha_modifier_map.find(modifier);
770 std::string modifier_str = "%source.????";
771 if (modifier_it != alpha_modifier_map.end())
772 modifier_str = modifier_it->second;
773
774 return ReplacePattern(modifier_str, "%source", src_str);
775}
776
777static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageConfig::Operation& operation) {
704 using Operation = Pica::Regs::TevStageConfig::Operation; 778 using Operation = Pica::Regs::TevStageConfig::Operation;
779 static const std::map<Operation, std::string> combiner_map = {
780 { Operation::Replace, "%source1" },
781 { Operation::Modulate, "(%source1 * %source2)" },
782 { Operation::Add, "(%source1 + %source2)" },
783 { Operation::AddSigned, "(%source1 + %source2) - 0.5" },
784 { Operation::Lerp, "lerp(%source1, %source2, %source3)" },
785 { Operation::Subtract, "(%source1 - %source2)" },
786 { Operation::Dot3_RGB, "dot(%source1, %source2)" },
787 { Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)" },
788 { Operation::AddThenMultiply, "((%source1 + %source2) * %source3)" },
789 };
705 790
706 std::string stage_info = "Tev setup:\n"; 791 const auto op_it = combiner_map.find(operation);
707 for (size_t index = 0; index < stages.size(); ++index) { 792 if (op_it == combiner_map.end())
708 const auto& tev_stage = stages[index]; 793 return "Unknown op (%source1, %source2, %source3)";
709 794
710 static const std::map<Source, std::string> source_map = { 795 return op_it->second;
711 { Source::PrimaryColor, "PrimaryColor" }, 796}
712 { Source::Texture0, "Texture0" },
713 { Source::Texture1, "Texture1" },
714 { Source::Texture2, "Texture2" },
715 { Source::Constant, "Constant" },
716 { Source::Previous, "Previous" },
717 };
718 797
719 static const std::map<ColorModifier, std::string> color_modifier_map = { 798std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
720 { ColorModifier::SourceColor, { "%source.rgb" } }, 799 auto op_str = GetTevStageConfigOperationString(tev_stage.color_op);
721 { ColorModifier::SourceAlpha, { "%source.aaa" } }, 800 op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1));
722 }; 801 op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2));
723 static const std::map<AlphaModifier, std::string> alpha_modifier_map = { 802 return ReplacePattern(op_str, "%source3", GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3));
724 { AlphaModifier::SourceAlpha, "%source.a" }, 803}
725 { AlphaModifier::OneMinusSourceAlpha, "(255 - %source.a)" },
726 };
727 804
728 static const std::map<Operation, std::string> combiner_map = { 805std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
729 { Operation::Replace, "%source1" }, 806 auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op);
730 { Operation::Modulate, "(%source1 * %source2) / 255" }, 807 op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
731 { Operation::Add, "(%source1 + %source2)" }, 808 op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
732 { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, 809 return ReplacePattern(op_str, "%source3", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
733 }; 810}
734 811
735 static auto ReplacePattern = 812void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) {
736 [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string { 813 std::string stage_info = "Tev setup:\n";
737 size_t start = input.find(pattern); 814 for (size_t index = 0; index < stages.size(); ++index) {
738 if (start == std::string::npos) 815 const auto& tev_stage = stages[index];
739 return input; 816 stage_info += "Stage " + std::to_string(index) + ": " + GetTevStageConfigColorCombinerString(tev_stage) + " " + GetTevStageConfigAlphaCombinerString(tev_stage) + "\n";
740
741 std::string ret = input;
742 ret.replace(start, pattern.length(), replacement);
743 return ret;
744 };
745 static auto GetColorSourceStr =
746 [](const Source& src, const ColorModifier& modifier) {
747 auto src_it = source_map.find(src);
748 std::string src_str = "Unknown";
749 if (src_it != source_map.end())
750 src_str = src_it->second;
751
752 auto modifier_it = color_modifier_map.find(modifier);
753 std::string modifier_str = "%source.????";
754 if (modifier_it != color_modifier_map.end())
755 modifier_str = modifier_it->second;
756
757 return ReplacePattern(modifier_str, "%source", src_str);
758 };
759 static auto GetColorCombinerStr =
760 [](const Regs::TevStageConfig& tev_stage) {
761 auto op_it = combiner_map.find(tev_stage.color_op);
762 std::string op_str = "Unknown op (%source1, %source2, %source3)";
763 if (op_it != combiner_map.end())
764 op_str = op_it->second;
765
766 op_str = ReplacePattern(op_str, "%source1", GetColorSourceStr(tev_stage.color_source1, tev_stage.color_modifier1));
767 op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2));
768 return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3));
769 };
770 static auto GetAlphaSourceStr =
771 [](const Source& src, const AlphaModifier& modifier) {
772 auto src_it = source_map.find(src);
773 std::string src_str = "Unknown";
774 if (src_it != source_map.end())
775 src_str = src_it->second;
776
777 auto modifier_it = alpha_modifier_map.find(modifier);
778 std::string modifier_str = "%source.????";
779 if (modifier_it != alpha_modifier_map.end())
780 modifier_str = modifier_it->second;
781
782 return ReplacePattern(modifier_str, "%source", src_str);
783 };
784 static auto GetAlphaCombinerStr =
785 [](const Regs::TevStageConfig& tev_stage) {
786 auto op_it = combiner_map.find(tev_stage.alpha_op);
787 std::string op_str = "Unknown op (%source1, %source2, %source3)";
788 if (op_it != combiner_map.end())
789 op_str = op_it->second;
790
791 op_str = ReplacePattern(op_str, "%source1", GetAlphaSourceStr(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
792 op_str = ReplacePattern(op_str, "%source2", GetAlphaSourceStr(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
793 return ReplacePattern(op_str, "%source3", GetAlphaSourceStr(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
794 };
795
796 stage_info += "Stage " + std::to_string(index) + ": " + GetColorCombinerStr(tev_stage) + " " + GetAlphaCombinerStr(tev_stage) + "\n";
797 } 817 }
798
799 LOG_TRACE(HW_GPU, "%s", stage_info.c_str()); 818 LOG_TRACE(HW_GPU, "%s", stage_info.c_str());
800} 819}
801 820
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index f628292a4..92e9734ae 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -224,7 +224,11 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const Texture
224 224
225void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); 225void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
226 226
227void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); 227std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage);
228std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage);
229
230/// Dumps the Tev stage config to log at trace level
231void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages);
228 232
229/** 233/**
230 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful. 234 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index be82cf4b5..ec78f9593 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -500,7 +500,7 @@ void Init() {
500} 500}
501 501
502void Shutdown() { 502void Shutdown() {
503 Shader::Shutdown(); 503 Shader::ClearCache();
504} 504}
505 505
506template <typename T> 506template <typename T>
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 5891fb72a..86c0a0096 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -70,7 +70,7 @@ struct Regs {
70 INSERT_PADDING_WORDS(0x9); 70 INSERT_PADDING_WORDS(0x9);
71 71
72 BitField<0, 24, u32> viewport_depth_range; // float24 72 BitField<0, 24, u32> viewport_depth_range; // float24
73 BitField<0, 24, u32> viewport_depth_far_plane; // float24 73 BitField<0, 24, u32> viewport_depth_near_plane; // float24
74 74
75 BitField<0, 3, u32> vs_output_total; 75 BitField<0, 3, u32> vs_output_total;
76 76
@@ -122,9 +122,31 @@ struct Regs {
122 BitField<16, 10, s32> y; 122 BitField<16, 10, s32> y;
123 } viewport_corner; 123 } viewport_corner;
124 124
125 INSERT_PADDING_WORDS(0x17); 125 INSERT_PADDING_WORDS(0x1);
126
127 //TODO: early depth
128 INSERT_PADDING_WORDS(0x1);
129
130 INSERT_PADDING_WORDS(0x2);
131
132 enum DepthBuffering : u32 {
133 WBuffering = 0,
134 ZBuffering = 1,
135 };
136 BitField< 0, 1, DepthBuffering> depthmap_enable;
137
138 INSERT_PADDING_WORDS(0x12);
126 139
127 struct TextureConfig { 140 struct TextureConfig {
141 enum TextureType : u32 {
142 Texture2D = 0,
143 TextureCube = 1,
144 Shadow2D = 2,
145 Projection2D = 3,
146 ShadowCube = 4,
147 Disabled = 5,
148 };
149
128 enum WrapMode : u32 { 150 enum WrapMode : u32 {
129 ClampToEdge = 0, 151 ClampToEdge = 0,
130 ClampToBorder = 1, 152 ClampToBorder = 1,
@@ -155,6 +177,7 @@ struct Regs {
155 BitField< 2, 1, TextureFilter> min_filter; 177 BitField< 2, 1, TextureFilter> min_filter;
156 BitField< 8, 2, WrapMode> wrap_t; 178 BitField< 8, 2, WrapMode> wrap_t;
157 BitField<12, 2, WrapMode> wrap_s; 179 BitField<12, 2, WrapMode> wrap_s;
180 BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew.
158 }; 181 };
159 182
160 INSERT_PADDING_WORDS(0x1); 183 INSERT_PADDING_WORDS(0x1);
@@ -1279,10 +1302,11 @@ ASSERT_REG_POSITION(cull_mode, 0x40);
1279ASSERT_REG_POSITION(viewport_size_x, 0x41); 1302ASSERT_REG_POSITION(viewport_size_x, 0x41);
1280ASSERT_REG_POSITION(viewport_size_y, 0x43); 1303ASSERT_REG_POSITION(viewport_size_y, 0x43);
1281ASSERT_REG_POSITION(viewport_depth_range, 0x4d); 1304ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
1282ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); 1305ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e);
1283ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); 1306ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
1284ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); 1307ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
1285ASSERT_REG_POSITION(viewport_corner, 0x68); 1308ASSERT_REG_POSITION(viewport_corner, 0x68);
1309ASSERT_REG_POSITION(depthmap_enable, 0x6D);
1286ASSERT_REG_POSITION(texture0_enable, 0x80); 1310ASSERT_REG_POSITION(texture0_enable, 0x80);
1287ASSERT_REG_POSITION(texture0, 0x81); 1311ASSERT_REG_POSITION(texture0, 0x81);
1288ASSERT_REG_POSITION(texture0_format, 0x8e); 1312ASSERT_REG_POSITION(texture0_format, 0x8e);
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 1059c6ae4..495174c25 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -25,6 +25,8 @@ struct State {
25 Shader::ShaderSetup vs; 25 Shader::ShaderSetup vs;
26 Shader::ShaderSetup gs; 26 Shader::ShaderSetup gs;
27 27
28 std::array<Math::Vec4<float24>, 16> vs_default_attributes;
29
28 struct { 30 struct {
29 union LutEntry { 31 union LutEntry {
30 // Used for raw access 32 // Used for raw access
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index df67b9081..65168f05a 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -442,8 +442,33 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
442 442
443 DEBUG_ASSERT(0 != texture.config.address); 443 DEBUG_ASSERT(0 != texture.config.address);
444 444
445 int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); 445 float24 u = uv[i].u();
446 int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); 446 float24 v = uv[i].v();
447
448 // Only unit 0 respects the texturing type (according to 3DBrew)
449 // TODO: Refactor so cubemaps and shadowmaps can be handled
450 if (i == 0) {
451 switch(texture.config.type) {
452 case Regs::TextureConfig::Texture2D:
453 break;
454 case Regs::TextureConfig::Projection2D: {
455 auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
456 u /= tc0_w;
457 v /= tc0_w;
458 break;
459 }
460 default:
461 // TODO: Change to LOG_ERROR when more types are handled.
462 LOG_DEBUG(HW_GPU, "Unhandled texture type %x", (int)texture.config.type);
463 UNIMPLEMENTED();
464 break;
465 }
466 }
467
468 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
469 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
470
471
447 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { 472 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
448 switch (mode) { 473 switch (mode) {
449 case Regs::TextureConfig::ClampToEdge: 474 case Regs::TextureConfig::ClampToEdge:
@@ -862,10 +887,30 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
862 } 887 }
863 } 888 }
864 889
890 // interpolated_z = z / w
891 float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 +
892 v1.screenpos[2].ToFloat32() * w1 +
893 v2.screenpos[2].ToFloat32() * w2) / wsum;
894
895 // Not fully accurate. About 3 bits in precision are missing.
896 // Z-Buffer (z / w * scale + offset)
897 float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32();
898 float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32();
899 float depth = interpolated_z_over_w * depth_scale + depth_offset;
900
901 // Potentially switch to W-Buffer
902 if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
903
904 // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
905 depth *= interpolated_w_inverse.ToFloat32() * wsum;
906 }
907
908 // Clamp the result
909 depth = MathUtil::Clamp(depth, 0.0f, 1.0f);
910
911 // Convert float to integer
865 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); 912 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
866 u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + 913 u32 z = (u32)(depth * ((1 << num_bits) - 1));
867 v1.screenpos[2].ToFloat32() * w1 +
868 v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
869 914
870 if (output_merger.depth_test_enable) { 915 if (output_merger.depth_test_enable) {
871 u32 ref_z = GetDepth(x >> 4, y >> 4); 916 u32 ref_z = GetDepth(x >> 4, y >> 4);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0b471dfd2..bcd1ae78d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -76,6 +76,9 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); 76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
77 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); 77 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
78 78
79 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w));
80 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W);
81
79 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); 82 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
80 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); 83 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
81 84
@@ -101,7 +104,6 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
101 104
102 // Sync fixed function OpenGL state 105 // Sync fixed function OpenGL state
103 SyncCullMode(); 106 SyncCullMode();
104 SyncDepthModifiers();
105 SyncBlendEnabled(); 107 SyncBlendEnabled();
106 SyncBlendFuncs(); 108 SyncBlendFuncs();
107 SyncBlendColor(); 109 SyncBlendColor();
@@ -256,8 +258,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
256 258
257 // Depth modifiers 259 // Depth modifiers
258 case PICA_REG_INDEX(viewport_depth_range): 260 case PICA_REG_INDEX(viewport_depth_range):
259 case PICA_REG_INDEX(viewport_depth_far_plane): 261 SyncDepthScale();
260 SyncDepthModifiers(); 262 break;
263 case PICA_REG_INDEX(viewport_depth_near_plane):
264 SyncDepthOffset();
265 break;
266
267 // Depth buffering
268 case PICA_REG_INDEX(depthmap_enable):
269 shader_dirty = true;
261 break; 270 break;
262 271
263 // Blending 272 // Blending
@@ -314,6 +323,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
314 SyncLogicOp(); 323 SyncLogicOp();
315 break; 324 break;
316 325
326 // Texture 0 type
327 case PICA_REG_INDEX(texture0.type):
328 shader_dirty = true;
329 break;
330
317 // TEV stages 331 // TEV stages
318 case PICA_REG_INDEX(tev_stage0.color_source1): 332 case PICA_REG_INDEX(tev_stage0.color_source1):
319 case PICA_REG_INDEX(tev_stage0.color_modifier1): 333 case PICA_REG_INDEX(tev_stage0.color_modifier1):
@@ -867,6 +881,8 @@ void RasterizerOpenGL::SetShader() {
867 glUniformBlockBinding(current_shader->shader.handle, block_index, 0); 881 glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
868 882
869 // Update uniforms 883 // Update uniforms
884 SyncDepthScale();
885 SyncDepthOffset();
870 SyncAlphaTest(); 886 SyncAlphaTest();
871 SyncCombinerColor(); 887 SyncCombinerColor();
872 auto& tev_stages = Pica::g_state.regs.GetTevStages(); 888 auto& tev_stages = Pica::g_state.regs.GetTevStages();
@@ -909,13 +925,20 @@ void RasterizerOpenGL::SyncCullMode() {
909 } 925 }
910} 926}
911 927
912void RasterizerOpenGL::SyncDepthModifiers() { 928void RasterizerOpenGL::SyncDepthScale() {
913 float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); 929 float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
914 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; 930 if (depth_scale != uniform_block_data.data.depth_scale) {
931 uniform_block_data.data.depth_scale = depth_scale;
932 uniform_block_data.dirty = true;
933 }
934}
915 935
916 // TODO: Implement scale modifier 936void RasterizerOpenGL::SyncDepthOffset() {
917 uniform_block_data.data.depth_offset = depth_offset; 937 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
918 uniform_block_data.dirty = true; 938 if (depth_offset != uniform_block_data.data.depth_offset) {
939 uniform_block_data.data.depth_offset = depth_offset;
940 uniform_block_data.dirty = true;
941 }
919} 942}
920 943
921void RasterizerOpenGL::SyncBlendEnabled() { 944void RasterizerOpenGL::SyncBlendEnabled() {
@@ -924,6 +947,8 @@ void RasterizerOpenGL::SyncBlendEnabled() {
924 947
925void RasterizerOpenGL::SyncBlendFuncs() { 948void RasterizerOpenGL::SyncBlendFuncs() {
926 const auto& regs = Pica::g_state.regs; 949 const auto& regs = Pica::g_state.regs;
950 state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb);
951 state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a);
927 state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); 952 state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb);
928 state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); 953 state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb);
929 state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); 954 state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 82fa61742..d70369400 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -39,140 +39,185 @@ struct ScreenInfo;
39 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where 39 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
40 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) 40 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
41 * two separate shaders sharing the same key. 41 * two separate shaders sharing the same key.
42 *
43 * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X."
44 * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X."
45 * = Bytewise copy instead of memberwise copy.
46 * This is important because the padding bytes are included in the hash and comparison between objects.
42 */ 47 */
43struct PicaShaderConfig { 48union PicaShaderConfig {
49
44 /// Construct a PicaShaderConfig with the current Pica register configuration. 50 /// Construct a PicaShaderConfig with the current Pica register configuration.
45 static PicaShaderConfig CurrentConfig() { 51 static PicaShaderConfig CurrentConfig() {
46 PicaShaderConfig res; 52 PicaShaderConfig res;
53
54 auto& state = res.state;
55 std::memset(&state, 0, sizeof(PicaShaderConfig::State));
56
47 const auto& regs = Pica::g_state.regs; 57 const auto& regs = Pica::g_state.regs;
48 58
49 res.alpha_test_func = regs.output_merger.alpha_test.enable ? 59 state.depthmap_enable = regs.depthmap_enable;
60
61 state.alpha_test_func = regs.output_merger.alpha_test.enable ?
50 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; 62 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always;
51 63
64 state.texture0_type = regs.texture0.type;
65
52 // Copy relevant tev stages fields. 66 // Copy relevant tev stages fields.
53 // We don't sync const_color here because of the high variance, it is a 67 // We don't sync const_color here because of the high variance, it is a
54 // shader uniform instead. 68 // shader uniform instead.
55 const auto& tev_stages = regs.GetTevStages(); 69 const auto& tev_stages = regs.GetTevStages();
56 DEBUG_ASSERT(res.tev_stages.size() == tev_stages.size()); 70 DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
57 for (size_t i = 0; i < tev_stages.size(); i++) { 71 for (size_t i = 0; i < tev_stages.size(); i++) {
58 const auto& tev_stage = tev_stages[i]; 72 const auto& tev_stage = tev_stages[i];
59 res.tev_stages[i].sources_raw = tev_stage.sources_raw; 73 state.tev_stages[i].sources_raw = tev_stage.sources_raw;
60 res.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; 74 state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
61 res.tev_stages[i].ops_raw = tev_stage.ops_raw; 75 state.tev_stages[i].ops_raw = tev_stage.ops_raw;
62 res.tev_stages[i].scales_raw = tev_stage.scales_raw; 76 state.tev_stages[i].scales_raw = tev_stage.scales_raw;
63 } 77 }
64 78
65 res.combiner_buffer_input = 79 state.combiner_buffer_input =
66 regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 80 regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
67 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; 81 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
68 82
69 // Fragment lighting 83 // Fragment lighting
70 84
71 res.lighting.enable = !regs.lighting.disable; 85 state.lighting.enable = !regs.lighting.disable;
72 res.lighting.src_num = regs.lighting.num_lights + 1; 86 state.lighting.src_num = regs.lighting.num_lights + 1;
73 87
74 for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { 88 for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
75 unsigned num = regs.lighting.light_enable.GetNum(light_index); 89 unsigned num = regs.lighting.light_enable.GetNum(light_index);
76 const auto& light = regs.lighting.light[num]; 90 const auto& light = regs.lighting.light[num];
77 res.lighting.light[light_index].num = num; 91 state.lighting.light[light_index].num = num;
78 res.lighting.light[light_index].directional = light.directional != 0; 92 state.lighting.light[light_index].directional = light.directional != 0;
79 res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; 93 state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
80 res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); 94 state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
81 res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); 95 state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
82 res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); 96 state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
83 } 97 }
84 98
85 res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; 99 state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
86 res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; 100 state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
87 res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); 101 state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
88 res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); 102 state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
89 103
90 res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; 104 state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
91 res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; 105 state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
92 res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); 106 state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
93 res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); 107 state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
94 108
95 res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; 109 state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
96 res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; 110 state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
97 res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); 111 state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
98 res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); 112 state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
99 113
100 res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; 114 state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
101 res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; 115 state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
102 res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); 116 state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
103 res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); 117 state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
104 118
105 res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; 119 state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
106 res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; 120 state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
107 res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); 121 state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
108 res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); 122 state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
109 123
110 res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; 124 state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
111 res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; 125 state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
112 res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); 126 state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
113 res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); 127 state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
114 128
115 res.lighting.config = regs.lighting.config; 129 state.lighting.config = regs.lighting.config;
116 res.lighting.fresnel_selector = regs.lighting.fresnel_selector; 130 state.lighting.fresnel_selector = regs.lighting.fresnel_selector;
117 res.lighting.bump_mode = regs.lighting.bump_mode; 131 state.lighting.bump_mode = regs.lighting.bump_mode;
118 res.lighting.bump_selector = regs.lighting.bump_selector; 132 state.lighting.bump_selector = regs.lighting.bump_selector;
119 res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; 133 state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
120 res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; 134 state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
121 135
122 return res; 136 return res;
123 } 137 }
124 138
125 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { 139 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
126 return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); 140 return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
127 } 141 }
128 142
129 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { 143 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
130 return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); 144 return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
131 } 145 }
132 146
133 bool operator ==(const PicaShaderConfig& o) const { 147 bool operator ==(const PicaShaderConfig& o) const {
134 return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; 148 return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0;
149 };
150
151 // NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC.
152 // This makes BitField not TC when used in a union or struct so we have to resort
153 // to this ugly hack.
154 // Once that bug is fixed we can use Pica::Regs::TevStageConfig here.
155 // Doesn't include const_color because we don't sync it, see comment in CurrentConfig()
156 struct TevStageConfigRaw {
157 u32 sources_raw;
158 u32 modifiers_raw;
159 u32 ops_raw;
160 u32 scales_raw;
161 explicit operator Pica::Regs::TevStageConfig() const noexcept {
162 Pica::Regs::TevStageConfig stage;
163 stage.sources_raw = sources_raw;
164 stage.modifiers_raw = modifiers_raw;
165 stage.ops_raw = ops_raw;
166 stage.const_color = 0;
167 stage.scales_raw = scales_raw;
168 return stage;
169 }
135 }; 170 };
136 171
137 Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; 172 struct State {
138 std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
139 u8 combiner_buffer_input = 0;
140 173
141 struct { 174 Pica::Regs::CompareFunc alpha_test_func;
142 struct { 175 Pica::Regs::TextureConfig::TextureType texture0_type;
143 unsigned num = 0; 176 std::array<TevStageConfigRaw, 6> tev_stages;
144 bool directional = false; 177 u8 combiner_buffer_input;
145 bool two_sided_diffuse = false; 178
146 bool dist_atten_enable = false; 179 Pica::Regs::DepthBuffering depthmap_enable;
147 GLfloat dist_atten_scale = 0.0f;
148 GLfloat dist_atten_bias = 0.0f;
149 } light[8];
150
151 bool enable = false;
152 unsigned src_num = 0;
153 Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
154 unsigned bump_selector = 0;
155 bool bump_renorm = false;
156 bool clamp_highlights = false;
157
158 Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
159 Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
160 180
161 struct { 181 struct {
162 bool enable = false; 182 struct {
163 bool abs_input = false; 183 unsigned num;
164 Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; 184 bool directional;
165 float scale = 1.0f; 185 bool two_sided_diffuse;
166 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; 186 bool dist_atten_enable;
167 } lighting; 187 GLfloat dist_atten_scale;
188 GLfloat dist_atten_bias;
189 } light[8];
190
191 bool enable;
192 unsigned src_num;
193 Pica::Regs::LightingBumpMode bump_mode;
194 unsigned bump_selector;
195 bool bump_renorm;
196 bool clamp_highlights;
197
198 Pica::Regs::LightingConfig config;
199 Pica::Regs::LightingFresnelSelector fresnel_selector;
200
201 struct {
202 bool enable;
203 bool abs_input;
204 Pica::Regs::LightingLutInput type;
205 float scale;
206 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
207 } lighting;
208
209 } state;
168}; 210};
211#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
212static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable");
213#endif
169 214
170namespace std { 215namespace std {
171 216
172template <> 217template <>
173struct hash<PicaShaderConfig> { 218struct hash<PicaShaderConfig> {
174 size_t operator()(const PicaShaderConfig& k) const { 219 size_t operator()(const PicaShaderConfig& k) const {
175 return Common::ComputeHash64(&k, sizeof(PicaShaderConfig)); 220 return Common::ComputeHash64(&k.state, sizeof(PicaShaderConfig::State));
176 } 221 }
177}; 222};
178 223
@@ -239,6 +284,7 @@ private:
239 tex_coord1[1] = v.tc1.y.ToFloat32(); 284 tex_coord1[1] = v.tc1.y.ToFloat32();
240 tex_coord2[0] = v.tc2.x.ToFloat32(); 285 tex_coord2[0] = v.tc2.x.ToFloat32();
241 tex_coord2[1] = v.tc2.y.ToFloat32(); 286 tex_coord2[1] = v.tc2.y.ToFloat32();
287 tex_coord0_w = v.tc0_w.ToFloat32();
242 normquat[0] = v.quat.x.ToFloat32(); 288 normquat[0] = v.quat.x.ToFloat32();
243 normquat[1] = v.quat.y.ToFloat32(); 289 normquat[1] = v.quat.y.ToFloat32();
244 normquat[2] = v.quat.z.ToFloat32(); 290 normquat[2] = v.quat.z.ToFloat32();
@@ -259,6 +305,7 @@ private:
259 GLfloat tex_coord0[2]; 305 GLfloat tex_coord0[2];
260 GLfloat tex_coord1[2]; 306 GLfloat tex_coord1[2];
261 GLfloat tex_coord2[2]; 307 GLfloat tex_coord2[2];
308 GLfloat tex_coord0_w;
262 GLfloat normquat[4]; 309 GLfloat normquat[4];
263 GLfloat view[3]; 310 GLfloat view[3];
264 }; 311 };
@@ -277,6 +324,7 @@ private:
277 GLvec4 const_color[6]; 324 GLvec4 const_color[6];
278 GLvec4 tev_combiner_buffer_color; 325 GLvec4 tev_combiner_buffer_color;
279 GLint alphatest_ref; 326 GLint alphatest_ref;
327 GLfloat depth_scale;
280 GLfloat depth_offset; 328 GLfloat depth_offset;
281 alignas(16) GLvec3 lighting_global_ambient; 329 alignas(16) GLvec3 lighting_global_ambient;
282 LightSrc light_src[8]; 330 LightSrc light_src[8];
@@ -291,8 +339,11 @@ private:
291 /// Syncs the cull mode to match the PICA register 339 /// Syncs the cull mode to match the PICA register
292 void SyncCullMode(); 340 void SyncCullMode();
293 341
294 /// Syncs the depth scale and offset to match the PICA registers 342 /// Syncs the depth scale to match the PICA register
295 void SyncDepthModifiers(); 343 void SyncDepthScale();
344
345 /// Syncs the depth offset to match the PICA register
346 void SyncDepthOffset();
296 347
297 /// Syncs the blend enabled status to match the PICA register 348 /// Syncs the blend enabled status to match the PICA register
298 void SyncBlendEnabled(); 349 void SyncBlendEnabled();
@@ -365,7 +416,7 @@ private:
365 UniformData data; 416 UniformData data;
366 bool lut_dirty[6]; 417 bool lut_dirty[6];
367 bool dirty; 418 bool dirty;
368 } uniform_block_data; 419 } uniform_block_data = {};
369 420
370 std::array<SamplerInfo, 3> texture_samplers; 421 std::array<SamplerInfo, 3> texture_samplers;
371 OGLVertexArray vertex_array; 422 OGLVertexArray vertex_array;
@@ -374,5 +425,5 @@ private:
374 OGLFramebuffer framebuffer; 425 OGLFramebuffer framebuffer;
375 426
376 std::array<OGLTexture, 6> lighting_luts; 427 std::array<OGLTexture, 6> lighting_luts;
377 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; 428 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
378}; 429};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 9011caa39..71d60e69c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -32,8 +32,9 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
32} 32}
33 33
34/// Writes the specified TEV stage source component(s) 34/// Writes the specified TEV stage source component(s)
35static void AppendSource(std::string& out, TevStageConfig::Source source, 35static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source,
36 const std::string& index_name) { 36 const std::string& index_name) {
37 const auto& state = config.state;
37 using Source = TevStageConfig::Source; 38 using Source = TevStageConfig::Source;
38 switch (source) { 39 switch (source) {
39 case Source::PrimaryColor: 40 case Source::PrimaryColor:
@@ -46,7 +47,20 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
46 out += "secondary_fragment_color"; 47 out += "secondary_fragment_color";
47 break; 48 break;
48 case Source::Texture0: 49 case Source::Texture0:
49 out += "texture(tex[0], texcoord[0])"; 50 // Only unit 0 respects the texturing type (according to 3DBrew)
51 switch(state.texture0_type) {
52 case Pica::Regs::TextureConfig::Texture2D:
53 out += "texture(tex[0], texcoord[0])";
54 break;
55 case Pica::Regs::TextureConfig::Projection2D:
56 out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))";
57 break;
58 default:
59 out += "texture(tex[0], texcoord[0])";
60 LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type));
61 UNIMPLEMENTED();
62 break;
63 }
50 break; 64 break;
51 case Source::Texture1: 65 case Source::Texture1:
52 out += "texture(tex[1], texcoord[1])"; 66 out += "texture(tex[1], texcoord[1])";
@@ -71,53 +85,53 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
71} 85}
72 86
73/// Writes the color components to use for the specified TEV stage color modifier 87/// Writes the color components to use for the specified TEV stage color modifier
74static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier modifier, 88static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier,
75 TevStageConfig::Source source, const std::string& index_name) { 89 TevStageConfig::Source source, const std::string& index_name) {
76 using ColorModifier = TevStageConfig::ColorModifier; 90 using ColorModifier = TevStageConfig::ColorModifier;
77 switch (modifier) { 91 switch (modifier) {
78 case ColorModifier::SourceColor: 92 case ColorModifier::SourceColor:
79 AppendSource(out, source, index_name); 93 AppendSource(out, config, source, index_name);
80 out += ".rgb"; 94 out += ".rgb";
81 break; 95 break;
82 case ColorModifier::OneMinusSourceColor: 96 case ColorModifier::OneMinusSourceColor:
83 out += "vec3(1.0) - "; 97 out += "vec3(1.0) - ";
84 AppendSource(out, source, index_name); 98 AppendSource(out, config, source, index_name);
85 out += ".rgb"; 99 out += ".rgb";
86 break; 100 break;
87 case ColorModifier::SourceAlpha: 101 case ColorModifier::SourceAlpha:
88 AppendSource(out, source, index_name); 102 AppendSource(out, config, source, index_name);
89 out += ".aaa"; 103 out += ".aaa";
90 break; 104 break;
91 case ColorModifier::OneMinusSourceAlpha: 105 case ColorModifier::OneMinusSourceAlpha:
92 out += "vec3(1.0) - "; 106 out += "vec3(1.0) - ";
93 AppendSource(out, source, index_name); 107 AppendSource(out, config, source, index_name);
94 out += ".aaa"; 108 out += ".aaa";
95 break; 109 break;
96 case ColorModifier::SourceRed: 110 case ColorModifier::SourceRed:
97 AppendSource(out, source, index_name); 111 AppendSource(out, config, source, index_name);
98 out += ".rrr"; 112 out += ".rrr";
99 break; 113 break;
100 case ColorModifier::OneMinusSourceRed: 114 case ColorModifier::OneMinusSourceRed:
101 out += "vec3(1.0) - "; 115 out += "vec3(1.0) - ";
102 AppendSource(out, source, index_name); 116 AppendSource(out, config, source, index_name);
103 out += ".rrr"; 117 out += ".rrr";
104 break; 118 break;
105 case ColorModifier::SourceGreen: 119 case ColorModifier::SourceGreen:
106 AppendSource(out, source, index_name); 120 AppendSource(out, config, source, index_name);
107 out += ".ggg"; 121 out += ".ggg";
108 break; 122 break;
109 case ColorModifier::OneMinusSourceGreen: 123 case ColorModifier::OneMinusSourceGreen:
110 out += "vec3(1.0) - "; 124 out += "vec3(1.0) - ";
111 AppendSource(out, source, index_name); 125 AppendSource(out, config, source, index_name);
112 out += ".ggg"; 126 out += ".ggg";
113 break; 127 break;
114 case ColorModifier::SourceBlue: 128 case ColorModifier::SourceBlue:
115 AppendSource(out, source, index_name); 129 AppendSource(out, config, source, index_name);
116 out += ".bbb"; 130 out += ".bbb";
117 break; 131 break;
118 case ColorModifier::OneMinusSourceBlue: 132 case ColorModifier::OneMinusSourceBlue:
119 out += "vec3(1.0) - "; 133 out += "vec3(1.0) - ";
120 AppendSource(out, source, index_name); 134 AppendSource(out, config, source, index_name);
121 out += ".bbb"; 135 out += ".bbb";
122 break; 136 break;
123 default: 137 default:
@@ -128,44 +142,44 @@ static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier
128} 142}
129 143
130/// Writes the alpha component to use for the specified TEV stage alpha modifier 144/// Writes the alpha component to use for the specified TEV stage alpha modifier
131static void AppendAlphaModifier(std::string& out, TevStageConfig::AlphaModifier modifier, 145static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier,
132 TevStageConfig::Source source, const std::string& index_name) { 146 TevStageConfig::Source source, const std::string& index_name) {
133 using AlphaModifier = TevStageConfig::AlphaModifier; 147 using AlphaModifier = TevStageConfig::AlphaModifier;
134 switch (modifier) { 148 switch (modifier) {
135 case AlphaModifier::SourceAlpha: 149 case AlphaModifier::SourceAlpha:
136 AppendSource(out, source, index_name); 150 AppendSource(out, config, source, index_name);
137 out += ".a"; 151 out += ".a";
138 break; 152 break;
139 case AlphaModifier::OneMinusSourceAlpha: 153 case AlphaModifier::OneMinusSourceAlpha:
140 out += "1.0 - "; 154 out += "1.0 - ";
141 AppendSource(out, source, index_name); 155 AppendSource(out, config, source, index_name);
142 out += ".a"; 156 out += ".a";
143 break; 157 break;
144 case AlphaModifier::SourceRed: 158 case AlphaModifier::SourceRed:
145 AppendSource(out, source, index_name); 159 AppendSource(out, config, source, index_name);
146 out += ".r"; 160 out += ".r";
147 break; 161 break;
148 case AlphaModifier::OneMinusSourceRed: 162 case AlphaModifier::OneMinusSourceRed:
149 out += "1.0 - "; 163 out += "1.0 - ";
150 AppendSource(out, source, index_name); 164 AppendSource(out, config, source, index_name);
151 out += ".r"; 165 out += ".r";
152 break; 166 break;
153 case AlphaModifier::SourceGreen: 167 case AlphaModifier::SourceGreen:
154 AppendSource(out, source, index_name); 168 AppendSource(out, config, source, index_name);
155 out += ".g"; 169 out += ".g";
156 break; 170 break;
157 case AlphaModifier::OneMinusSourceGreen: 171 case AlphaModifier::OneMinusSourceGreen:
158 out += "1.0 - "; 172 out += "1.0 - ";
159 AppendSource(out, source, index_name); 173 AppendSource(out, config, source, index_name);
160 out += ".g"; 174 out += ".g";
161 break; 175 break;
162 case AlphaModifier::SourceBlue: 176 case AlphaModifier::SourceBlue:
163 AppendSource(out, source, index_name); 177 AppendSource(out, config, source, index_name);
164 out += ".b"; 178 out += ".b";
165 break; 179 break;
166 case AlphaModifier::OneMinusSourceBlue: 180 case AlphaModifier::OneMinusSourceBlue:
167 out += "1.0 - "; 181 out += "1.0 - ";
168 AppendSource(out, source, index_name); 182 AppendSource(out, config, source, index_name);
169 out += ".b"; 183 out += ".b";
170 break; 184 break;
171 default: 185 default:
@@ -287,16 +301,16 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
287 301
288/// Writes the code to emulate the specified TEV stage 302/// Writes the code to emulate the specified TEV stage
289static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { 303static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) {
290 auto& stage = config.tev_stages[index]; 304 const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]);
291 if (!IsPassThroughTevStage(stage)) { 305 if (!IsPassThroughTevStage(stage)) {
292 std::string index_name = std::to_string(index); 306 std::string index_name = std::to_string(index);
293 307
294 out += "vec3 color_results_" + index_name + "[3] = vec3[3]("; 308 out += "vec3 color_results_" + index_name + "[3] = vec3[3](";
295 AppendColorModifier(out, stage.color_modifier1, stage.color_source1, index_name); 309 AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
296 out += ", "; 310 out += ", ";
297 AppendColorModifier(out, stage.color_modifier2, stage.color_source2, index_name); 311 AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
298 out += ", "; 312 out += ", ";
299 AppendColorModifier(out, stage.color_modifier3, stage.color_source3, index_name); 313 AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
300 out += ");\n"; 314 out += ");\n";
301 315
302 out += "vec3 color_output_" + index_name + " = "; 316 out += "vec3 color_output_" + index_name + " = ";
@@ -304,11 +318,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
304 out += ";\n"; 318 out += ";\n";
305 319
306 out += "float alpha_results_" + index_name + "[3] = float[3]("; 320 out += "float alpha_results_" + index_name + "[3] = float[3](";
307 AppendAlphaModifier(out, stage.alpha_modifier1, stage.alpha_source1, index_name); 321 AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name);
308 out += ", "; 322 out += ", ";
309 AppendAlphaModifier(out, stage.alpha_modifier2, stage.alpha_source2, index_name); 323 AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name);
310 out += ", "; 324 out += ", ";
311 AppendAlphaModifier(out, stage.alpha_modifier3, stage.alpha_source3, index_name); 325 AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name);
312 out += ");\n"; 326 out += ");\n";
313 327
314 out += "float alpha_output_" + index_name + " = "; 328 out += "float alpha_output_" + index_name + " = ";
@@ -331,6 +345,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
331 345
332/// Writes the code to emulate fragment lighting 346/// Writes the code to emulate fragment lighting
333static void WriteLighting(std::string& out, const PicaShaderConfig& config) { 347static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
348 const auto& lighting = config.state.lighting;
349
334 // Define lighting globals 350 // Define lighting globals
335 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" 351 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
336 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" 352 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
@@ -338,17 +354,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
338 "vec3 refl_value = vec3(0.0);\n"; 354 "vec3 refl_value = vec3(0.0);\n";
339 355
340 // Compute fragment normals 356 // Compute fragment normals
341 if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { 357 if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
342 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture 358 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture
343 std::string bump_selector = std::to_string(config.lighting.bump_selector); 359 std::string bump_selector = std::to_string(lighting.bump_selector);
344 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; 360 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n";
345 361
346 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result 362 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result
347 if (config.lighting.bump_renorm) { 363 if (lighting.bump_renorm) {
348 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; 364 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
349 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; 365 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
350 } 366 }
351 } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { 367 } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
352 // Bump mapping is enabled using a tangent map 368 // Bump mapping is enabled using a tangent map
353 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); 369 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
354 UNIMPLEMENTED(); 370 UNIMPLEMENTED();
@@ -361,7 +377,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
361 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; 377 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
362 378
363 // Gets the index into the specified lookup table for specular lighting 379 // Gets the index into the specified lookup table for specular lighting
364 auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { 380 auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) {
365 const std::string half_angle = "normalize(normalize(view) + light_vector)"; 381 const std::string half_angle = "normalize(normalize(view) + light_vector)";
366 std::string index; 382 std::string index;
367 switch (input) { 383 switch (input) {
@@ -389,7 +405,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
389 405
390 if (abs) { 406 if (abs) {
391 // LUT index is in the range of (0.0, 1.0) 407 // LUT index is in the range of (0.0, 1.0)
392 index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; 408 index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
393 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; 409 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
394 } else { 410 } else {
395 // LUT index is in the range of (-1.0, 1.0) 411 // LUT index is in the range of (-1.0, 1.0)
@@ -407,8 +423,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
407 }; 423 };
408 424
409 // Write the code to emulate each enabled light 425 // Write the code to emulate each enabled light
410 for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { 426 for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) {
411 const auto& light_config = config.lighting.light[light_index]; 427 const auto& light_config = lighting.light[light_index];
412 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; 428 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]";
413 429
414 // Compute light vector (directional or positional) 430 // Compute light vector (directional or positional)
@@ -432,39 +448,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
432 } 448 }
433 449
434 // If enabled, clamp specular component if lighting result is negative 450 // If enabled, clamp specular component if lighting result is negative
435 std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; 451 std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
436 452
437 // Specular 0 component 453 // Specular 0 component
438 std::string d0_lut_value = "1.0"; 454 std::string d0_lut_value = "1.0";
439 if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { 455 if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
440 // Lookup specular "distribution 0" LUT value 456 // Lookup specular "distribution 0" LUT value
441 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); 457 std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
442 d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; 458 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
443 } 459 }
444 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; 460 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
445 461
446 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used 462 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
447 if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { 463 if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
448 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); 464 std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
449 std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; 465 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
450 out += "refl_value.r = " + value + ";\n"; 466 out += "refl_value.r = " + value + ";\n";
451 } else { 467 } else {
452 out += "refl_value.r = 1.0;\n"; 468 out += "refl_value.r = 1.0;\n";
453 } 469 }
454 470
455 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used 471 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
456 if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { 472 if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
457 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); 473 std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
458 std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; 474 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
459 out += "refl_value.g = " + value + ";\n"; 475 out += "refl_value.g = " + value + ";\n";
460 } else { 476 } else {
461 out += "refl_value.g = refl_value.r;\n"; 477 out += "refl_value.g = refl_value.r;\n";
462 } 478 }
463 479
464 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used 480 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
465 if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { 481 if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
466 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); 482 std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
467 std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; 483 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
468 out += "refl_value.b = " + value + ";\n"; 484 out += "refl_value.b = " + value + ";\n";
469 } else { 485 } else {
470 out += "refl_value.b = refl_value.r;\n"; 486 out += "refl_value.b = refl_value.r;\n";
@@ -472,27 +488,27 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
472 488
473 // Specular 1 component 489 // Specular 1 component
474 std::string d1_lut_value = "1.0"; 490 std::string d1_lut_value = "1.0";
475 if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { 491 if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
476 // Lookup specular "distribution 1" LUT value 492 // Lookup specular "distribution 1" LUT value
477 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); 493 std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
478 d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; 494 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
479 } 495 }
480 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; 496 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
481 497
482 // Fresnel 498 // Fresnel
483 if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { 499 if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
484 // Lookup fresnel LUT value 500 // Lookup fresnel LUT value
485 std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); 501 std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
486 std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; 502 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
487 503
488 // Enabled for difffuse lighting alpha component 504 // Enabled for difffuse lighting alpha component
489 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || 505 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
490 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 506 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
491 out += "diffuse_sum.a *= " + value + ";\n"; 507 out += "diffuse_sum.a *= " + value + ";\n";
492 508
493 // Enabled for the specular lighting alpha component 509 // Enabled for the specular lighting alpha component
494 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || 510 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
495 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 511 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
496 out += "specular_sum.a *= " + value + ";\n"; 512 out += "specular_sum.a *= " + value + ";\n";
497 } 513 }
498 514
@@ -510,6 +526,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
510} 526}
511 527
512std::string GenerateFragmentShader(const PicaShaderConfig& config) { 528std::string GenerateFragmentShader(const PicaShaderConfig& config) {
529 const auto& state = config.state;
530
513 std::string out = R"( 531 std::string out = R"(
514#version 330 core 532#version 330 core
515#define NUM_TEV_STAGES 6 533#define NUM_TEV_STAGES 6
@@ -519,6 +537,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
519 537
520in vec4 primary_color; 538in vec4 primary_color;
521in vec2 texcoord[3]; 539in vec2 texcoord[3];
540in float texcoord0_w;
522in vec4 normquat; 541in vec4 normquat;
523in vec3 view; 542in vec3 view;
524 543
@@ -536,6 +555,7 @@ layout (std140) uniform shader_data {
536 vec4 const_color[NUM_TEV_STAGES]; 555 vec4 const_color[NUM_TEV_STAGES];
537 vec4 tev_combiner_buffer_color; 556 vec4 tev_combiner_buffer_color;
538 int alphatest_ref; 557 int alphatest_ref;
558 float depth_scale;
539 float depth_offset; 559 float depth_offset;
540 vec3 lighting_global_ambient; 560 vec3 lighting_global_ambient;
541 LightSrc light_src[NUM_LIGHTS]; 561 LightSrc light_src[NUM_LIGHTS];
@@ -555,29 +575,37 @@ vec4 secondary_fragment_color = vec4(0.0);
555)"; 575)";
556 576
557 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test 577 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
558 if (config.alpha_test_func == Regs::CompareFunc::Never) { 578 if (state.alpha_test_func == Regs::CompareFunc::Never) {
559 out += "discard; }"; 579 out += "discard; }";
560 return out; 580 return out;
561 } 581 }
562 582
563 if (config.lighting.enable) 583 if (state.lighting.enable)
564 WriteLighting(out, config); 584 WriteLighting(out, config);
565 585
566 out += "vec4 combiner_buffer = vec4(0.0);\n"; 586 out += "vec4 combiner_buffer = vec4(0.0);\n";
567 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; 587 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
568 out += "vec4 last_tex_env_out = vec4(0.0);\n"; 588 out += "vec4 last_tex_env_out = vec4(0.0);\n";
569 589
570 for (size_t index = 0; index < config.tev_stages.size(); ++index) 590 for (size_t index = 0; index < state.tev_stages.size(); ++index)
571 WriteTevStage(out, config, (unsigned)index); 591 WriteTevStage(out, config, (unsigned)index);
572 592
573 if (config.alpha_test_func != Regs::CompareFunc::Always) { 593 if (state.alpha_test_func != Regs::CompareFunc::Always) {
574 out += "if ("; 594 out += "if (";
575 AppendAlphaTestCondition(out, config.alpha_test_func); 595 AppendAlphaTestCondition(out, state.alpha_test_func);
576 out += ") discard;\n"; 596 out += ") discard;\n";
577 } 597 }
578 598
579 out += "color = last_tex_env_out;\n"; 599 out += "color = last_tex_env_out;\n";
580 out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; 600
601 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
602 out += "float depth = z_over_w * depth_scale + depth_offset;\n";
603 if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
604 out += "depth /= gl_FragCoord.w;\n";
605 }
606 out += "gl_FragDepth = depth;\n";
607
608 out += "}";
581 609
582 return out; 610 return out;
583} 611}
@@ -585,17 +613,19 @@ vec4 secondary_fragment_color = vec4(0.0);
585std::string GenerateVertexShader() { 613std::string GenerateVertexShader() {
586 std::string out = "#version 330 core\n"; 614 std::string out = "#version 330 core\n";
587 615
588 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; 616 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n";
589 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; 617 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
590 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; 618 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
591 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; 619 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
592 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; 620 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
593 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; 621 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n";
594 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; 622 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n";
623 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
595 624
596 out += R"( 625 out += R"(
597out vec4 primary_color; 626out vec4 primary_color;
598out vec2 texcoord[3]; 627out vec2 texcoord[3];
628out float texcoord0_w;
599out vec4 normquat; 629out vec4 normquat;
600out vec3 view; 630out vec3 view;
601 631
@@ -604,6 +634,7 @@ void main() {
604 texcoord[0] = vert_texcoord0; 634 texcoord[0] = vert_texcoord0;
605 texcoord[1] = vert_texcoord1; 635 texcoord[1] = vert_texcoord1;
606 texcoord[2] = vert_texcoord2; 636 texcoord[2] = vert_texcoord2;
637 texcoord0_w = vert_texcoord0_w;
607 normquat = vert_normquat; 638 normquat = vert_normquat;
608 view = vert_view; 639 view = vert_view;
609 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); 640 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 3eb07d57a..bef3249cf 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -6,7 +6,7 @@
6 6
7#include <string> 7#include <string>
8 8
9struct PicaShaderConfig; 9union PicaShaderConfig;
10 10
11namespace GLShader { 11namespace GLShader {
12 12
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 097242f6f..f59912f79 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -14,6 +14,7 @@ enum Attributes {
14 ATTRIBUTE_TEXCOORD0, 14 ATTRIBUTE_TEXCOORD0,
15 ATTRIBUTE_TEXCOORD1, 15 ATTRIBUTE_TEXCOORD1,
16 ATTRIBUTE_TEXCOORD2, 16 ATTRIBUTE_TEXCOORD2,
17 ATTRIBUTE_TEXCOORD0_W,
17 ATTRIBUTE_NORMQUAT, 18 ATTRIBUTE_NORMQUAT,
18 ATTRIBUTE_VIEW, 19 ATTRIBUTE_VIEW,
19}; 20};
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 02cd9f417..fa141fc9a 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -36,6 +36,8 @@ OpenGLState::OpenGLState() {
36 stencil.action_stencil_fail = GL_KEEP; 36 stencil.action_stencil_fail = GL_KEEP;
37 37
38 blend.enabled = false; 38 blend.enabled = false;
39 blend.rgb_equation = GL_FUNC_ADD;
40 blend.a_equation = GL_FUNC_ADD;
39 blend.src_rgb_func = GL_ONE; 41 blend.src_rgb_func = GL_ONE;
40 blend.dst_rgb_func = GL_ZERO; 42 blend.dst_rgb_func = GL_ZERO;
41 blend.src_a_func = GL_ONE; 43 blend.src_a_func = GL_ONE;
@@ -165,6 +167,11 @@ void OpenGLState::Apply() const {
165 blend.src_a_func, blend.dst_a_func); 167 blend.src_a_func, blend.dst_a_func);
166 } 168 }
167 169
170 if (blend.rgb_equation != cur_state.blend.rgb_equation ||
171 blend.a_equation != cur_state.blend.a_equation) {
172 glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
173 }
174
168 if (logic_op != cur_state.logic_op) { 175 if (logic_op != cur_state.logic_op) {
169 glLogicOp(logic_op); 176 glLogicOp(logic_op);
170 } 177 }
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 24f20e47c..228727054 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -40,6 +40,8 @@ public:
40 40
41 struct { 41 struct {
42 bool enabled; // GL_BLEND 42 bool enabled; // GL_BLEND
43 GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
44 GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
43 GLenum src_rgb_func; // GL_BLEND_SRC_RGB 45 GLenum src_rgb_func; // GL_BLEND_SRC_RGB
44 GLenum dst_rgb_func; // GL_BLEND_DST_RGB 46 GLenum dst_rgb_func; // GL_BLEND_DST_RGB
45 GLenum src_a_func; // GL_BLEND_SRC_ALPHA 47 GLenum src_a_func; // GL_BLEND_SRC_ALPHA
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 976d1f364..6dc2758c5 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -78,6 +78,26 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
78 return gl_mode; 78 return gl_mode;
79} 79}
80 80
81inline GLenum BlendEquation(Pica::Regs::BlendEquation equation) {
82 static const GLenum blend_equation_table[] = {
83 GL_FUNC_ADD, // BlendEquation::Add
84 GL_FUNC_SUBTRACT, // BlendEquation::Subtract
85 GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract
86 GL_MIN, // BlendEquation::Min
87 GL_MAX, // BlendEquation::Max
88 };
89
90 // Range check table for input
91 if (static_cast<size_t>(equation) >= ARRAY_SIZE(blend_equation_table)) {
92 LOG_CRITICAL(Render_OpenGL, "Unknown blend equation %d", equation);
93 UNREACHABLE();
94
95 return GL_FUNC_ADD;
96 }
97
98 return blend_equation_table[(unsigned)equation];
99}
100
81inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { 101inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
82 static const GLenum blend_func_table[] = { 102 static const GLenum blend_func_table[] = {
83 GL_ZERO, // BlendFactor::Zero 103 GL_ZERO, // BlendFactor::Zero
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 65dcc9156..161097610 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -35,7 +35,13 @@ static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
35static const JitShader* jit_shader; 35static const JitShader* jit_shader;
36#endif // ARCHITECTURE_x86_64 36#endif // ARCHITECTURE_x86_64
37 37
38void Setup() { 38void ClearCache() {
39#ifdef ARCHITECTURE_x86_64
40 shader_map.clear();
41#endif // ARCHITECTURE_x86_64
42}
43
44void ShaderSetup::Setup() {
39#ifdef ARCHITECTURE_x86_64 45#ifdef ARCHITECTURE_x86_64
40 if (VideoCore::g_shader_jit_enabled) { 46 if (VideoCore::g_shader_jit_enabled) {
41 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 47 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
@@ -54,20 +60,14 @@ void Setup() {
54#endif // ARCHITECTURE_x86_64 60#endif // ARCHITECTURE_x86_64
55} 61}
56 62
57void Shutdown() { 63MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
58#ifdef ARCHITECTURE_x86_64
59 shader_map.clear();
60#endif // ARCHITECTURE_x86_64
61}
62
63MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
64 64
65OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { 65OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
66 auto& config = g_state.regs.vs; 66 auto& config = g_state.regs.vs;
67 auto& setup = g_state.vs;
67 68
68 MICROPROFILE_SCOPE(GPU_VertexShader); 69 MICROPROFILE_SCOPE(GPU_Shader);
69 70
70 state.program_counter = config.main_offset;
71 state.debug.max_offset = 0; 71 state.debug.max_offset = 0;
72 state.debug.max_opdesc_id = 0; 72 state.debug.max_opdesc_id = 0;
73 73
@@ -82,11 +82,11 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
82 82
83#ifdef ARCHITECTURE_x86_64 83#ifdef ARCHITECTURE_x86_64
84 if (VideoCore::g_shader_jit_enabled) 84 if (VideoCore::g_shader_jit_enabled)
85 jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); 85 jit_shader->Run(setup, state, config.main_offset);
86 else 86 else
87 RunInterpreter(state); 87 RunInterpreter(setup, state, config.main_offset);
88#else 88#else
89 RunInterpreter(state); 89 RunInterpreter(setup, state, config.main_offset);
90#endif // ARCHITECTURE_x86_64 90#endif // ARCHITECTURE_x86_64
91 91
92 // Setup output data 92 // Setup output data
@@ -140,10 +140,9 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
140 return ret; 140 return ret;
141} 141}
142 142
143DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { 143DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
144 UnitState<true> state; 144 UnitState<true> state;
145 145
146 state.program_counter = config.main_offset;
147 state.debug.max_offset = 0; 146 state.debug.max_offset = 0;
148 state.debug.max_opdesc_id = 0; 147 state.debug.max_opdesc_id = 0;
149 148
@@ -158,7 +157,7 @@ DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, c
158 state.conditional_code[0] = false; 157 state.conditional_code[0] = false;
159 state.conditional_code[1] = false; 158 state.conditional_code[1] = false;
160 159
161 RunInterpreter(state); 160 RunInterpreter(setup, state, config.main_offset);
162 return state.debug; 161 return state.debug;
163} 162}
164 163
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 56b83bfeb..84898f21c 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -43,7 +43,8 @@ struct OutputVertex {
43 Math::Vec4<float24> color; 43 Math::Vec4<float24> color;
44 Math::Vec2<float24> tc0; 44 Math::Vec2<float24> tc0;
45 Math::Vec2<float24> tc1; 45 Math::Vec2<float24> tc1;
46 INSERT_PADDING_WORDS(2); 46 float24 tc0_w;
47 INSERT_PADDING_WORDS(1);
47 Math::Vec3<float24> view; 48 Math::Vec3<float24> view;
48 INSERT_PADDING_WORDS(1); 49 INSERT_PADDING_WORDS(1);
49 Math::Vec2<float24> tc2; 50 Math::Vec2<float24> tc2;
@@ -83,23 +84,6 @@ struct OutputVertex {
83static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); 84static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
84static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); 85static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
85 86
86/// Vertex shader memory
87struct ShaderSetup {
88 struct {
89 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
90 // therefore required to be 16-byte aligned.
91 alignas(16) Math::Vec4<float24> f[96];
92
93 std::array<bool, 16> b;
94 std::array<Math::Vec4<u8>, 4> i;
95 } uniforms;
96
97 Math::Vec4<float24> default_attributes[16];
98
99 std::array<u32, 1024> program_code;
100 std::array<u32, 1024> swizzle_data;
101};
102
103// Helper structure used to keep track of data useful for inspection of shader emulation 87// Helper structure used to keep track of data useful for inspection of shader emulation
104template<bool full_debugging> 88template<bool full_debugging>
105struct DebugData; 89struct DebugData;
@@ -288,38 +272,21 @@ struct UnitState {
288 } registers; 272 } registers;
289 static_assert(std::is_pod<Registers>::value, "Structure is not POD"); 273 static_assert(std::is_pod<Registers>::value, "Structure is not POD");
290 274
291 u32 program_counter;
292 bool conditional_code[2]; 275 bool conditional_code[2];
293 276
294 // Two Address registers and one loop counter 277 // Two Address registers and one loop counter
295 // TODO: How many bits do these actually have? 278 // TODO: How many bits do these actually have?
296 s32 address_registers[3]; 279 s32 address_registers[3];
297 280
298 enum {
299 INVALID_ADDRESS = 0xFFFFFFFF
300 };
301
302 struct CallStackElement {
303 u32 final_address; // Address upon which we jump to return_address
304 u32 return_address; // Where to jump when leaving scope
305 u8 repeat_counter; // How often to repeat until this call stack element is removed
306 u8 loop_increment; // Which value to add to the loop counter after an iteration
307 // TODO: Should this be a signed value? Does it even matter?
308 u32 loop_address; // The address where we'll return to after each loop iteration
309 };
310
311 // TODO: Is there a maximal size for this?
312 boost::container::static_vector<CallStackElement, 16> call_stack;
313
314 DebugData<Debug> debug; 281 DebugData<Debug> debug;
315 282
316 static size_t InputOffset(const SourceRegister& reg) { 283 static size_t InputOffset(const SourceRegister& reg) {
317 switch (reg.GetRegisterType()) { 284 switch (reg.GetRegisterType()) {
318 case RegisterType::Input: 285 case RegisterType::Input:
319 return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 286 return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
320 287
321 case RegisterType::Temporary: 288 case RegisterType::Temporary:
322 return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 289 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
323 290
324 default: 291 default:
325 UNREACHABLE(); 292 UNREACHABLE();
@@ -330,10 +297,10 @@ struct UnitState {
330 static size_t OutputOffset(const DestRegister& reg) { 297 static size_t OutputOffset(const DestRegister& reg) {
331 switch (reg.GetRegisterType()) { 298 switch (reg.GetRegisterType()) {
332 case RegisterType::Output: 299 case RegisterType::Output:
333 return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 300 return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
334 301
335 case RegisterType::Temporary: 302 case RegisterType::Temporary:
336 return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 303 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
337 304
338 default: 305 default:
339 UNREACHABLE(); 306 UNREACHABLE();
@@ -342,33 +309,66 @@ struct UnitState {
342 } 309 }
343}; 310};
344 311
345/** 312/// Clears the shader cache
346 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per 313void ClearCache();
347 * vertex, which would happen within the `Run` function).
348 */
349void Setup();
350 314
351/// Performs any cleanup when the emulator is shutdown 315struct ShaderSetup {
352void Shutdown();
353 316
354/** 317 struct {
355 * Runs the currently setup shader 318 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
356 * @param state Shader unit state, must be setup per shader and per shader unit 319 // therefore required to be 16-byte aligned.
357 * @param input Input vertex into the shader 320 alignas(16) Math::Vec4<float24> f[96];
358 * @param num_attributes The number of vertex shader attributes
359 * @return The output vertex, after having been processed by the vertex shader
360 */
361OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
362 321
363/** 322 std::array<bool, 16> b;
364 * Produce debug information based on the given shader and input vertex 323 std::array<Math::Vec4<u8>, 4> i;
365 * @param input Input vertex into the shader 324 } uniforms;
366 * @param num_attributes The number of vertex shader attributes 325
367 * @param config Configuration object for the shader pipeline 326 static size_t UniformOffset(RegisterType type, unsigned index) {
368 * @param setup Setup object for the shader pipeline 327 switch (type) {
369 * @return Debug information for this shader with regards to the given vertex 328 case RegisterType::FloatUniform:
370 */ 329 return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
371DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); 330
331 case RegisterType::BoolUniform:
332 return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
333
334 case RegisterType::IntUniform:
335 return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
336
337 default:
338 UNREACHABLE();
339 return 0;
340 }
341 }
342
343 std::array<u32, 1024> program_code;
344 std::array<u32, 1024> swizzle_data;
345
346 /**
347 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
348 * vertex, which would happen within the `Run` function).
349 */
350 void Setup();
351
352 /**
353 * Runs the currently setup shader
354 * @param state Shader unit state, must be setup per shader and per shader unit
355 * @param input Input vertex into the shader
356 * @param num_attributes The number of vertex shader attributes
357 * @return The output vertex, after having been processed by the vertex shader
358 */
359 OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
360
361 /**
362 * Produce debug information based on the given shader and input vertex
363 * @param input Input vertex into the shader
364 * @param num_attributes The number of vertex shader attributes
365 * @param config Configuration object for the shader pipeline
366 * @param setup Setup object for the shader pipeline
367 * @return Debug information for this shader with regards to the given vertex
368 */
369 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
370
371};
372 372
373} // namespace Shader 373} // namespace Shader
374 374
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 7710f7fbc..714e8bfd5 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -29,8 +29,24 @@ namespace Pica {
29 29
30namespace Shader { 30namespace Shader {
31 31
32constexpr u32 INVALID_ADDRESS = 0xFFFFFFFF;
33
34struct CallStackElement {
35 u32 final_address; // Address upon which we jump to return_address
36 u32 return_address; // Where to jump when leaving scope
37 u8 repeat_counter; // How often to repeat until this call stack element is removed
38 u8 loop_increment; // Which value to add to the loop counter after an iteration
39 // TODO: Should this be a signed value? Does it even matter?
40 u32 loop_address; // The address where we'll return to after each loop iteration
41};
42
32template<bool Debug> 43template<bool Debug>
33void RunInterpreter(UnitState<Debug>& state) { 44void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
45 // TODO: Is there a maximal size for this?
46 boost::container::static_vector<CallStackElement, 16> call_stack;
47
48 u32 program_counter = offset;
49
34 const auto& uniforms = g_state.vs.uniforms; 50 const auto& uniforms = g_state.vs.uniforms;
35 const auto& swizzle_data = g_state.vs.swizzle_data; 51 const auto& swizzle_data = g_state.vs.swizzle_data;
36 const auto& program_code = g_state.vs.program_code; 52 const auto& program_code = g_state.vs.program_code;
@@ -41,16 +57,16 @@ void RunInterpreter(UnitState<Debug>& state) {
41 unsigned iteration = 0; 57 unsigned iteration = 0;
42 bool exit_loop = false; 58 bool exit_loop = false;
43 while (!exit_loop) { 59 while (!exit_loop) {
44 if (!state.call_stack.empty()) { 60 if (!call_stack.empty()) {
45 auto& top = state.call_stack.back(); 61 auto& top = call_stack.back();
46 if (state.program_counter == top.final_address) { 62 if (program_counter == top.final_address) {
47 state.address_registers[2] += top.loop_increment; 63 state.address_registers[2] += top.loop_increment;
48 64
49 if (top.repeat_counter-- == 0) { 65 if (top.repeat_counter-- == 0) {
50 state.program_counter = top.return_address; 66 program_counter = top.return_address;
51 state.call_stack.pop_back(); 67 call_stack.pop_back();
52 } else { 68 } else {
53 state.program_counter = top.loop_address; 69 program_counter = top.loop_address;
54 } 70 }
55 71
56 // TODO: Is "trying again" accurate to hardware? 72 // TODO: Is "trying again" accurate to hardware?
@@ -58,20 +74,20 @@ void RunInterpreter(UnitState<Debug>& state) {
58 } 74 }
59 } 75 }
60 76
61 const Instruction instr = { program_code[state.program_counter] }; 77 const Instruction instr = { program_code[program_counter] };
62 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; 78 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
63 79
64 static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, 80 static auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions,
65 u32 return_offset, u8 repeat_count, u8 loop_increment) { 81 u32 return_offset, u8 repeat_count, u8 loop_increment) {
66 state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset 82 program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
67 ASSERT(state.call_stack.size() < state.call_stack.capacity()); 83 ASSERT(call_stack.size() < call_stack.capacity());
68 state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); 84 call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
69 }; 85 };
70 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); 86 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
71 if (iteration > 0) 87 if (iteration > 0)
72 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); 88 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter);
73 89
74 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); 90 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter);
75 91
76 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { 92 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
77 switch (source_reg.GetRegisterType()) { 93 switch (source_reg.GetRegisterType()) {
@@ -519,7 +535,7 @@ void RunInterpreter(UnitState<Debug>& state) {
519 case OpCode::Id::JMPC: 535 case OpCode::Id::JMPC:
520 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 536 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
521 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 537 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
522 state.program_counter = instr.flow_control.dest_offset - 1; 538 program_counter = instr.flow_control.dest_offset - 1;
523 } 539 }
524 break; 540 break;
525 541
@@ -527,7 +543,7 @@ void RunInterpreter(UnitState<Debug>& state) {
527 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 543 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
528 544
529 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { 545 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
530 state.program_counter = instr.flow_control.dest_offset - 1; 546 program_counter = instr.flow_control.dest_offset - 1;
531 } 547 }
532 break; 548 break;
533 549
@@ -535,7 +551,7 @@ void RunInterpreter(UnitState<Debug>& state) {
535 call(state, 551 call(state,
536 instr.flow_control.dest_offset, 552 instr.flow_control.dest_offset,
537 instr.flow_control.num_instructions, 553 instr.flow_control.num_instructions,
538 state.program_counter + 1, 0, 0); 554 program_counter + 1, 0, 0);
539 break; 555 break;
540 556
541 case OpCode::Id::CALLU: 557 case OpCode::Id::CALLU:
@@ -544,7 +560,7 @@ void RunInterpreter(UnitState<Debug>& state) {
544 call(state, 560 call(state,
545 instr.flow_control.dest_offset, 561 instr.flow_control.dest_offset,
546 instr.flow_control.num_instructions, 562 instr.flow_control.num_instructions,
547 state.program_counter + 1, 0, 0); 563 program_counter + 1, 0, 0);
548 } 564 }
549 break; 565 break;
550 566
@@ -554,7 +570,7 @@ void RunInterpreter(UnitState<Debug>& state) {
554 call(state, 570 call(state,
555 instr.flow_control.dest_offset, 571 instr.flow_control.dest_offset,
556 instr.flow_control.num_instructions, 572 instr.flow_control.num_instructions,
557 state.program_counter + 1, 0, 0); 573 program_counter + 1, 0, 0);
558 } 574 }
559 break; 575 break;
560 576
@@ -565,8 +581,8 @@ void RunInterpreter(UnitState<Debug>& state) {
565 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 581 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
566 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 582 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
567 call(state, 583 call(state,
568 state.program_counter + 1, 584 program_counter + 1,
569 instr.flow_control.dest_offset - state.program_counter - 1, 585 instr.flow_control.dest_offset - program_counter - 1,
570 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 586 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
571 } else { 587 } else {
572 call(state, 588 call(state,
@@ -584,8 +600,8 @@ void RunInterpreter(UnitState<Debug>& state) {
584 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 600 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
585 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 601 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
586 call(state, 602 call(state,
587 state.program_counter + 1, 603 program_counter + 1,
588 instr.flow_control.dest_offset - state.program_counter - 1, 604 instr.flow_control.dest_offset - program_counter - 1,
589 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 605 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
590 } else { 606 } else {
591 call(state, 607 call(state,
@@ -607,8 +623,8 @@ void RunInterpreter(UnitState<Debug>& state) {
607 623
608 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); 624 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
609 call(state, 625 call(state,
610 state.program_counter + 1, 626 program_counter + 1,
611 instr.flow_control.dest_offset - state.program_counter + 1, 627 instr.flow_control.dest_offset - program_counter + 1,
612 instr.flow_control.dest_offset + 1, 628 instr.flow_control.dest_offset + 1,
613 loop_param.x, 629 loop_param.x,
614 loop_param.z); 630 loop_param.z);
@@ -625,14 +641,14 @@ void RunInterpreter(UnitState<Debug>& state) {
625 } 641 }
626 } 642 }
627 643
628 ++state.program_counter; 644 ++program_counter;
629 ++iteration; 645 ++iteration;
630 } 646 }
631} 647}
632 648
633// Explicit instantiation 649// Explicit instantiation
634template void RunInterpreter(UnitState<false>& state); 650template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset);
635template void RunInterpreter(UnitState<true>& state); 651template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset);
636 652
637} // namespace 653} // namespace
638 654
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index 6048cdf3a..bb3ce1c6e 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -11,7 +11,7 @@ namespace Shader {
11template <bool Debug> struct UnitState; 11template <bool Debug> struct UnitState;
12 12
13template<bool Debug> 13template<bool Debug>
14void RunInterpreter(UnitState<Debug>& state); 14void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
15 15
16} // namespace 16} // namespace
17 17
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 99f6c51eb..43e7e6b4c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -102,7 +102,7 @@ const JitFunction instr_table[64] = {
102// purposes, as documented below: 102// purposes, as documented below:
103 103
104/// Pointer to the uniform memory 104/// Pointer to the uniform memory
105static const X64Reg UNIFORMS = R9; 105static const X64Reg SETUP = R9;
106/// The two 32-bit VS address offset registers set by the MOVA instruction 106/// The two 32-bit VS address offset registers set by the MOVA instruction
107static const X64Reg ADDROFFS_REG_0 = R10; 107static const X64Reg ADDROFFS_REG_0 = R10;
108static const X64Reg ADDROFFS_REG_1 = R11; 108static const X64Reg ADDROFFS_REG_1 = R11;
@@ -117,7 +117,7 @@ static const X64Reg COND0 = R13;
117/// Result of the previous CMP instruction for the Y-component comparison 117/// Result of the previous CMP instruction for the Y-component comparison
118static const X64Reg COND1 = R14; 118static const X64Reg COND1 = R14;
119/// Pointer to the UnitState instance for the current VS unit 119/// Pointer to the UnitState instance for the current VS unit
120static const X64Reg REGISTERS = R15; 120static const X64Reg STATE = R15;
121/// SIMD scratch register 121/// SIMD scratch register
122static const X64Reg SCRATCH = XMM0; 122static const X64Reg SCRATCH = XMM0;
123/// Loaded with the first swizzled source register, otherwise can be used as a scratch register 123/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15;
136// State registers that must not be modified by external functions calls 136// State registers that must not be modified by external functions calls
137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed 137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
138static const BitSet32 persistent_regs = { 138static const BitSet32 persistent_regs = {
139 UNIFORMS, REGISTERS, // Pointers to register blocks 139 SETUP, STATE, // Pointers to register blocks
140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers 140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
141 ONE+16, NEGBIT+16, // Constants 141 ONE+16, NEGBIT+16, // Constants
142}; 142};
@@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
177 size_t src_offset; 177 size_t src_offset;
178 178
179 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { 179 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
180 src_ptr = UNIFORMS; 180 src_ptr = SETUP;
181 src_offset = src_reg.GetIndex() * sizeof(float24) * 4; 181 src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
182 } else { 182 } else {
183 src_ptr = REGISTERS; 183 src_ptr = STATE;
184 src_offset = UnitState<false>::InputOffset(src_reg); 184 src_offset = UnitState<false>::InputOffset(src_reg);
185 } 185 }
186 186
@@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
264 // If all components are enabled, write the result to the destination register 264 // If all components are enabled, write the result to the destination register
265 if (swiz.dest_mask == NO_DEST_REG_MASK) { 265 if (swiz.dest_mask == NO_DEST_REG_MASK) {
266 // Store dest back to memory 266 // Store dest back to memory
267 MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); 267 MOVAPS(MDisp(STATE, dest_offset_disp), src);
268 268
269 } else { 269 } else {
270 // Not all components are enabled, so mask the result when storing to the destination register... 270 // Not all components are enabled, so mask the result when storing to the destination register...
271 MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); 271 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
272 272
273 if (Common::GetCPUCaps().sse4_1) { 273 if (Common::GetCPUCaps().sse4_1) {
274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); 274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
287 } 287 }
288 288
289 // Store dest back to memory 289 // Store dest back to memory
290 MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); 290 MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
291 } 291 }
292} 292}
293 293
@@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
336} 336}
337 337
338void JitShader::Compile_UniformCondition(Instruction instr) { 338void JitShader::Compile_UniformCondition(Instruction instr) {
339 int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); 339 int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
340 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); 340 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
341} 341}
342 342
343BitSet32 JitShader::PersistentCallerSavedRegs() { 343BitSet32 JitShader::PersistentCallerSavedRegs() {
@@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
714 714
715 looping = true; 715 looping = true;
716 716
717 int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); 717 int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
718 MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); 718 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); 719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
720 SHR(32, R(LOOPCOUNT_REG), Imm8(8)); 720 SHR(32, R(LOOPCOUNT_REG), Imm8(8));
721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start 721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
@@ -826,8 +826,8 @@ void JitShader::Compile() {
826 // The stack pointer is 8 modulo 16 at the entry of a procedure 826 // The stack pointer is 8 modulo 16 at the entry of a procedure
827 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 827 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
828 828
829 MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); 829 MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
830 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); 830 MOV(PTRBITS, R(STATE), R(ABI_PARAM2));
831 831
832 // Zero address/loop registers 832 // Zero address/loop registers
833 XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); 833 XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
@@ -845,7 +845,7 @@ void JitShader::Compile() {
845 MOVAPS(NEGBIT, MatR(RAX)); 845 MOVAPS(NEGBIT, MatR(RAX));
846 846
847 // Jump to start of the shader program 847 // Jump to start of the shader program
848 JMPptr(R(ABI_PARAM2)); 848 JMPptr(R(ABI_PARAM3));
849 849
850 // Compile entire program 850 // Compile entire program
851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); 851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 30aa7ff30..5468459d4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock {
36public: 36public:
37 JitShader(); 37 JitShader();
38 38
39 void Run(void* registers, unsigned offset) const { 39 void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const {
40 program(registers, code_ptr[offset]); 40 program(&setup, &state, code_ptr[offset]);
41 } 41 }
42 42
43 void Compile(); 43 void Compile();
@@ -117,7 +117,7 @@ private:
117 /// Branches that need to be fixed up once the entire shader program is compiled 117 /// Branches that need to be fixed up once the entire shader program is compiled
118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; 118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
119 119
120 using CompiledShader = void(void* registers, const u8* start_addr); 120 using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
121 CompiledShader* program = nullptr; 121 CompiledShader* program = nullptr;
122}; 122};
123 123
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
index 18a7cf144..e40f0f1ee 100644
--- a/src/video_core/vertex_loader.cpp
+++ b/src/video_core/vertex_loader.cpp
@@ -130,7 +130,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); 130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
131 } else if (vertex_attribute_is_default[i]) { 131 } else if (vertex_attribute_is_default[i]) {
132 // Load the default attribute if we're configured to do so 132 // Load the default attribute if we're configured to do so
133 input.attr[i] = g_state.vs.default_attributes[i]; 133 input.attr[i] = g_state.vs_default_attributes[i];
134 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", 134 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
135 i, vertex, index, 135 i, vertex, index,
136 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), 136 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),