summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt1
-rw-r--r--src/audio_core/CMakeLists.txt16
-rw-r--r--src/audio_core/audio_core.cpp53
-rw-r--r--src/audio_core/audio_core.h26
-rw-r--r--src/audio_core/hle/dsp.cpp42
-rw-r--r--src/audio_core/hle/dsp.h502
-rw-r--r--src/audio_core/hle/pipe.cpp55
-rw-r--r--src/audio_core/hle/pipe.h38
-rw-r--r--src/audio_core/sink.h34
-rw-r--r--src/citra/CMakeLists.txt2
-rw-r--r--src/citra_qt/CMakeLists.txt2
-rw-r--r--src/common/bit_field.h44
-rw-r--r--src/common/emu_window.cpp4
-rw-r--r--src/common/logging/backend.cpp2
-rw-r--r--src/common/logging/backend.h20
-rw-r--r--src/common/logging/log.h2
-rw-r--r--src/core/hle/kernel/memory.cpp5
-rw-r--r--src/core/hle/kernel/process.cpp2
-rw-r--r--src/core/hle/result.h8
-rw-r--r--src/core/hle/service/cfg/cfg.cpp6
-rw-r--r--src/core/hle/service/dsp_dsp.cpp135
-rw-r--r--src/core/hle/service/dsp_dsp.h12
-rw-r--r--src/core/hle/service/gsp_gpu.cpp6
-rw-r--r--src/core/hle/service/hid/hid.cpp2
-rw-r--r--src/core/hle/service/ptm/ptm.cpp4
-rw-r--r--src/core/hle/service/soc_u.cpp12
-rw-r--r--src/core/hw/gpu.cpp22
-rw-r--r--src/core/system.cpp7
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/clipper.cpp10
-rw-r--r--src/video_core/command_processor.cpp60
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp8
-rw-r--r--src/video_core/pica.h401
-rw-r--r--src/video_core/pica_types.h146
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp397
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h151
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp219
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h2
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_state.h4
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h12
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp4
-rw-r--r--src/video_core/shader/shader.cpp6
-rw-r--r--src/video_core/shader/shader.h8
44 files changed, 2151 insertions, 354 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cb09f3cd1..2bb411492 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,6 +4,7 @@ include_directories(.)
4add_subdirectory(common) 4add_subdirectory(common)
5add_subdirectory(core) 5add_subdirectory(core)
6add_subdirectory(video_core) 6add_subdirectory(video_core)
7add_subdirectory(audio_core)
7if (ENABLE_GLFW) 8if (ENABLE_GLFW)
8 add_subdirectory(citra) 9 add_subdirectory(citra)
9endif() 10endif()
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
new file mode 100644
index 000000000..b0d1c7eb6
--- /dev/null
+++ b/src/audio_core/CMakeLists.txt
@@ -0,0 +1,16 @@
1set(SRCS
2 audio_core.cpp
3 hle/dsp.cpp
4 hle/pipe.cpp
5 )
6
7set(HEADERS
8 audio_core.h
9 hle/dsp.h
10 hle/pipe.h
11 sink.h
12 )
13
14create_directory_groups(${SRCS} ${HEADERS})
15
16add_library(audio_core STATIC ${SRCS} ${HEADERS}) \ No newline at end of file
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
new file mode 100644
index 000000000..894f46990
--- /dev/null
+++ b/src/audio_core/audio_core.cpp
@@ -0,0 +1,53 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "audio_core/audio_core.h"
6#include "audio_core/hle/dsp.h"
7
8#include "core/core_timing.h"
9#include "core/hle/kernel/vm_manager.h"
10#include "core/hle/service/dsp_dsp.h"
11
12namespace AudioCore {
13
14// Audio ticks occur about every 5 milliseconds.
15static int tick_event; ///< CoreTiming event
16static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles
17
18static void AudioTickCallback(u64 /*userdata*/, int cycles_late) {
19 if (DSP::HLE::Tick()) {
20 // HACK: We're not signaling the interrups when they should be, but just firing them all off together.
21 // It should be only (interrupt_id = 2, channel_id = 2) that's signalled here.
22 // TODO(merry): Understand when the other interrupts are fired.
23 DSP_DSP::SignalAllInterrupts();
24 }
25
26 // Reschedule recurrent event
27 CoreTiming::ScheduleEvent(audio_frame_ticks - cycles_late, tick_event);
28}
29
30/// Initialise Audio
31void Init() {
32 DSP::HLE::Init();
33
34 tick_event = CoreTiming::RegisterEvent("AudioCore::tick_event", AudioTickCallback);
35 CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event);
36}
37
38/// Add DSP address spaces to Process's address space.
39void AddAddressSpace(Kernel::VMManager& address_space) {
40 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_region0), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
41 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite);
42
43 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_region1), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
44 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite);
45}
46
47/// Shutdown Audio
48void Shutdown() {
49 CoreTiming::UnscheduleEvent(tick_event, 0);
50 DSP::HLE::Shutdown();
51}
52
53} //namespace
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
new file mode 100644
index 000000000..64c330914
--- /dev/null
+++ b/src/audio_core/audio_core.h
@@ -0,0 +1,26 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Kernel {
8class VMManager;
9}
10
11namespace AudioCore {
12
13constexpr int num_sources = 24;
14constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
15constexpr int native_sample_rate = 32728; ///< 32kHz
16
17/// Initialise Audio Core
18void Init();
19
20/// Add DSP address spaces to a Process.
21void AddAddressSpace(Kernel::VMManager& vm_manager);
22
23/// Shutdown Audio Core
24void Shutdown();
25
26} // namespace
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
new file mode 100644
index 000000000..c89356edc
--- /dev/null
+++ b/src/audio_core/hle/dsp.cpp
@@ -0,0 +1,42 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "audio_core/hle/dsp.h"
6#include "audio_core/hle/pipe.h"
7
8namespace DSP {
9namespace HLE {
10
11SharedMemory g_region0;
12SharedMemory g_region1;
13
14void Init() {
15 DSP::HLE::ResetPipes();
16}
17
/// Shut down the HLE DSP. Currently a no-op.
void Shutdown() {
    // Intentionally empty.
}
20
/// Run one audio tick. Currently a stub that performs no processing.
/// @return true, i.e. the caller is always told to trigger the audio interrupt.
bool Tick() {
    const bool trigger_interrupt = true;
    return trigger_interrupt;
}
24
25SharedMemory& CurrentRegion() {
26 // The region with the higher frame counter is chosen unless there is wraparound.
27
28 if (g_region0.frame_counter == 0xFFFFu && g_region1.frame_counter != 0xFFFEu) {
29 // Wraparound has occured.
30 return g_region1;
31 }
32
33 if (g_region1.frame_counter == 0xFFFFu && g_region0.frame_counter != 0xFFFEu) {
34 // Wraparound has occured.
35 return g_region0;
36 }
37
38 return (g_region0.frame_counter > g_region1.frame_counter) ? g_region0 : g_region1;
39}
40
41} // namespace HLE
42} // namespace DSP
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h
new file mode 100644
index 000000000..14c4000c6
--- /dev/null
+++ b/src/audio_core/hle/dsp.h
@@ -0,0 +1,502 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <type_traits>
9
10#include "audio_core/audio_core.h"
11
12#include "common/bit_field.h"
13#include "common/common_funcs.h"
14#include "common/common_types.h"
15#include "common/swap.h"
16
17namespace DSP {
18namespace HLE {
19
20// The application-accessible region of DSP memory consists of two parts.
21// Both are marked as IO and have Read/Write permissions.
22//
23// First Region: 0x1FF50000 (Size: 0x8000)
24// Second Region: 0x1FF70000 (Size: 0x8000)
25//
26// The DSP reads from each region alternately based on the frame counter for each region much like a
27// double-buffer. The frame counter is located as the very last u16 of each region and is incremented
28// each audio tick.
29
30struct SharedMemory;
31
32constexpr VAddr region0_base = 0x1FF50000;
33extern SharedMemory g_region0;
34
35constexpr VAddr region1_base = 0x1FF70000;
36extern SharedMemory g_region1;
37
38/**
39 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from
40 * its memory regions, the higher and lower 16-bit halves are swapped compared to the little-endian
41 * layout of the ARM11. Hence from the ARM11's point of view the memory space appears to be
42 * middle-endian.
43 *
44 * Unusually this does not appear to be an issue for floating point numbers. The DSP makes the more
45 * sensible choice of keeping that little-endian. There are also some exceptions such as the
46 * IntermediateMixSamples structure, which is little-endian.
47 *
48 * This struct implements the conversion to and from this middle-endianness.
49 */
50struct u32_dsp {
51 u32_dsp() = default;
52 operator u32() const {
53 return Convert(storage);
54 }
55 void operator=(u32 new_value) {
56 storage = Convert(new_value);
57 }
58private:
59 static constexpr u32 Convert(u32 value) {
60 return (value << 16) | (value >> 16);
61 }
62 u32_le storage;
63};
64#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
65static_assert(std::is_trivially_copyable<u32_dsp>::value, "u32_dsp isn't trivially copyable");
66#endif
67
68// There are 15 structures in each memory region. A table of them in the order they appear in memory
69// is presented below
70//
71// Pipe 2 # First Region DSP Address Purpose Control
72// 5 0x8400 DSP Status DSP
73// 9 0x8410 DSP Debug Info DSP
74// 6 0x8540 Final Mix Samples DSP
75// 2 0x8680 Source Status [24] DSP
76// 8 0x8710 Compressor Table Application
77// 4 0x9430 DSP Configuration Application
78// 7 0x9492 Intermediate Mix Samples DSP + App
79// 1 0x9E92 Source Configuration [24] Application
80// 3 0xA792 Source ADPCM Coefficients [24] Application
81// 10 0xA912 Surround Sound Related
82// 11 0xAA12 Surround Sound Related
83// 12 0xAAD2 Surround Sound Related
84// 13 0xAC52 Surround Sound Related
85// 14 0xAC5C Surround Sound Related
86// 0 0xBFFF Frame Counter Application
87//
88// Note that the above addresses do vary slightly between audio firmwares observed; the addresses are
89// not fixed in stone. The addresses above are only an exemplar; they're what this implementation
90// does and provides to applications.
91//
92// Applications request the DSP service to convert DSP addresses into ARM11 virtual addresses using the
93// ConvertProcessAddressFromDspDram service call. Applications seem to derive the addresses for the
94// second region via:
95// second_region_dsp_addr = first_region_dsp_addr | 0x10000
96//
97// Applications maintain most of their own audio state; the memory region is used mainly for
98// communication and not storage of state.
99//
100// In the documentation below, filter and effect transfer functions are specified in the z domain.
101// (If you are more familiar with the Laplace transform, z = exp(sT). The z domain is the digital
102// frequency domain, just like how the s domain is the analog frequency domain.)
103
104#define INSERT_PADDING_DSPWORDS(num_words) INSERT_PADDING_BYTES(2 * (num_words))
105
106// GCC versions < 5.0 do not implement std::is_trivially_copyable.
107// Excluding MSVC because it has weird behaviour for std::is_trivially_copyable.
108#if (__GNUC__ >= 5) || defined(__clang__)
109 #define ASSERT_DSP_STRUCT(name, size) \
110 static_assert(std::is_standard_layout<name>::value, "DSP structure " #name " doesn't use standard layout"); \
111 static_assert(std::is_trivially_copyable<name>::value, "DSP structure " #name " isn't trivially copyable"); \
112 static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name)
113#else
114 #define ASSERT_DSP_STRUCT(name, size) \
115 static_assert(std::is_standard_layout<name>::value, "DSP structure " #name " doesn't use standard layout"); \
116 static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name)
117#endif
118
/// Configuration for every audio source: one Configuration entry per source
/// (AudioCore::num_sources entries). Listed as "Source Configuration [24]", controlled by the
/// application, in the region table above.
/// Field order and padding define the shared-memory layout checked by the size asserts that
/// follow this struct; do not reorder members.
119struct SourceConfiguration {
120 struct Configuration {
121 /// These dirty flags are set by the application when it updates the fields in this struct.
122 /// The DSP clears these each audio frame.
123 union {
124 u32_le dirty_raw;
125
126 BitField<2, 1, u32_le> adpcm_coefficients_dirty;
127 BitField<3, 1, u32_le> partial_embedded_buffer_dirty; ///< Tends to be set when a looped buffer is queued.
128
129 BitField<16, 1, u32_le> enable_dirty;
130 BitField<17, 1, u32_le> interpolation_dirty;
131 BitField<18, 1, u32_le> rate_multiplier_dirty;
132 BitField<19, 1, u32_le> buffer_queue_dirty;
133 BitField<20, 1, u32_le> loop_related_dirty;
134 BitField<21, 1, u32_le> play_position_dirty; ///< Tends to also be set when embedded buffer is updated.
135 BitField<22, 1, u32_le> filters_enabled_dirty;
136 BitField<23, 1, u32_le> simple_filter_dirty;
137 BitField<24, 1, u32_le> biquad_filter_dirty;
138 BitField<25, 1, u32_le> gain_0_dirty;
139 BitField<26, 1, u32_le> gain_1_dirty;
140 BitField<27, 1, u32_le> gain_2_dirty;
141 BitField<28, 1, u32_le> sync_dirty;
142 BitField<29, 1, u32_le> reset_flag;
143
144 BitField<31, 1, u32_le> embedded_buffer_dirty;
145 };
146
147 // Gain control
148
149 /**
150 * Gain is between 0.0-1.0. This determines how much will this source appear on
151 * each of the 12 channels that feed into the intermediate mixers.
152 * Each of the three intermediate mixers is fed two left and two right channels.
153 */
154 float_le gain[3][4];
155
156 // Interpolation
157
158 /// Multiplier for sample rate. Resampling occurs with the selected interpolation method.
159 float_le rate_multiplier;
160
 /// Interpolation method selectable through interpolation_mode.
161 enum class InterpolationMode : u8 {
162 None = 0,
163 Linear = 1,
164 Polyphase = 2
165 };
166
167 InterpolationMode interpolation_mode;
168 INSERT_PADDING_BYTES(1); ///< Interpolation related
169
170 // Filters
171
172 /**
173 * This is the simplest normalized first-order digital recursive filter.
174 * The transfer function of this filter is:
175 * H(z) = b0 / (1 + a1 z^-1)
176 * Values are signed fixed point with 15 fractional bits.
177 */
178 struct SimpleFilter {
179 s16_le b0;
180 s16_le a1;
181 };
182
183 /**
184 * This is a normalised biquad filter (second-order).
185 * The transfer function of this filter is:
186 * H(z) = (b0 + b1 z^-1 + b2 z^-2) / (1 - a1 z^-1 - a2 z^-2)
187 * Nintendo chose to negate the feedbackward coefficients. This differs from standard notation
188 * as in: https://ccrma.stanford.edu/~jos/filters/Direct_Form_I.html
189 * Values are signed fixed point with 14 fractional bits.
190 */
191 struct BiquadFilter {
192 s16_le b0;
193 s16_le b1;
194 s16_le b2;
195 s16_le a1;
196 s16_le a2;
197 };
198
 /// Filter enable bits; filters_enabled is the raw 16-bit view of the same storage.
199 union {
200 u16_le filters_enabled;
201 BitField<0, 1, u16_le> simple_filter_enabled;
202 BitField<1, 1, u16_le> biquad_filter_enabled;
203 };
204
205 SimpleFilter simple_filter;
206 BiquadFilter biquad_filter;
207
208 // Buffer Queue
209
210 /// A buffer of audio data from the application, along with metadata about it.
211 struct Buffer {
212 /// Physical memory address of the start of the buffer
213 u32_dsp physical_address;
214
215 /// This is length in terms of samples.
216 /// Note that in different buffer formats a sample takes up different number of bytes.
217 u32_dsp length;
218
219 /// ADPCM Predictor (4 bits) and Scale (4 bits)
220 union {
221 u16_le adpcm_ps;
222 BitField<0, 4, u16_le> adpcm_scale;
223 BitField<4, 4, u16_le> adpcm_predictor;
224 };
225
226 /// ADPCM Historical Samples (y[n-1] and y[n-2])
227 u16_le adpcm_yn[2];
228
229 /// This is non-zero when the ADPCM values above are to be updated.
230 u8 adpcm_dirty;
231
232 /// Is a looping buffer.
233 u8 is_looping;
234
235 /// This value is shown in SourceStatus::previous_buffer_id when this buffer has finished.
236 /// This allows the emulated application to tell what buffer is currently playing
237 u16_le buffer_id;
238
239 INSERT_PADDING_DSPWORDS(1);
240 };
241
242 u16_le buffers_dirty; ///< Bitmap indicating which buffers are dirty (bit i -> buffers[i])
243 Buffer buffers[4]; ///< Queued Buffers
244
245 // Playback controls
246
247 u32_dsp loop_related;
248 u8 enable;
249 INSERT_PADDING_BYTES(1);
250 u16_le sync; ///< Application-side sync (See also: SourceStatus::sync)
251 u32_dsp play_position; ///< Position. (Units: number of samples)
252 INSERT_PADDING_DSPWORDS(2);
253
254 // Embedded Buffer
255 // This buffer is often the first buffer to be used when initiating audio playback,
256 // after which the buffer queue is used.
257
258 u32_dsp physical_address;
259
260 /// This is length in terms of samples.
261 /// Note a sample takes up different number of bytes in different buffer formats.
262 u32_dsp length;
263
 /// Channel layout of the source data, stored in the mono_or_stereo bit field below.
264 enum class MonoOrStereo : u16_le {
265 Mono = 1,
266 Stereo = 2
267 };
268
 /// Sample encoding of the source data, stored in the format bit field below.
269 enum class Format : u16_le {
270 PCM8 = 0,
271 PCM16 = 1,
272 ADPCM = 2
273 };
274
 /// Format flags; flags1_raw is the raw 16-bit view of the same storage.
275 union {
276 u16_le flags1_raw;
277 BitField<0, 2, MonoOrStereo> mono_or_stereo;
278 BitField<2, 2, Format> format;
279 BitField<5, 1, u16_le> fade_in;
280 };
281
282 /// ADPCM Predictor (4 bit) and Scale (4 bit)
283 union {
284 u16_le adpcm_ps;
285 BitField<0, 4, u16_le> adpcm_scale;
286 BitField<4, 4, u16_le> adpcm_predictor;
287 };
288
289 /// ADPCM Historical Samples (y[n-1] and y[n-2])
290 u16_le adpcm_yn[2];
291
292 union {
293 u16_le flags2_raw;
294 BitField<0, 1, u16_le> adpcm_dirty; ///< Has the ADPCM info above been changed?
295 BitField<1, 1, u16_le> is_looping; ///< Is this a looping buffer?
296 };
297
298 /// Buffer id of embedded buffer (used as a buffer id in SourceStatus to reference this buffer).
299 u16_le buffer_id;
300 };
301
 /// One configuration entry per audio source.
302 Configuration config[AudioCore::num_sources];
303};
304ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192);
305ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
306
/// Status of every audio source: one Status entry per source (AudioCore::num_sources entries).
/// Listed as "Source Status [24]", controlled by the DSP, in the region table above.
307struct SourceStatus {
308 struct Status {
309 u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
310 u8 previous_buffer_id_dirty; ///< Non-zero when previous_buffer_id changes
311 u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync
312 u32_dsp buffer_position; ///< Number of samples into the current buffer
313 u16_le previous_buffer_id; ///< Updated when a buffer finishes playing
314 INSERT_PADDING_DSPWORDS(1);
315 };
316
 /// One status entry per audio source.
317 Status status[AudioCore::num_sources];
318};
319ASSERT_DSP_STRUCT(SourceStatus::Status, 12);
320
/// DSP-wide configuration: intermediate mixer volumes, output format, limiter and headphone
/// state, and the delay/reverb effect blocks. Listed as "DSP Configuration", controlled by the
/// application, in the region table above.
/// Field order and padding define the shared-memory layout checked by the size asserts that
/// follow this struct; do not reorder members.
321struct DspConfiguration {
322 /// These dirty flags are set by the application when it updates the fields in this struct.
323 /// The DSP clears these each audio frame.
324 union {
325 u32_le dirty_raw;
326
327 BitField<8, 1, u32_le> mixer1_enabled_dirty;
328 BitField<9, 1, u32_le> mixer2_enabled_dirty;
329 BitField<10, 1, u32_le> delay_effect_0_dirty;
330 BitField<11, 1, u32_le> delay_effect_1_dirty;
331 BitField<12, 1, u32_le> reverb_effect_0_dirty;
332 BitField<13, 1, u32_le> reverb_effect_1_dirty;
333
334 BitField<16, 1, u32_le> volume_0_dirty;
335
336 BitField<24, 1, u32_le> volume_1_dirty;
337 BitField<25, 1, u32_le> volume_2_dirty;
338 BitField<26, 1, u32_le> output_format_dirty;
339 BitField<27, 1, u32_le> limiter_enabled_dirty;
340 BitField<28, 1, u32_le> headphones_connected_dirty;
341 };
342
343 /// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for each at the final mixer
344 float_le volume[3];
345
346 INSERT_PADDING_DSPWORDS(3);
347
 /// Output channel layout stored in output_format.
348 enum class OutputFormat : u16_le {
349 Mono = 0,
350 Stereo = 1,
351 Surround = 2
352 };
353
354 OutputFormat output_format;
355
356 u16_le limiter_enabled; ///< Not sure of the exact gain equation for the limiter.
357 u16_le headphones_connected; ///< Application updates the DSP on headphone status.
358 INSERT_PADDING_DSPWORDS(4); ///< TODO: Surround sound related
359 INSERT_PADDING_DSPWORDS(2); ///< TODO: Intermediate mixer 1/2 related
360 u16_le mixer1_enabled;
361 u16_le mixer2_enabled;
362
363 /**
364 * This is delay with feedback.
365 * Transfer function:
366 * H(z) = a z^-N / (1 - b z^-1 + a g z^-N)
367 * where
368 * N = frame_count * samples_per_frame
369 * g, a and b are fixed point with 7 fractional bits
370 */
371 struct DelayEffect {
372 /// These dirty flags are set by the application when it updates the fields in this struct.
373 /// The DSP clears these each audio frame.
374 union {
375 u16_le dirty_raw;
376 BitField<0, 1, u16_le> enable_dirty;
377 BitField<1, 1, u16_le> work_buffer_address_dirty;
378 BitField<2, 1, u16_le> other_dirty; ///< Set when anything else has been changed
379 };
380
381 u16_le enable;
382 INSERT_PADDING_DSPWORDS(1);
383 u16_le outputs;
384 u32_dsp work_buffer_address; ///< The application allocates a block of memory for the DSP to use as a work buffer.
385 u16_le frame_count; ///< Frames to delay by
386
387 // Coefficients
388 s16_le g; ///< Fixed point with 7 fractional bits
389 s16_le a; ///< Fixed point with 7 fractional bits
390 s16_le b; ///< Fixed point with 7 fractional bits
391 };
392
393 DelayEffect delay_effect[2];
394
 /// Reverb effect parameters. The fields are not described yet; the struct is opaque padding
 /// of 26 DSP words.
395 struct ReverbEffect {
396 INSERT_PADDING_DSPWORDS(26); ///< TODO
397 };
398
399 ReverbEffect reverb_effect[2];
400
401 INSERT_PADDING_DSPWORDS(4);
402};
403ASSERT_DSP_STRUCT(DspConfiguration, 196);
404ASSERT_DSP_STRUCT(DspConfiguration::DelayEffect, 20);
405ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52);
406
/// ADPCM coefficient table for all sources. Listed as "Source ADPCM Coefficients [24]",
/// controlled by the application, in the region table above.
407struct AdpcmCoefficients {
408 /// Coefficients are signed fixed point with 11 fractional bits.
409 /// Each source has 16 coefficients associated with it.
410 s16_le coeff[AudioCore::num_sources][16];
411};
412ASSERT_DSP_STRUCT(AdpcmCoefficients, 768);
413
/// DSP status block. Listed as "DSP Status", controlled by the DSP, in the region table above.
/// Only the first two words are named; the remaining 0xE words are padding.
414struct DspStatus {
415 u16_le unknown;
416 u16_le dropped_frames;
417 INSERT_PADDING_DSPWORDS(0xE);
418};
419ASSERT_DSP_STRUCT(DspStatus, 32);
420
421/// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
422/// When the application writes to this region it has no effect.
/// Holds 2 * AudioCore::samples_per_frame 16-bit samples, i.e. one audio frame of stereo output.
423struct FinalMixSamples {
424 s16_le pcm16[2 * AudioCore::samples_per_frame];
425};
426ASSERT_DSP_STRUCT(FinalMixSamples, 640);
427
428/// DSP writes output of intermediate mixers 1 and 2 here.
429/// Writes to this region by the application edits the output of the intermediate mixers.
430/// This seems to be intended to allow the application to do custom effects on the ARM11.
431/// Values that exceed s16 range will be clipped by the DSP after further processing.
432struct IntermediateMixSamples {
 /// One audio frame of 32-bit samples in 4 rows (presumably the two left and two right
 /// channels fed to each intermediate mixer - TODO confirm).
433 struct Samples {
434 s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
435 };
436
 // Presumably mix1/mix2 correspond to intermediate mixers 1 and 2 respectively - TODO confirm.
437 Samples mix1;
438 Samples mix2;
439};
440ASSERT_DSP_STRUCT(IntermediateMixSamples, 5120);
441
442/// Compressor table
443struct Compressor {
444 INSERT_PADDING_DSPWORDS(0xD20); ///< TODO
445};
446
/// DSP debug information block, listed as "DSP Debug Info" in the region table above.
/// Represented purely as 0x130 DSP words of padding.
447/// There is no easy way to implement this in a HLE implementation.
448struct DspDebug {
449 INSERT_PADDING_DSPWORDS(0x130);
450};
451ASSERT_DSP_STRUCT(DspDebug, 0x260);
452
/// Layout of one application-accessible DSP memory region. Two instances exist
/// (g_region0 and g_region1). Members appear in memory order, so their order and the padding
/// between them must not be changed.
453struct SharedMemory {
454 /// Padding
455 INSERT_PADDING_DSPWORDS(0x400);
456
457 DspStatus dsp_status;
458
459 DspDebug dsp_debug;
460
461 FinalMixSamples final_samples;
462
463 SourceStatus source_statuses;
464
465 Compressor compressor;
466
467 DspConfiguration dsp_configuration;
468
469 IntermediateMixSamples intermediate_mix_samples;
470
471 SourceConfiguration source_configurations;
472
473 AdpcmCoefficients adpcm_coefficients;
474
475 /// Unknown 10-14 (Surround sound related)
476 INSERT_PADDING_DSPWORDS(0x16ED);
477
 /// Last u16 of the region; CurrentRegion() compares the two regions' counters to pick one.
478 u16_le frame_counter;
479};
480ASSERT_DSP_STRUCT(SharedMemory, 0x8000);
481
482#undef INSERT_PADDING_DSPWORDS
483#undef ASSERT_DSP_STRUCT
484
485/// Initialize DSP hardware
486void Init();
487
488/// Shutdown DSP hardware
489void Shutdown();
490
491/**
492 * Perform processing and updates state of current shared memory buffer.
493 * This function is called every audio tick before triggering the audio interrupt.
494 * @return Whether an audio interrupt should be triggered this frame.
495 */
496bool Tick();
497
498/// Returns a mutable reference to the current region. Current region is selected based on the frame counter.
499SharedMemory& CurrentRegion();
500
501} // namespace HLE
502} // namespace DSP
diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp
new file mode 100644
index 000000000..6542c760c
--- /dev/null
+++ b/src/audio_core/hle/pipe.cpp
@@ -0,0 +1,55 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <vector>
7
8#include "audio_core/hle/pipe.h"
9
10#include "common/common_types.h"
11#include "common/logging/log.h"
12
13namespace DSP {
14namespace HLE {
15
16static size_t pipe2position = 0;
17
18void ResetPipes() {
19 pipe2position = 0;
20}
21
22std::vector<u8> PipeRead(u32 pipe_number, u32 length) {
23 if (pipe_number != 2) {
24 LOG_WARNING(Audio_DSP, "pipe_number = %u (!= 2), unimplemented", pipe_number);
25 return {}; // We currently don't handle anything other than the audio pipe.
26 }
27
28 // Canned DSP responses that games expect. These were taken from HW by 3dmoo team.
29 // TODO: Our implementation will actually use a slightly different response than this one.
30 // TODO: Use offsetof on DSP structures instead for a proper response.
31 static const std::array<u8, 32> canned_response {{
32 0x0F, 0x00, 0xFF, 0xBF, 0x8E, 0x9E, 0x80, 0x86, 0x8E, 0xA7, 0x30, 0x94, 0x00, 0x84, 0x40, 0x85,
33 0x8E, 0x94, 0x10, 0x87, 0x10, 0x84, 0x0E, 0xA9, 0x0E, 0xAA, 0xCE, 0xAA, 0x4E, 0xAC, 0x58, 0xAC
34 }};
35
36 // TODO: Move this into dsp::DSP service since it happens on the service side.
37 // Hardware observation: No data is returned if requested length reads beyond the end of the data in-pipe.
38 if (pipe2position + length > canned_response.size()) {
39 return {};
40 }
41
42 std::vector<u8> ret;
43 for (size_t i = 0; i < length; i++, pipe2position++) {
44 ret.emplace_back(canned_response[pipe2position]);
45 }
46
47 return ret;
48}
49
50void PipeWrite(u32 pipe_number, const std::vector<u8>& buffer) {
51 // TODO: proper pipe behaviour
52}
53
54} // namespace HLE
55} // namespace DSP
diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h
new file mode 100644
index 000000000..ff6536950
--- /dev/null
+++ b/src/audio_core/hle/pipe.h
@@ -0,0 +1,38 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "common/common_types.h"
10
11namespace DSP {
12namespace HLE {
13
14/// Reset the pipes by setting pipe positions back to the beginning.
15void ResetPipes();
16
17/**
18 * Read a DSP pipe.
19 * Pipe IDs:
20 * pipe_number = 0: Debug
21 * pipe_number = 1: P-DMA
22 * pipe_number = 2: Audio
23 * pipe_number = 3: Binary
24 * @param pipe_number The Pipe ID
25 * @param length How much data to request.
26 * @return The data read from the pipe. The size of this vector can be less than the length requested.
27 */
28std::vector<u8> PipeRead(u32 pipe_number, u32 length);
29
30/**
31 * Write to a DSP pipe.
32 * @param pipe_number The Pipe ID
33 * @param buffer The data to write to the pipe.
34 */
35void PipeWrite(u32 pipe_number, const std::vector<u8>& buffer);
36
37} // namespace HLE
38} // namespace DSP
diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h
new file mode 100644
index 000000000..cad21a85e
--- /dev/null
+++ b/src/audio_core/sink.h
@@ -0,0 +1,34 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "common/common_types.h"
10
11namespace AudioCore {
12
13/**
14 * This class is an interface for an audio sink. An audio sink accepts samples in stereo signed PCM16 format to be output.
15 * Sinks *do not* handle resampling and expect the correct sample rate. They are dumb outputs.
16 */
17class Sink {
18public:
 /// Virtual so that implementations can be destroyed through a pointer to Sink.
19 virtual ~Sink() = default;
20
21 /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec)
 /// @return The sample rate in samples per second.
22 virtual unsigned GetNativeSampleRate() const = 0;
23
24 /**
25 * Feed stereo samples to sink.
26 * @param samples Samples in interleaved stereo PCM16 format. Size of vector must be multiple of two.
27 */
28 virtual void EnqueueSamples(const std::vector<s16>& samples) = 0;
29
30 /// Samples enqueued that have not been played yet.
 /// @return The count of such samples.
31 virtual std::size_t SamplesInQueue() const = 0;
32};
33
34} // namespace
diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt
index e7f8a17f9..b9abb818e 100644
--- a/src/citra/CMakeLists.txt
+++ b/src/citra/CMakeLists.txt
@@ -17,7 +17,7 @@ include_directories(${GLFW_INCLUDE_DIRS})
17link_directories(${GLFW_LIBRARY_DIRS}) 17link_directories(${GLFW_LIBRARY_DIRS})
18 18
19add_executable(citra ${SRCS} ${HEADERS}) 19add_executable(citra ${SRCS} ${HEADERS})
20target_link_libraries(citra core video_core common) 20target_link_libraries(citra core video_core audio_core common)
21target_link_libraries(citra ${GLFW_LIBRARIES} ${OPENGL_gl_LIBRARY} inih glad) 21target_link_libraries(citra ${GLFW_LIBRARIES} ${OPENGL_gl_LIBRARY} inih glad)
22if (MSVC) 22if (MSVC)
23 target_link_libraries(citra getopt) 23 target_link_libraries(citra getopt)
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index bbf6ae001..b3d1205a4 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -79,7 +79,7 @@ if (APPLE)
79else() 79else()
80 add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS}) 80 add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS})
81endif() 81endif()
82target_link_libraries(citra-qt core video_core common qhexedit) 82target_link_libraries(citra-qt core video_core audio_core common qhexedit)
83target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) 83target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS})
84target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) 84target_link_libraries(citra-qt ${PLATFORM_LIBRARIES})
85 85
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 66689f398..371eb17a1 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -115,29 +115,24 @@ template<std::size_t position, std::size_t bits, typename T>
115struct BitField 115struct BitField
116{ 116{
117private: 117private:
118 // This constructor might be considered ambiguous: 118 // We hide the copy assigment operator here, because the default copy
119 // Would it initialize the storage or just the bitfield? 119 // assignment would copy the full storage value, rather than just the bits
120 // Hence, delete it. Use the assignment operator to set bitfield values! 120 // relevant to this particular bit field.
121 BitField(T val) = delete; 121 // We don't delete it because we want BitField to be trivially copyable.
122 BitField& operator=(const BitField&) = default;
122 123
123public: 124public:
125 // This constructor and assignment operator might be considered ambiguous:
126 // Would they initialize the storage or just the bitfield?
127 // Hence, delete them. Use the Assign method to set bitfield values!
128 BitField(T val) = delete;
129 BitField& operator=(T val) = delete;
130
124 // Force default constructor to be created 131 // Force default constructor to be created
125 // so that we can use this within unions 132 // so that we can use this within unions
126 BitField() = default; 133 BitField() = default;
127 134
128 // We explicitly delete the copy assigment operator here, because the 135 FORCE_INLINE operator T() const {
129 // default copy assignment would copy the full storage value, rather than
130 // just the bits relevant to this particular bit field.
131 BitField& operator=(const BitField&) = delete;
132
133 FORCE_INLINE BitField& operator=(T val)
134 {
135 Assign(val);
136 return *this;
137 }
138
139 FORCE_INLINE operator T() const
140 {
141 return Value(); 136 return Value();
142 } 137 }
143 138
@@ -145,8 +140,7 @@ public:
145 storage = (storage & ~GetMask()) | (((StorageType)value << position) & GetMask()); 140 storage = (storage & ~GetMask()) | (((StorageType)value << position) & GetMask());
146 } 141 }
147 142
148 FORCE_INLINE T Value() const 143 FORCE_INLINE T Value() const {
149 {
150 if (std::numeric_limits<T>::is_signed) 144 if (std::numeric_limits<T>::is_signed)
151 { 145 {
152 std::size_t shift = 8 * sizeof(T)-bits; 146 std::size_t shift = 8 * sizeof(T)-bits;
@@ -159,8 +153,7 @@ public:
159 } 153 }
160 154
161 // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015 155 // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015
162 FORCE_INLINE bool ToBool() const 156 FORCE_INLINE bool ToBool() const {
163 {
164 return Value() != 0; 157 return Value() != 0;
165 } 158 }
166 159
@@ -176,8 +169,7 @@ private:
176 // Unsigned version of StorageType 169 // Unsigned version of StorageType
177 typedef typename std::make_unsigned<StorageType>::type StorageTypeU; 170 typedef typename std::make_unsigned<StorageType>::type StorageTypeU;
178 171
179 FORCE_INLINE StorageType GetMask() const 172 FORCE_INLINE StorageType GetMask() const {
180 {
181 return (((StorageTypeU)~0) >> (8 * sizeof(T)-bits)) << position; 173 return (((StorageTypeU)~0) >> (8 * sizeof(T)-bits)) << position;
182 } 174 }
183 175
@@ -189,6 +181,10 @@ private:
189 static_assert(position < 8 * sizeof(T), "Invalid position"); 181 static_assert(position < 8 * sizeof(T), "Invalid position");
190 static_assert(bits <= 8 * sizeof(T), "Invalid number of bits"); 182 static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
191 static_assert(bits > 0, "Invalid number of bits"); 183 static_assert(bits > 0, "Invalid number of bits");
192 static_assert(std::is_standard_layout<T>::value, "Invalid base type"); 184 static_assert(std::is_pod<T>::value, "Invalid base type");
193}; 185};
194#pragma pack() 186#pragma pack()
187
188#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
189static_assert(std::is_trivially_copyable<BitField<0, 1, u32>>::value, "BitField must be trivially copyable");
190#endif
diff --git a/src/common/emu_window.cpp b/src/common/emu_window.cpp
index b69b05cb9..b2807354a 100644
--- a/src/common/emu_window.cpp
+++ b/src/common/emu_window.cpp
@@ -55,14 +55,14 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
55 (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top); 55 (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top);
56 56
57 touch_pressed = true; 57 touch_pressed = true;
58 pad_state.touch = 1; 58 pad_state.touch.Assign(1);
59} 59}
60 60
61void EmuWindow::TouchReleased() { 61void EmuWindow::TouchReleased() {
62 touch_pressed = false; 62 touch_pressed = false;
63 touch_x = 0; 63 touch_x = 0;
64 touch_y = 0; 64 touch_y = 0;
65 pad_state.touch = 0; 65 pad_state.touch.Assign(0);
66} 66}
67 67
68void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) { 68void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) {
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index d186ba8f8..58819012d 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -58,6 +58,8 @@ namespace Log {
58 CLS(Render) \ 58 CLS(Render) \
59 SUB(Render, Software) \ 59 SUB(Render, Software) \
60 SUB(Render, OpenGL) \ 60 SUB(Render, OpenGL) \
61 CLS(Audio) \
62 SUB(Audio, DSP) \
61 CLS(Loader) 63 CLS(Loader)
62 64
63// GetClassName is a macro defined by Windows.h, grrr... 65// GetClassName is a macro defined by Windows.h, grrr...
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index c1f4d08e4..795d42ebd 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -27,25 +27,9 @@ struct Entry {
27 std::string message; 27 std::string message;
28 28
29 Entry() = default; 29 Entry() = default;
30 Entry(Entry&& o) = default;
30 31
31 // TODO(yuriks) Use defaulted move constructors once MSVC supports them 32 Entry& operator=(Entry&& o) = default;
32#define MOVE(member) member(std::move(o.member))
33 Entry(Entry&& o)
34 : MOVE(timestamp), MOVE(log_class), MOVE(log_level),
35 MOVE(location), MOVE(message)
36 {}
37#undef MOVE
38
39 Entry& operator=(const Entry&& o) {
40#define MOVE(member) member = std::move(o.member)
41 MOVE(timestamp);
42 MOVE(log_class);
43 MOVE(log_level);
44 MOVE(location);
45 MOVE(message);
46#undef MOVE
47 return *this;
48 }
49}; 33};
50 34
51/** 35/**
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 2d9323a7b..ec7bb00b8 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -73,6 +73,8 @@ enum class Class : ClassType {
73 Render, ///< Emulator video output and hardware acceleration 73 Render, ///< Emulator video output and hardware acceleration
74 Render_Software, ///< Software renderer backend 74 Render_Software, ///< Software renderer backend
75 Render_OpenGL, ///< OpenGL backend 75 Render_OpenGL, ///< OpenGL backend
76 Audio, ///< Emulator audio output
77 Audio_DSP, ///< The HLE implementation of the DSP
76 Loader, ///< ROM loader 78 Loader, ///< ROM loader
77 79
78 Count ///< Total number of logging classes 80 Count ///< Total number of logging classes
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
index 0cfb43fc7..862643448 100644
--- a/src/core/hle/kernel/memory.cpp
+++ b/src/core/hle/kernel/memory.cpp
@@ -7,6 +7,8 @@
7#include <utility> 7#include <utility>
8#include <vector> 8#include <vector>
9 9
10#include "audio_core/audio_core.h"
11
10#include "common/common_types.h" 12#include "common/common_types.h"
11#include "common/logging/log.h" 13#include "common/logging/log.h"
12 14
@@ -107,7 +109,6 @@ struct MemoryArea {
107static MemoryArea memory_areas[] = { 109static MemoryArea memory_areas[] = {
108 {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory 110 {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
109 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) 111 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
110 {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory
111 {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory 112 {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
112}; 113};
113 114
@@ -133,6 +134,8 @@ void InitLegacyAddressSpace(Kernel::VMManager& address_space) {
133 auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, 134 auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR,
134 (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); 135 (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom();
135 address_space.Reprotect(shared_page_vma, VMAPermission::Read); 136 address_space.Reprotect(shared_page_vma, VMAPermission::Read);
137
138 AudioCore::AddAddressSpace(address_space);
136} 139}
137 140
138} // namespace 141} // namespace
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index d148efde2..16eb972fb 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -35,7 +35,7 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) {
35 35
36 process->codeset = std::move(code_set); 36 process->codeset = std::move(code_set);
37 process->flags.raw = 0; 37 process->flags.raw = 0;
38 process->flags.memory_region = MemoryRegion::APPLICATION; 38 process->flags.memory_region.Assign(MemoryRegion::APPLICATION);
39 Memory::InitLegacyAddressSpace(process->vm_manager); 39 Memory::InitLegacyAddressSpace(process->vm_manager);
40 40
41 return process; 41 return process;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index ea3abb5f6..0fce5988b 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -193,10 +193,10 @@ union ResultCode {
193 explicit ResultCode(u32 raw) : raw(raw) {} 193 explicit ResultCode(u32 raw) : raw(raw) {}
194 ResultCode(ErrorDescription description_, ErrorModule module_, 194 ResultCode(ErrorDescription description_, ErrorModule module_,
195 ErrorSummary summary_, ErrorLevel level_) : raw(0) { 195 ErrorSummary summary_, ErrorLevel level_) : raw(0) {
196 description = description_; 196 description.Assign(description_);
197 module = module_; 197 module.Assign(module_);
198 summary = summary_; 198 summary.Assign(summary_);
199 level = level_; 199 level.Assign(level_);
200 } 200 }
201 201
202 ResultCode& operator=(const ResultCode& o) { raw = o.raw; return *this; } 202 ResultCode& operator=(const ResultCode& o) { raw = o.raw; return *this; }
diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp
index 633fe19eb..7556aa6a5 100644
--- a/src/core/hle/service/cfg/cfg.cpp
+++ b/src/core/hle/service/cfg/cfg.cpp
@@ -293,8 +293,8 @@ ResultCode DeleteConfigNANDSaveFile() {
293 293
294ResultCode UpdateConfigNANDSavegame() { 294ResultCode UpdateConfigNANDSavegame() {
295 FileSys::Mode mode = {}; 295 FileSys::Mode mode = {};
296 mode.write_flag = 1; 296 mode.write_flag.Assign(1);
297 mode.create_flag = 1; 297 mode.create_flag.Assign(1);
298 298
299 FileSys::Path path("config"); 299 FileSys::Path path("config");
300 300
@@ -405,7 +405,7 @@ void Init() {
405 405
406 FileSys::Path config_path("config"); 406 FileSys::Path config_path("config");
407 FileSys::Mode open_mode = {}; 407 FileSys::Mode open_mode = {};
408 open_mode.read_flag = 1; 408 open_mode.read_flag.Assign(1);
409 409
410 auto config_result = Service::FS::OpenFileFromArchive(*archive_result, config_path, open_mode); 410 auto config_result = Service::FS::OpenFileFromArchive(*archive_result, config_path, open_mode);
411 411
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index f9f931f6d..15d3274ec 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "audio_core/hle/pipe.h"
6
5#include "common/logging/log.h" 7#include "common/logging/log.h"
6 8
7#include "core/hle/kernel/event.h" 9#include "core/hle/kernel/event.h"
@@ -14,17 +16,30 @@ namespace DSP_DSP {
14 16
15static u32 read_pipe_count; 17static u32 read_pipe_count;
16static Kernel::SharedPtr<Kernel::Event> semaphore_event; 18static Kernel::SharedPtr<Kernel::Event> semaphore_event;
17static Kernel::SharedPtr<Kernel::Event> interrupt_event;
18 19
19void SignalInterrupt() { 20struct PairHash {
20 // TODO(bunnei): This is just a stub, it does not do anything other than signal to the emulated 21 template <typename T, typename U>
21 // application that a DSP interrupt occurred, without specifying which one. Since we do not 22 std::size_t operator()(const std::pair<T, U> &x) const {
22 // emulate the DSP yet (and how it works is largely unknown), this is a work around to get games 23 // TODO(yuriks): Replace with better hash combining function.
23 // that check the DSP interrupt signal event to run. We should figure out the different types of 24 return std::hash<T>()(x.first) ^ std::hash<U>()(x.second);
24 // DSP interrupts, and trigger them at the appropriate times. 25 }
26};
27
28/// Map of (audio interrupt number, channel number) to Kernel::Events. See: RegisterInterruptEvents
29static std::unordered_map<std::pair<u32, u32>, Kernel::SharedPtr<Kernel::Event>, PairHash> interrupt_events;
30
31// DSP Interrupts:
32// Interrupt #2 occurs every frame tick. Userland programs normally have a thread that's waiting
33// for an interrupt event. Immediately after this interrupt event, userland normally updates the
34// state in the next region and increments the relevant frame counter by two.
35void SignalAllInterrupts() {
36 // HACK: The other interrupts have currently unknown purpose, we trigger them each tick in any case.
37 for (auto& interrupt_event : interrupt_events)
38 interrupt_event.second->Signal();
39}
25 40
26 if (interrupt_event != 0) 41void SignalInterrupt(u32 interrupt, u32 channel) {
27 interrupt_event->Signal(); 42 interrupt_events[std::make_pair(interrupt, channel)]->Signal();
28} 43}
29 44
30/** 45/**
@@ -43,7 +58,7 @@ static void ConvertProcessAddressFromDspDram(Service::Interface* self) {
43 cmd_buff[1] = 0; // No error 58 cmd_buff[1] = 0; // No error
44 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000); 59 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000);
45 60
46 LOG_WARNING(Service_DSP, "(STUBBED) called with address 0x%08X", addr); 61 LOG_TRACE(Service_DSP, "addr=0x%08X", addr);
47} 62}
48 63
49/** 64/**
@@ -121,8 +136,8 @@ static void FlushDataCache(Service::Interface* self) {
121/** 136/**
122 * DSP_DSP::RegisterInterruptEvents service function 137 * DSP_DSP::RegisterInterruptEvents service function
123 * Inputs: 138 * Inputs:
124 * 1 : Parameter 0 (purpose unknown) 139 * 1 : Interrupt Number
125 * 2 : Parameter 1 (purpose unknown) 140 * 2 : Channel Number
126 * 4 : Interrupt event handle 141 * 4 : Interrupt event handle
127 * Outputs: 142 * Outputs:
128 * 1 : Result of function, 0 on success, otherwise error code 143 * 1 : Result of function, 0 on success, otherwise error code
@@ -130,22 +145,24 @@ static void FlushDataCache(Service::Interface* self) {
130static void RegisterInterruptEvents(Service::Interface* self) { 145static void RegisterInterruptEvents(Service::Interface* self) {
131 u32* cmd_buff = Kernel::GetCommandBuffer(); 146 u32* cmd_buff = Kernel::GetCommandBuffer();
132 147
133 u32 param0 = cmd_buff[1]; 148 u32 interrupt = cmd_buff[1];
134 u32 param1 = cmd_buff[2]; 149 u32 channel = cmd_buff[2];
135 u32 event_handle = cmd_buff[4]; 150 u32 event_handle = cmd_buff[4];
136 151
137 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); 152 if (event_handle) {
138 if (evt != nullptr) { 153 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
139 interrupt_event = evt; 154 if (evt) {
140 cmd_buff[1] = 0; // No error 155 interrupt_events[std::make_pair(interrupt, channel)] = evt;
156 cmd_buff[1] = RESULT_SUCCESS.raw;
157 LOG_WARNING(Service_DSP, "Registered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle);
158 } else {
159 cmd_buff[1] = -1;
160 LOG_ERROR(Service_DSP, "Invalid event handle! interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle);
161 }
141 } else { 162 } else {
142 LOG_ERROR(Service_DSP, "called with invalid handle=%08X", cmd_buff[4]); 163 interrupt_events.erase(std::make_pair(interrupt, channel));
143 164 LOG_WARNING(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle);
144 // TODO(yuriks): An error should be returned from SendSyncRequest, not in the cmdbuf
145 cmd_buff[1] = -1;
146 } 165 }
147
148 LOG_WARNING(Service_DSP, "(STUBBED) called param0=%u, param1=%u, event_handle=0x%08X", param0, param1, event_handle);
149} 166}
150 167
151/** 168/**
@@ -158,8 +175,6 @@ static void RegisterInterruptEvents(Service::Interface* self) {
158static void SetSemaphore(Service::Interface* self) { 175static void SetSemaphore(Service::Interface* self) {
159 u32* cmd_buff = Kernel::GetCommandBuffer(); 176 u32* cmd_buff = Kernel::GetCommandBuffer();
160 177
161 SignalInterrupt();
162
163 cmd_buff[1] = 0; // No error 178 cmd_buff[1] = 0; // No error
164 179
165 LOG_WARNING(Service_DSP, "(STUBBED) called"); 180 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -168,9 +183,9 @@ static void SetSemaphore(Service::Interface* self) {
168/** 183/**
169 * DSP_DSP::WriteProcessPipe service function 184 * DSP_DSP::WriteProcessPipe service function
170 * Inputs: 185 * Inputs:
171 * 1 : Number 186 * 1 : Channel
172 * 2 : Size 187 * 2 : Size
173 * 3 : (size <<14) | 0x402 188 * 3 : (size << 14) | 0x402
174 * 4 : Buffer 189 * 4 : Buffer
175 * Outputs: 190 * Outputs:
176 * 0 : Return header 191 * 0 : Return header
@@ -179,21 +194,42 @@ static void SetSemaphore(Service::Interface* self) {
179static void WriteProcessPipe(Service::Interface* self) { 194static void WriteProcessPipe(Service::Interface* self) {
180 u32* cmd_buff = Kernel::GetCommandBuffer(); 195 u32* cmd_buff = Kernel::GetCommandBuffer();
181 196
182 u32 number = cmd_buff[1]; 197 u32 channel = cmd_buff[1];
183 u32 size = cmd_buff[2]; 198 u32 size = cmd_buff[2];
184 u32 new_size = cmd_buff[3];
185 u32 buffer = cmd_buff[4]; 199 u32 buffer = cmd_buff[4];
186 200
201 if (IPC::StaticBufferDesc(size, 1) != cmd_buff[3]) {
202 LOG_ERROR(Service_DSP, "IPC static buffer descriptor failed validation (0x%X). channel=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], channel, size, buffer);
203 cmd_buff[1] = -1; // TODO
204 return;
205 }
206
207 if (!Memory::GetPointer(buffer)) {
208 LOG_ERROR(Service_DSP, "Invalid Buffer: channel=%u, size=0x%X, buffer=0x%08X", channel, size, buffer);
209 cmd_buff[1] = -1; // TODO
210 return;
211 }
212
213 std::vector<u8> message(size);
214
215 for (size_t i = 0; i < size; i++) {
216 message[i] = Memory::Read8(buffer + i);
217 }
218
219 DSP::HLE::PipeWrite(channel, message);
220
187 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 221 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
188 222
189 LOG_WARNING(Service_DSP, "(STUBBED) called number=%u, size=0x%X, new_size=0x%X, buffer=0x%08X", 223 LOG_TRACE(Service_DSP, "channel=%u, size=0x%X, buffer=0x%08X", channel, size, buffer);
190 number, size, new_size, buffer);
191} 224}
192 225
193/** 226/**
194 * DSP_DSP::ReadPipeIfPossible service function 227 * DSP_DSP::ReadPipeIfPossible service function
228 * A pipe is a means of communication between the ARM11 and DSP that occurs on
229 * hardware by writing to/reading from the DSP registers at 0x10203000.
230 * Pipes are used for initialisation. See also DSP::HLE::PipeRead.
195 * Inputs: 231 * Inputs:
196 * 1 : Unknown 232 * 1 : Pipe Number
197 * 2 : Unknown 233 * 2 : Unknown
198 * 3 : Size in bytes of read (observed only lower half word used) 234 * 3 : Size in bytes of read (observed only lower half word used)
199 * 0x41 : Virtual address to read from DSP pipe to in memory 235 * 0x41 : Virtual address to read from DSP pipe to in memory
@@ -204,35 +240,25 @@ static void WriteProcessPipe(Service::Interface* self) {
204static void ReadPipeIfPossible(Service::Interface* self) { 240static void ReadPipeIfPossible(Service::Interface* self) {
205 u32* cmd_buff = Kernel::GetCommandBuffer(); 241 u32* cmd_buff = Kernel::GetCommandBuffer();
206 242
207 u32 unk1 = cmd_buff[1]; 243 u32 pipe = cmd_buff[1];
208 u32 unk2 = cmd_buff[2]; 244 u32 unk2 = cmd_buff[2];
209 u32 size = cmd_buff[3] & 0xFFFF;// Lower 16 bits are size 245 u32 size = cmd_buff[3] & 0xFFFF;// Lower 16 bits are size
210 VAddr addr = cmd_buff[0x41]; 246 VAddr addr = cmd_buff[0x41];
211 247
212 // Canned DSP responses that games expect. These were taken from HW by 3dmoo team. 248 if (!Memory::GetPointer(addr)) {
213 // TODO: Remove this hack :) 249 LOG_ERROR(Service_DSP, "Invalid addr: pipe=0x%08X, unk2=0x%08X, size=0x%X, buffer=0x%08X", pipe, unk2, size, addr);
214 static const std::array<u16, 16> canned_read_pipe = {{ 250 cmd_buff[1] = -1; // TODO
215 0x000F, 0xBFFF, 0x9E8E, 0x8680, 0xA78E, 0x9430, 0x8400, 0x8540, 251 return;
216 0x948E, 0x8710, 0x8410, 0xA90E, 0xAA0E, 0xAACE, 0xAC4E, 0xAC58 252 }
217 }};
218 253
219 u32 initial_size = read_pipe_count; 254 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
220 255
221 for (unsigned offset = 0; offset < size; offset += sizeof(u16)) { 256 Memory::WriteBlock(addr, response.data(), response.size());
222 if (read_pipe_count < canned_read_pipe.size()) {
223 Memory::Write16(addr + offset, canned_read_pipe[read_pipe_count]);
224 read_pipe_count++;
225 } else {
226 LOG_ERROR(Service_DSP, "canned read pipe log exceeded!");
227 break;
228 }
229 }
230 257
231 cmd_buff[1] = 0; // No error 258 cmd_buff[1] = 0; // No error
232 cmd_buff[2] = (read_pipe_count - initial_size) * sizeof(u16); 259 cmd_buff[2] = (u32)response.size();
233 260
234 LOG_WARNING(Service_DSP, "(STUBBED) called unk1=0x%08X, unk2=0x%08X, size=0x%X, buffer=0x%08X", 261 LOG_TRACE(Service_DSP, "pipe=0x%08X, unk2=0x%08X, size=0x%X, buffer=0x%08X", pipe, unk2, size, addr);
235 unk1, unk2, size, addr);
236} 262}
237 263
238/** 264/**
@@ -311,7 +337,6 @@ const Interface::FunctionInfo FunctionTable[] = {
311 337
312Interface::Interface() { 338Interface::Interface() {
313 semaphore_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "DSP_DSP::semaphore_event"); 339 semaphore_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "DSP_DSP::semaphore_event");
314 interrupt_event = nullptr;
315 read_pipe_count = 0; 340 read_pipe_count = 0;
316 341
317 Register(FunctionTable); 342 Register(FunctionTable);
@@ -319,7 +344,7 @@ Interface::Interface() {
319 344
320Interface::~Interface() { 345Interface::~Interface() {
321 semaphore_event = nullptr; 346 semaphore_event = nullptr;
322 interrupt_event = nullptr; 347 interrupt_events.clear();
323} 348}
324 349
325} // namespace 350} // namespace
diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h
index b6f611db5..32b89e9bb 100644
--- a/src/core/hle/service/dsp_dsp.h
+++ b/src/core/hle/service/dsp_dsp.h
@@ -23,7 +23,15 @@ public:
23 } 23 }
24}; 24};
25 25
26/// Signals that a DSP interrupt has occurred to userland code 26/// Signal all audio related interrupts.
27void SignalInterrupt(); 27void SignalAllInterrupts();
28
29/**
30 * Signal a specific audio related interrupt based on interrupt id and channel id.
31 * @param interrupt_id The interrupt id
32 * @param channel_id The channel id
33 * The significance of various values of interrupt_id and channel_id is not yet known.
34 */
35void SignalInterrupt(u32 interrupt_id, u32 channel_id);
28 36
29} // namespace 37} // namespace
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 98b11c798..5838b6d71 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -347,7 +347,7 @@ void SignalInterrupt(InterruptId interrupt_id) {
347 FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); 347 FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id);
348 if (info->is_dirty) { 348 if (info->is_dirty) {
349 SetBufferSwap(screen_id, info->framebuffer_info[info->index]); 349 SetBufferSwap(screen_id, info->framebuffer_info[info->index]);
350 info->is_dirty = false; 350 info->is_dirty.Assign(false);
351 } 351 }
352 } 352 }
353 } 353 }
@@ -499,7 +499,7 @@ static void SetLcdForceBlack(Service::Interface* self) {
499 499
500 // Since data is already zeroed, there is no need to explicitly set 500 // Since data is already zeroed, there is no need to explicitly set
501 // the color to black (all zero). 501 // the color to black (all zero).
502 data.is_enabled = enable_black; 502 data.is_enabled.Assign(enable_black);
503 503
504 LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top), data.raw); // Top LCD 504 LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top), data.raw); // Top LCD
505 LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom), data.raw); // Bottom LCD 505 LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom), data.raw); // Bottom LCD
@@ -521,7 +521,7 @@ static void TriggerCmdReqQueue(Service::Interface* self) {
521 ExecuteCommand(command_buffer->commands[i], thread_id); 521 ExecuteCommand(command_buffer->commands[i], thread_id);
522 522
523 // Indicates that command has completed 523 // Indicates that command has completed
524 command_buffer->number_commands = command_buffer->number_commands - 1; 524 command_buffer->number_commands.Assign(command_buffer->number_commands - 1);
525 } 525 }
526 } 526 }
527 527
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 0bed0ce36..11d7e69a1 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -105,7 +105,7 @@ void Update() {
105 bool pressed = false; 105 bool pressed = false;
106 106
107 std::tie(touch_entry->x, touch_entry->y, pressed) = VideoCore::g_emu_window->GetTouchState(); 107 std::tie(touch_entry->x, touch_entry->y, pressed) = VideoCore::g_emu_window->GetTouchState();
108 touch_entry->valid = pressed ? 1 : 0; 108 touch_entry->valid.Assign(pressed ? 1 : 0);
109 109
110 // TODO(bunnei): We're not doing anything with offset 0xA8 + 0x18 of HID SharedMemory, which 110 // TODO(bunnei): We're not doing anything with offset 0xA8 + 0x18 of HID SharedMemory, which
111 // supposedly is "Touch-screen entry, which contains the raw coordinate data prior to being 111 // supposedly is "Touch-screen entry, which contains the raw coordinate data prior to being
diff --git a/src/core/hle/service/ptm/ptm.cpp b/src/core/hle/service/ptm/ptm.cpp
index 22c1093ff..6bdee4d9e 100644
--- a/src/core/hle/service/ptm/ptm.cpp
+++ b/src/core/hle/service/ptm/ptm.cpp
@@ -110,8 +110,8 @@ void Init() {
110 110
111 FileSys::Path gamecoin_path("gamecoin.dat"); 111 FileSys::Path gamecoin_path("gamecoin.dat");
112 FileSys::Mode open_mode = {}; 112 FileSys::Mode open_mode = {};
113 open_mode.write_flag = 1; 113 open_mode.write_flag.Assign(1);
114 open_mode.create_flag = 1; 114 open_mode.create_flag.Assign(1);
115 // Open the file and write the default gamecoin information 115 // Open the file and write the default gamecoin information
116 auto gamecoin_result = Service::FS::OpenFileFromArchive(*archive_result, gamecoin_path, open_mode); 116 auto gamecoin_result = Service::FS::OpenFileFromArchive(*archive_result, gamecoin_path, open_mode);
117 if (gamecoin_result.Succeeded()) { 117 if (gamecoin_result.Succeeded()) {
diff --git a/src/core/hle/service/soc_u.cpp b/src/core/hle/service/soc_u.cpp
index 822b093f4..e603bf794 100644
--- a/src/core/hle/service/soc_u.cpp
+++ b/src/core/hle/service/soc_u.cpp
@@ -178,17 +178,17 @@ struct CTRPollFD {
178 static Events TranslateTo3DS(u32 input_event) { 178 static Events TranslateTo3DS(u32 input_event) {
179 Events ev = {}; 179 Events ev = {};
180 if (input_event & POLLIN) 180 if (input_event & POLLIN)
181 ev.pollin = 1; 181 ev.pollin.Assign(1);
182 if (input_event & POLLPRI) 182 if (input_event & POLLPRI)
183 ev.pollpri = 1; 183 ev.pollpri.Assign(1);
184 if (input_event & POLLHUP) 184 if (input_event & POLLHUP)
185 ev.pollhup = 1; 185 ev.pollhup.Assign(1);
186 if (input_event & POLLERR) 186 if (input_event & POLLERR)
187 ev.pollerr = 1; 187 ev.pollerr.Assign(1);
188 if (input_event & POLLOUT) 188 if (input_event & POLLOUT)
189 ev.pollout = 1; 189 ev.pollout.Assign(1);
190 if (input_event & POLLNVAL) 190 if (input_event & POLLNVAL)
191 ev.pollnval = 1; 191 ev.pollnval.Assign(1);
192 return ev; 192 return ev;
193 } 193 }
194 194
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 4bd3a632d..5312baa83 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -17,7 +17,6 @@
17#include "core/core_timing.h" 17#include "core/core_timing.h"
18 18
19#include "core/hle/service/gsp_gpu.h" 19#include "core/hle/service/gsp_gpu.h"
20#include "core/hle/service/dsp_dsp.h"
21#include "core/hle/service/hid/hid.h" 20#include "core/hle/service/hid/hid.h"
22 21
23#include "core/hw/hw.h" 22#include "core/hw/hw.h"
@@ -146,8 +145,8 @@ inline void Write(u32 addr, const T data) {
146 145
147 // Reset "trigger" flag and set the "finish" flag 146 // Reset "trigger" flag and set the "finish" flag
148 // NOTE: This was confirmed to happen on hardware even if "address_start" is zero. 147 // NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
149 config.trigger = 0; 148 config.trigger.Assign(0);
150 config.finished = 1; 149 config.finished.Assign(1);
151 } 150 }
152 break; 151 break;
153 } 152 }
@@ -414,11 +413,6 @@ static void VBlankCallback(u64 userdata, int cycles_late) {
414 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); 413 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
415 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); 414 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
416 415
417 // TODO(bunnei): Fake a DSP interrupt on each frame. This does not belong here, but
418 // until we can emulate DSP interrupts, this is probably the only reasonable place to do
419 // this. Certain games expect this to be periodically signaled.
420 DSP_DSP::SignalInterrupt();
421
422 // Check for user input updates 416 // Check for user input updates
423 Service::HID::Update(); 417 Service::HID::Update();
424 418
@@ -444,16 +438,16 @@ void Init() {
444 framebuffer_sub.address_left1 = 0x1848F000; 438 framebuffer_sub.address_left1 = 0x1848F000;
445 framebuffer_sub.address_left2 = 0x184C7800; 439 framebuffer_sub.address_left2 = 0x184C7800;
446 440
447 framebuffer_top.width = 240; 441 framebuffer_top.width.Assign(240);
448 framebuffer_top.height = 400; 442 framebuffer_top.height.Assign(400);
449 framebuffer_top.stride = 3 * 240; 443 framebuffer_top.stride = 3 * 240;
450 framebuffer_top.color_format = Regs::PixelFormat::RGB8; 444 framebuffer_top.color_format.Assign(Regs::PixelFormat::RGB8);
451 framebuffer_top.active_fb = 0; 445 framebuffer_top.active_fb = 0;
452 446
453 framebuffer_sub.width = 240; 447 framebuffer_sub.width.Assign(240);
454 framebuffer_sub.height = 320; 448 framebuffer_sub.height.Assign(320);
455 framebuffer_sub.stride = 3 * 240; 449 framebuffer_sub.stride = 3 * 240;
456 framebuffer_sub.color_format = Regs::PixelFormat::RGB8; 450 framebuffer_sub.color_format.Assign(Regs::PixelFormat::RGB8);
457 framebuffer_sub.active_fb = 0; 451 framebuffer_sub.active_fb = 0;
458 452
459 last_skip_frame = false; 453 last_skip_frame = false;
diff --git a/src/core/system.cpp b/src/core/system.cpp
index 7e9c56538..b62ebf69e 100644
--- a/src/core/system.cpp
+++ b/src/core/system.cpp
@@ -2,9 +2,12 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "audio_core/audio_core.h"
6
5#include "core/core.h" 7#include "core/core.h"
6#include "core/core_timing.h" 8#include "core/core_timing.h"
7#include "core/system.h" 9#include "core/system.h"
10#include "core/gdbstub/gdbstub.h"
8#include "core/hw/hw.h" 11#include "core/hw/hw.h"
9#include "core/hle/hle.h" 12#include "core/hle/hle.h"
10#include "core/hle/kernel/kernel.h" 13#include "core/hle/kernel/kernel.h"
@@ -12,8 +15,6 @@
12 15
13#include "video_core/video_core.h" 16#include "video_core/video_core.h"
14 17
15#include "core/gdbstub/gdbstub.h"
16
17namespace System { 18namespace System {
18 19
19void Init(EmuWindow* emu_window) { 20void Init(EmuWindow* emu_window) {
@@ -24,11 +25,13 @@ void Init(EmuWindow* emu_window) {
24 Kernel::Init(); 25 Kernel::Init();
25 HLE::Init(); 26 HLE::Init();
26 VideoCore::Init(emu_window); 27 VideoCore::Init(emu_window);
28 AudioCore::Init();
27 GDBStub::Init(); 29 GDBStub::Init();
28} 30}
29 31
30void Shutdown() { 32void Shutdown() {
31 GDBStub::Shutdown(); 33 GDBStub::Shutdown();
34 AudioCore::Shutdown();
32 VideoCore::Shutdown(); 35 VideoCore::Shutdown();
33 HLE::Shutdown(); 36 HLE::Shutdown();
34 Kernel::Shutdown(); 37 Kernel::Shutdown();
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c3d7294d5..4b5d298f3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -33,6 +33,7 @@ set(HEADERS
33 command_processor.h 33 command_processor.h
34 gpu_debugger.h 34 gpu_debugger.h
35 pica.h 35 pica.h
36 pica_types.h
36 primitive_assembly.h 37 primitive_assembly.h
37 rasterizer.h 38 rasterizer.h
38 rasterizer_interface.h 39 rasterizer_interface.h
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 5d609da06..a385589d2 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -59,15 +59,17 @@ static void InitScreenCoordinates(OutputVertex& vtx)
59 } viewport; 59 } viewport;
60 60
61 const auto& regs = g_state.regs; 61 const auto& regs = g_state.regs;
62 viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x); 62 viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x);
63 viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y); 63 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
64 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); 64 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
65 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); 65 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
66 viewport.zscale = float24::FromRawFloat24(regs.viewport_depth_range); 66 viewport.zscale = float24::FromRaw(regs.viewport_depth_range);
67 viewport.offset_z = float24::FromRawFloat24(regs.viewport_depth_far_plane); 67 viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane);
68 68
69 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; 69 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
70 vtx.color *= inv_w; 70 vtx.color *= inv_w;
71 vtx.view *= inv_w;
72 vtx.quat *= inv_w;
71 vtx.tc0 *= inv_w; 73 vtx.tc0 *= inv_w;
72 vtx.tc1 *= inv_w; 74 vtx.tc1 *= inv_w;
73 vtx.tc2 *= inv_w; 75 vtx.tc2 *= inv_w;
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 59c75042c..73fdfbe9c 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -98,10 +98,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
98 Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; 98 Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index];
99 99
100 // NOTE: The destination component order indeed is "backwards" 100 // NOTE: The destination component order indeed is "backwards"
101 attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); 101 attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
102 attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); 102 attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
103 attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); 103 attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF));
104 attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); 104 attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
105 105
106 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, 106 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
107 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), 107 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
@@ -157,15 +157,25 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
157 157
158 // TODO: What happens if a loader overwrites a previous one's data? 158 // TODO: What happens if a loader overwrites a previous one's data?
159 for (unsigned component = 0; component < loader_config.component_count; ++component) { 159 for (unsigned component = 0; component < loader_config.component_count; ++component) {
160 if (component >= 12) 160 if (component >= 12) {
161 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); 161 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
162 continue;
163 }
164
162 u32 attribute_index = loader_config.GetComponent(component); 165 u32 attribute_index = loader_config.GetComponent(component);
163 vertex_attribute_sources[attribute_index] = load_address; 166 if (attribute_index < 12) {
164 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); 167 vertex_attribute_sources[attribute_index] = load_address;
165 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); 168 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
166 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); 169 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
167 vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); 170 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
168 load_address += attribute_config.GetStride(attribute_index); 171 vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index);
172 load_address += attribute_config.GetStride(attribute_index);
173 } else if (attribute_index < 16) {
174 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
175 load_address += (attribute_index - 11) * 4;
176 } else {
177 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
178 }
169 } 179 }
170 } 180 }
171 181
@@ -418,10 +428,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
418 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); 428 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
419 } else { 429 } else {
420 // TODO: Untested 430 // TODO: Untested
421 uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8); 431 uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
422 uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); 432 uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
423 uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); 433 uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
424 uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF); 434 uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
425 } 435 }
426 436
427 LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, 437 LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
@@ -429,7 +439,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
429 uniform.w.ToFloat32()); 439 uniform.w.ToFloat32());
430 440
431 // TODO: Verify that this actually modifies the register! 441 // TODO: Verify that this actually modifies the register!
432 uniform_setup.index = uniform_setup.index + 1; 442 uniform_setup.index.Assign(uniform_setup.index + 1);
433 } 443 }
434 break; 444 break;
435 } 445 }
@@ -464,6 +474,24 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
464 break; 474 break;
465 } 475 }
466 476
477 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8):
478 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
479 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
480 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
481 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
482 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
483 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
484 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
485 {
486 auto& lut_config = regs.lighting.lut_config;
487
488 ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!");
489
490 g_state.lighting.luts[lut_config.type][lut_config.index].raw = value;
491 lut_config.index.Assign(lut_config.index + 1);
492 break;
493 }
494
467 default: 495 default:
468 break; 496 break;
469 } 497 }
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 4f66dbd65..6e6fd7335 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -201,11 +201,11 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
201 201
202 if (it == output_info_table.end()) { 202 if (it == output_info_table.end()) {
203 output_info_table.emplace_back(); 203 output_info_table.emplace_back();
204 output_info_table.back().type = type; 204 output_info_table.back().type.Assign(type);
205 output_info_table.back().component_mask = component_mask; 205 output_info_table.back().component_mask.Assign(component_mask);
206 output_info_table.back().id = i; 206 output_info_table.back().id.Assign(i);
207 } else { 207 } else {
208 it->component_mask = it->component_mask | component_mask; 208 it->component_mask.Assign(it->component_mask | component_mask);
209 } 209 }
210 } catch (const std::out_of_range& ) { 210 } catch (const std::out_of_range& ) {
211 DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); 211 DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping");
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 2f1b2dec4..9077b1725 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -16,6 +16,8 @@
16#include "common/vector_math.h" 16#include "common/vector_math.h"
17#include "common/logging/log.h" 17#include "common/logging/log.h"
18 18
19#include "pica_types.h"
20
19namespace Pica { 21namespace Pica {
20 22
21// Returns index corresponding to the Regs member labeled by field_name 23// Returns index corresponding to the Regs member labeled by field_name
@@ -239,7 +241,8 @@ struct Regs {
239 TextureConfig texture0; 241 TextureConfig texture0;
240 INSERT_PADDING_WORDS(0x8); 242 INSERT_PADDING_WORDS(0x8);
241 BitField<0, 4, TextureFormat> texture0_format; 243 BitField<0, 4, TextureFormat> texture0_format;
242 INSERT_PADDING_WORDS(0x2); 244 BitField<0, 1, u32> fragment_lighting_enable;
245 INSERT_PADDING_WORDS(0x1);
243 TextureConfig texture1; 246 TextureConfig texture1;
244 BitField<0, 4, TextureFormat> texture1_format; 247 BitField<0, 4, TextureFormat> texture1_format;
245 INSERT_PADDING_WORDS(0x2); 248 INSERT_PADDING_WORDS(0x2);
@@ -641,7 +644,268 @@ struct Regs {
641 } 644 }
642 } 645 }
643 646
644 INSERT_PADDING_WORDS(0xe0); 647 INSERT_PADDING_WORDS(0x20);
648
649 enum class LightingSampler {
650 Distribution0 = 0,
651 Distribution1 = 1,
652 Fresnel = 3,
653 ReflectBlue = 4,
654 ReflectGreen = 5,
655 ReflectRed = 6,
656 SpotlightAttenuation = 8,
657 DistanceAttenuation = 16,
658 };
659
660 /**
661 * Pica fragment lighting supports using different LUTs for each lighting component:
662 * Reflectance R, G, and B channels, distribution function for specular components 0 and 1,
663 * fresnel factor, and spotlight attenuation. Furthermore, which LUTs are used for each channel
664 * (or whether a channel is enabled at all) is specified by various pre-defined lighting
665 * configurations. With configurations that require more LUTs, more cycles are required on HW to
666 * perform lighting computations.
667 */
668 enum class LightingConfig {
669 Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight
670 Config1 = 1, ///< Reflect Red, Fresnel, Spotlight
671 Config2 = 2, ///< Reflect Red, Distribution 0/1
672 Config3 = 3, ///< Distribution 0/1, Fresnel
673 Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight
674 Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight
675 Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight
676 Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight
677 ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration
678 };
679
680 /// Selects which lighting components are affected by fresnel
681 enum class LightingFresnelSelector {
682 None = 0, ///< Fresnel is disabled
683 PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel
684 SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel
685 Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel
686 };
687
688 /// Factor used to scale the output of a lighting LUT
689 enum class LightingScale {
690 Scale1 = 0, ///< Scale is 1x
691 Scale2 = 1, ///< Scale is 2x
692 Scale4 = 2, ///< Scale is 4x
693 Scale8 = 3, ///< Scale is 8x
694 Scale1_4 = 6, ///< Scale is 0.25x
695 Scale1_2 = 7, ///< Scale is 0.5x
696 };
697
698 enum class LightingLutInput {
699 NH = 0, // Cosine of the angle between the normal and half-angle vectors
700 VH = 1, // Cosine of the angle between the view and half-angle vectors
701 NV = 2, // Cosine of the angle between the normal and the view vector
702 LN = 3, // Cosine of the angle between the light and the normal vectors
703 };
704
705 enum class LightingBumpMode : u32 {
706 None = 0,
707 NormalMap = 1,
708 TangentMap = 2,
709 };
710
711 union LightColor {
712 BitField< 0, 10, u32> b;
713 BitField<10, 10, u32> g;
714 BitField<20, 10, u32> r;
715
716 Math::Vec3f ToVec3f() const {
717 // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component
718 return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
719 }
720 };
721
722 /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration
723 static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
724 switch (sampler) {
725 case LightingSampler::Distribution0:
726 return (config != LightingConfig::Config1);
727
728 case LightingSampler::Distribution1:
729 return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5);
730
731 case LightingSampler::Fresnel:
732 return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4);
733
734 case LightingSampler::ReflectRed:
735 return (config != LightingConfig::Config3);
736
737 case LightingSampler::ReflectGreen:
738 case LightingSampler::ReflectBlue:
739 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7);
740 }
741 return false;
742 }
743
744 struct {
745 struct LightSrc {
746 LightColor specular_0; // material.specular_0 * light.specular_0
747 LightColor specular_1; // material.specular_1 * light.specular_1
748 LightColor diffuse; // material.diffuse * light.diffuse
749 LightColor ambient; // material.ambient * light.ambient
750
751 struct {
752 // Encoded as 16-bit floating point
753 union {
754 BitField< 0, 16, u32> x;
755 BitField<16, 16, u32> y;
756 };
757 union {
758 BitField< 0, 16, u32> z;
759 };
760
761 INSERT_PADDING_WORDS(0x3);
762
763 union {
764 BitField<0, 1, u32> directional;
765 BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
766 };
767 };
768
769 BitField<0, 20, u32> dist_atten_bias;
770 BitField<0, 20, u32> dist_atten_scale;
771
772 INSERT_PADDING_WORDS(0x4);
773 };
774 static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words");
775
776 LightSrc light[8];
777 LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
778 INSERT_PADDING_WORDS(0x1);
779 BitField<0, 3, u32> num_lights; // Number of enabled lights - 1
780
781 union {
782 BitField< 2, 2, LightingFresnelSelector> fresnel_selector;
783 BitField< 4, 4, LightingConfig> config;
784 BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
785 BitField<27, 1, u32> clamp_highlights;
786 BitField<28, 2, LightingBumpMode> bump_mode;
787 BitField<30, 1, u32> disable_bump_renorm;
788 };
789
790 union {
791 BitField<16, 1, u32> disable_lut_d0;
792 BitField<17, 1, u32> disable_lut_d1;
793 BitField<19, 1, u32> disable_lut_fr;
794 BitField<20, 1, u32> disable_lut_rr;
795 BitField<21, 1, u32> disable_lut_rg;
796 BitField<22, 1, u32> disable_lut_rb;
797
798 // Each bit specifies whether distance attenuation should be applied for the
799 // corresponding light
800
801 BitField<24, 1, u32> disable_dist_atten_light_0;
802 BitField<25, 1, u32> disable_dist_atten_light_1;
803 BitField<26, 1, u32> disable_dist_atten_light_2;
804 BitField<27, 1, u32> disable_dist_atten_light_3;
805 BitField<28, 1, u32> disable_dist_atten_light_4;
806 BitField<29, 1, u32> disable_dist_atten_light_5;
807 BitField<30, 1, u32> disable_dist_atten_light_6;
808 BitField<31, 1, u32> disable_dist_atten_light_7;
809 };
810
811 bool IsDistAttenDisabled(unsigned index) const {
812 const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1,
813 disable_dist_atten_light_2, disable_dist_atten_light_3,
814 disable_dist_atten_light_4, disable_dist_atten_light_5,
815 disable_dist_atten_light_6, disable_dist_atten_light_7 };
816 return disable[index] != 0;
817 }
818
819 union {
820 BitField<0, 8, u32> index; ///< Index at which to set data in the LUT
821 BitField<8, 5, u32> type; ///< Type of LUT for which to set data
822 } lut_config;
823
824 BitField<0, 1, u32> disable;
825 INSERT_PADDING_WORDS(0x1);
826
827 // When data is written to any of these registers, it gets written to the lookup table of
828 // the selected type at the selected index, specified above in the `lut_config` register.
829 // With each write, `lut_config.index` is incremented. It does not matter which of these
830 // registers is written to, the behavior will be the same.
831 u32 lut_data[8];
832
833 // These are used to specify if absolute (abs) value should be used for each LUT index. When
834 // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in
835 // the range of (0.0, 1.0).
836 union {
837 BitField< 1, 1, u32> disable_d0;
838 BitField< 5, 1, u32> disable_d1;
839 BitField< 9, 1, u32> disable_sp;
840 BitField<13, 1, u32> disable_fr;
841 BitField<17, 1, u32> disable_rb;
842 BitField<21, 1, u32> disable_rg;
843 BitField<25, 1, u32> disable_rr;
844 } abs_lut_input;
845
846 union {
847 BitField< 0, 3, LightingLutInput> d0;
848 BitField< 4, 3, LightingLutInput> d1;
849 BitField< 8, 3, LightingLutInput> sp;
850 BitField<12, 3, LightingLutInput> fr;
851 BitField<16, 3, LightingLutInput> rb;
852 BitField<20, 3, LightingLutInput> rg;
853 BitField<24, 3, LightingLutInput> rr;
854 } lut_input;
855
856 union {
857 BitField< 0, 3, LightingScale> d0;
858 BitField< 4, 3, LightingScale> d1;
859 BitField< 8, 3, LightingScale> sp;
860 BitField<12, 3, LightingScale> fr;
861 BitField<16, 3, LightingScale> rb;
862 BitField<20, 3, LightingScale> rg;
863 BitField<24, 3, LightingScale> rr;
864
865 static float GetScale(LightingScale scale) {
866 switch (scale) {
867 case LightingScale::Scale1:
868 return 1.0f;
869 case LightingScale::Scale2:
870 return 2.0f;
871 case LightingScale::Scale4:
872 return 4.0f;
873 case LightingScale::Scale8:
874 return 8.0f;
875 case LightingScale::Scale1_4:
876 return 0.25f;
877 case LightingScale::Scale1_2:
878 return 0.5f;
879 }
880 return 0.0f;
881 }
882 } lut_scale;
883
884 INSERT_PADDING_WORDS(0x6);
885
886 union {
887 // There are 8 light enable "slots", corresponding to the total number of lights
888 // supported by Pica. For N enabled lights (specified by register 0x1c2, or 'src_num'
889 // above), the first N slots below will be set to integers within the range of 0-7,
890 // corresponding to the actual light that is enabled for each slot.
891
892 BitField< 0, 3, u32> slot_0;
893 BitField< 4, 3, u32> slot_1;
894 BitField< 8, 3, u32> slot_2;
895 BitField<12, 3, u32> slot_3;
896 BitField<16, 3, u32> slot_4;
897 BitField<20, 3, u32> slot_5;
898 BitField<24, 3, u32> slot_6;
899 BitField<28, 3, u32> slot_7;
900
901 unsigned GetNum(unsigned index) const {
902 const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 };
903 return enable_slots[index];
904 }
905 } light_enable;
906 } lighting;
907
908 INSERT_PADDING_WORDS(0x26);
645 909
646 enum class VertexAttributeFormat : u64 { 910 enum class VertexAttributeFormat : u64 {
647 BYTE = 0, 911 BYTE = 0,
@@ -990,6 +1254,7 @@ ASSERT_REG_POSITION(viewport_corner, 0x68);
990ASSERT_REG_POSITION(texture0_enable, 0x80); 1254ASSERT_REG_POSITION(texture0_enable, 0x80);
991ASSERT_REG_POSITION(texture0, 0x81); 1255ASSERT_REG_POSITION(texture0, 0x81);
992ASSERT_REG_POSITION(texture0_format, 0x8e); 1256ASSERT_REG_POSITION(texture0_format, 0x8e);
1257ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f);
993ASSERT_REG_POSITION(texture1, 0x91); 1258ASSERT_REG_POSITION(texture1, 0x91);
994ASSERT_REG_POSITION(texture1_format, 0x96); 1259ASSERT_REG_POSITION(texture1_format, 0x96);
995ASSERT_REG_POSITION(texture2, 0x99); 1260ASSERT_REG_POSITION(texture2, 0x99);
@@ -1004,6 +1269,7 @@ ASSERT_REG_POSITION(tev_stage5, 0xf8);
1004ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); 1269ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
1005ASSERT_REG_POSITION(output_merger, 0x100); 1270ASSERT_REG_POSITION(output_merger, 0x100);
1006ASSERT_REG_POSITION(framebuffer, 0x110); 1271ASSERT_REG_POSITION(framebuffer, 0x110);
1272ASSERT_REG_POSITION(lighting, 0x140);
1007ASSERT_REG_POSITION(vertex_attributes, 0x200); 1273ASSERT_REG_POSITION(vertex_attributes, 0x200);
1008ASSERT_REG_POSITION(index_array, 0x227); 1274ASSERT_REG_POSITION(index_array, 0x227);
1009ASSERT_REG_POSITION(num_vertices, 0x228); 1275ASSERT_REG_POSITION(num_vertices, 0x228);
@@ -1026,118 +1292,6 @@ static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig st
1026static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); 1292static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
1027static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); 1293static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
1028 1294
1029struct float24 {
1030 static float24 FromFloat32(float val) {
1031 float24 ret;
1032 ret.value = val;
1033 return ret;
1034 }
1035
1036 // 16 bit mantissa, 7 bit exponent, 1 bit sign
1037 // TODO: No idea if this works as intended
1038 static float24 FromRawFloat24(u32 hex) {
1039 float24 ret;
1040 if ((hex & 0xFFFFFF) == 0) {
1041 ret.value = 0;
1042 } else {
1043 u32 mantissa = hex & 0xFFFF;
1044 u32 exponent = (hex >> 16) & 0x7F;
1045 u32 sign = hex >> 23;
1046 ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f));
1047 if (sign)
1048 ret.value = -ret.value;
1049 }
1050 return ret;
1051 }
1052
1053 static float24 Zero() {
1054 return FromFloat32(0.f);
1055 }
1056
1057 // Not recommended for anything but logging
1058 float ToFloat32() const {
1059 return value;
1060 }
1061
1062 float24 operator * (const float24& flt) const {
1063 if ((this->value == 0.f && !std::isnan(flt.value)) ||
1064 (flt.value == 0.f && !std::isnan(this->value)))
1065 // PICA gives 0 instead of NaN when multiplying by inf
1066 return Zero();
1067 return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
1068 }
1069
1070 float24 operator / (const float24& flt) const {
1071 return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
1072 }
1073
1074 float24 operator + (const float24& flt) const {
1075 return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
1076 }
1077
1078 float24 operator - (const float24& flt) const {
1079 return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
1080 }
1081
1082 float24& operator *= (const float24& flt) {
1083 if ((this->value == 0.f && !std::isnan(flt.value)) ||
1084 (flt.value == 0.f && !std::isnan(this->value)))
1085 // PICA gives 0 instead of NaN when multiplying by inf
1086 *this = Zero();
1087 else value *= flt.ToFloat32();
1088 return *this;
1089 }
1090
1091 float24& operator /= (const float24& flt) {
1092 value /= flt.ToFloat32();
1093 return *this;
1094 }
1095
1096 float24& operator += (const float24& flt) {
1097 value += flt.ToFloat32();
1098 return *this;
1099 }
1100
1101 float24& operator -= (const float24& flt) {
1102 value -= flt.ToFloat32();
1103 return *this;
1104 }
1105
1106 float24 operator - () const {
1107 return float24::FromFloat32(-ToFloat32());
1108 }
1109
1110 bool operator < (const float24& flt) const {
1111 return ToFloat32() < flt.ToFloat32();
1112 }
1113
1114 bool operator > (const float24& flt) const {
1115 return ToFloat32() > flt.ToFloat32();
1116 }
1117
1118 bool operator >= (const float24& flt) const {
1119 return ToFloat32() >= flt.ToFloat32();
1120 }
1121
1122 bool operator <= (const float24& flt) const {
1123 return ToFloat32() <= flt.ToFloat32();
1124 }
1125
1126 bool operator == (const float24& flt) const {
1127 return ToFloat32() == flt.ToFloat32();
1128 }
1129
1130 bool operator != (const float24& flt) const {
1131 return ToFloat32() != flt.ToFloat32();
1132 }
1133
1134private:
1135 // Stored as a regular float, merely for convenience
1136 // TODO: Perform proper arithmetic on this!
1137 float value;
1138};
1139static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float");
1140
1141/// Struct used to describe current Pica state 1295/// Struct used to describe current Pica state
1142struct State { 1296struct State {
1143 /// Pica registers 1297 /// Pica registers
@@ -1163,6 +1317,25 @@ struct State {
1163 ShaderSetup vs; 1317 ShaderSetup vs;
1164 ShaderSetup gs; 1318 ShaderSetup gs;
1165 1319
1320 struct {
1321 union LutEntry {
1322 // Used for raw access
1323 u32 raw;
1324
1325 // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
1326 BitField< 0, 12, u32> value;
1327
1328 // Used by HW for efficient interpolation, Citra does not use these
1329 BitField<12, 12, u32> difference;
1330
1331 float ToFloat() {
1332 return static_cast<float>(value) / 4095.f;
1333 }
1334 };
1335
1336 std::array<std::array<LutEntry, 256>, 24> luts;
1337 } lighting;
1338
1166 /// Current Pica command list 1339 /// Current Pica command list
1167 struct { 1340 struct {
1168 const u32* head_ptr; 1341 const u32* head_ptr;
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
new file mode 100644
index 000000000..ecf45654b
--- /dev/null
+++ b/src/video_core/pica_types.h
@@ -0,0 +1,146 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstring>
8
9#include "common/common_types.h"
10
11namespace Pica {
12
13/**
14 * Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision
15 * floating point.
16 *
17 * When decoding, format is as follows:
18 * - The first `M` bits are the mantissa
19 * - The next `E` bits are the exponent
20 * - The last bit is the sign bit
21 *
22 * @todo Verify on HW if this conversion is sufficiently accurate.
23 */
24template<unsigned M, unsigned E>
25struct Float {
26public:
27 static Float<M, E> FromFloat32(float val) {
28 Float<M, E> ret;
29 ret.value = val;
30 return ret;
31 }
32
33 static Float<M, E> FromRaw(u32 hex) {
34 Float<M, E> res;
35
36 const int width = M + E + 1;
37 const int bias = 128 - (1 << (E - 1));
38 const int exponent = (hex >> M) & ((1 << E) - 1);
39 const unsigned mantissa = hex & ((1 << M) - 1);
40
41 if (hex & ((1 << (width - 1)) - 1))
42 hex = ((hex >> (E + M)) << 31) | (mantissa << (23 - M)) | ((exponent + bias) << 23);
43 else
44 hex = ((hex >> (E + M)) << 31);
45
46 std::memcpy(&res.value, &hex, sizeof(float));
47
48 return res;
49 }
50
51 static Float<M, E> Zero() {
52 return FromFloat32(0.f);
53 }
54
55 // Not recommended for anything but logging
56 float ToFloat32() const {
57 return value;
58 }
59
60 Float<M, E> operator * (const Float<M, E>& flt) const {
61 if ((this->value == 0.f && !std::isnan(flt.value)) ||
62 (flt.value == 0.f && !std::isnan(this->value)))
63 // PICA gives 0 instead of NaN when multiplying by inf
64 return Zero();
65 return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32());
66 }
67
68 Float<M, E> operator / (const Float<M, E>& flt) const {
69 return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32());
70 }
71
72 Float<M, E> operator + (const Float<M, E>& flt) const {
73 return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32());
74 }
75
76 Float<M, E> operator - (const Float<M, E>& flt) const {
77 return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32());
78 }
79
80 Float<M, E>& operator *= (const Float<M, E>& flt) {
81 if ((this->value == 0.f && !std::isnan(flt.value)) ||
82 (flt.value == 0.f && !std::isnan(this->value)))
83 // PICA gives 0 instead of NaN when multiplying by inf
84 *this = Zero();
85 else value *= flt.ToFloat32();
86 return *this;
87 }
88
89 Float<M, E>& operator /= (const Float<M, E>& flt) {
90 value /= flt.ToFloat32();
91 return *this;
92 }
93
94 Float<M, E>& operator += (const Float<M, E>& flt) {
95 value += flt.ToFloat32();
96 return *this;
97 }
98
99 Float<M, E>& operator -= (const Float<M, E>& flt) {
100 value -= flt.ToFloat32();
101 return *this;
102 }
103
104 Float<M, E> operator - () const {
105 return Float<M, E>::FromFloat32(-ToFloat32());
106 }
107
108 bool operator < (const Float<M, E>& flt) const {
109 return ToFloat32() < flt.ToFloat32();
110 }
111
112 bool operator > (const Float<M, E>& flt) const {
113 return ToFloat32() > flt.ToFloat32();
114 }
115
116 bool operator >= (const Float<M, E>& flt) const {
117 return ToFloat32() >= flt.ToFloat32();
118 }
119
120 bool operator <= (const Float<M, E>& flt) const {
121 return ToFloat32() <= flt.ToFloat32();
122 }
123
124 bool operator == (const Float<M, E>& flt) const {
125 return ToFloat32() == flt.ToFloat32();
126 }
127
128 bool operator != (const Float<M, E>& flt) const {
129 return ToFloat32() != flt.ToFloat32();
130 }
131
132private:
133 static const unsigned MASK = (1 << (M + E + 1)) - 1;
134 static const unsigned MANTISSA_MASK = (1 << M) - 1;
135 static const unsigned EXPONENT_MASK = (1 << E) - 1;
136
137 // Stored as a regular float, merely for convenience
138 // TODO: Perform proper arithmetic on this!
139 float value;
140};
141
142using float24 = Float<16, 7>;
143using float20 = Float<12, 7>;
144using float16 = Float<10, 5>;
145
146} // namespace Pica
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 291ef737d..b7d19bf94 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -36,7 +36,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
36 stage.GetAlphaMultiplier() == 1); 36 stage.GetAlphaMultiplier() == 1);
37} 37}
38 38
39RasterizerOpenGL::RasterizerOpenGL() : last_fb_color_addr(0), last_fb_depth_addr(0) { } 39RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { }
40RasterizerOpenGL::~RasterizerOpenGL() { } 40RasterizerOpenGL::~RasterizerOpenGL() { }
41 41
42void RasterizerOpenGL::InitObjects() { 42void RasterizerOpenGL::InitObjects() {
@@ -75,6 +75,12 @@ void RasterizerOpenGL::InitObjects() {
75 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); 75 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); 76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
77 77
78 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
79 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
80
81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
83
78 SetShader(); 84 SetShader();
79 85
80 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation 86 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
@@ -120,6 +126,19 @@ void RasterizerOpenGL::InitObjects() {
120 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); 126 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0);
121 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); 127 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
122 128
129 for (size_t i = 0; i < lighting_lut.size(); ++i) {
130 lighting_lut[i].Create();
131 state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
132
133 glActiveTexture(GL_TEXTURE3 + i);
134 glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
135
136 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
137 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
138 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
139 }
140 state.Apply();
141
123 ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, 142 ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE,
124 "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); 143 "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER));
125} 144}
@@ -139,12 +158,34 @@ void RasterizerOpenGL::Reset() {
139 res_cache.InvalidateAll(); 158 res_cache.InvalidateAll();
140} 159}
141 160
161/**
162 * This is a helper function to resolve an issue with opposite quaternions being interpolated by
163 * OpenGL. See below for a detailed description of this issue (yuriks):
164 *
165 * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
166 * interpolate two quaternions that are opposite, instead of going from one rotation to another
167 * using the shortest path, you'll go around the longest path. You can test if two quaternions are
168 * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
169 * making Dot(-Q1, Q2) positive.
170 *
171 * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This
172 * should be correct for nearly all cases, however a more correct implementation (but less trivial
173 * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions
174 * manually using two Lerps, and doing this correction before each Lerp.
175 */
176static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) {
177 Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() };
178 Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() };
179
180 return (Math::Dot(a, b) < 0.f);
181}
182
142void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, 183void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
143 const Pica::Shader::OutputVertex& v1, 184 const Pica::Shader::OutputVertex& v1,
144 const Pica::Shader::OutputVertex& v2) { 185 const Pica::Shader::OutputVertex& v2) {
145 vertex_batch.emplace_back(v0); 186 vertex_batch.emplace_back(v0, false);
146 vertex_batch.emplace_back(v1); 187 vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
147 vertex_batch.emplace_back(v2); 188 vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
148} 189}
149 190
150void RasterizerOpenGL::DrawTriangles() { 191void RasterizerOpenGL::DrawTriangles() {
@@ -156,6 +197,13 @@ void RasterizerOpenGL::DrawTriangles() {
156 state.draw.shader_dirty = false; 197 state.draw.shader_dirty = false;
157 } 198 }
158 199
200 for (unsigned index = 0; index < lighting_lut.size(); index++) {
201 if (uniform_block_data.lut_dirty[index]) {
202 SyncLightingLUT(index);
203 uniform_block_data.lut_dirty[index] = false;
204 }
205 }
206
159 if (uniform_block_data.dirty) { 207 if (uniform_block_data.dirty) {
160 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 208 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
161 uniform_block_data.dirty = false; 209 uniform_block_data.dirty = false;
@@ -169,16 +217,14 @@ void RasterizerOpenGL::DrawTriangles() {
169 // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture 217 // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture
170 const auto& regs = Pica::g_state.regs; 218 const auto& regs = Pica::g_state.regs;
171 219
172 PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); 220 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format)
173 u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) 221 * fb_color_texture.width * fb_color_texture.height;
174 * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
175 222
176 PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); 223 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format)
177 u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) 224 * fb_depth_texture.width * fb_depth_texture.height;
178 * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
179 225
180 res_cache.InvalidateInRange(cur_fb_color_addr, cur_fb_color_size, true); 226 res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true);
181 res_cache.InvalidateInRange(cur_fb_depth_addr, cur_fb_depth_size, true); 227 res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true);
182} 228}
183 229
184void RasterizerOpenGL::FlushFramebuffer() { 230void RasterizerOpenGL::FlushFramebuffer() {
@@ -285,44 +331,199 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
285 case PICA_REG_INDEX(tev_combiner_buffer_color): 331 case PICA_REG_INDEX(tev_combiner_buffer_color):
286 SyncCombinerColor(); 332 SyncCombinerColor();
287 break; 333 break;
334
335 // Fragment lighting specular 0 color
336 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10):
337 SyncLightSpecular0(0);
338 break;
339 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_0, 0x140 + 1 * 0x10):
340 SyncLightSpecular0(1);
341 break;
342 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_0, 0x140 + 2 * 0x10):
343 SyncLightSpecular0(2);
344 break;
345 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_0, 0x140 + 3 * 0x10):
346 SyncLightSpecular0(3);
347 break;
348 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_0, 0x140 + 4 * 0x10):
349 SyncLightSpecular0(4);
350 break;
351 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_0, 0x140 + 5 * 0x10):
352 SyncLightSpecular0(5);
353 break;
354 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_0, 0x140 + 6 * 0x10):
355 SyncLightSpecular0(6);
356 break;
357 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_0, 0x140 + 7 * 0x10):
358 SyncLightSpecular0(7);
359 break;
360
361 // Fragment lighting specular 1 color
362 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_1, 0x141 + 0 * 0x10):
363 SyncLightSpecular1(0);
364 break;
365 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_1, 0x141 + 1 * 0x10):
366 SyncLightSpecular1(1);
367 break;
368 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_1, 0x141 + 2 * 0x10):
369 SyncLightSpecular1(2);
370 break;
371 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_1, 0x141 + 3 * 0x10):
372 SyncLightSpecular1(3);
373 break;
374 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_1, 0x141 + 4 * 0x10):
375 SyncLightSpecular1(4);
376 break;
377 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_1, 0x141 + 5 * 0x10):
378 SyncLightSpecular1(5);
379 break;
380 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_1, 0x141 + 6 * 0x10):
381 SyncLightSpecular1(6);
382 break;
383 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_1, 0x141 + 7 * 0x10):
384 SyncLightSpecular1(7);
385 break;
386
387 // Fragment lighting diffuse color
388 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10):
389 SyncLightDiffuse(0);
390 break;
391 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].diffuse, 0x142 + 1 * 0x10):
392 SyncLightDiffuse(1);
393 break;
394 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].diffuse, 0x142 + 2 * 0x10):
395 SyncLightDiffuse(2);
396 break;
397 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].diffuse, 0x142 + 3 * 0x10):
398 SyncLightDiffuse(3);
399 break;
400 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].diffuse, 0x142 + 4 * 0x10):
401 SyncLightDiffuse(4);
402 break;
403 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].diffuse, 0x142 + 5 * 0x10):
404 SyncLightDiffuse(5);
405 break;
406 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].diffuse, 0x142 + 6 * 0x10):
407 SyncLightDiffuse(6);
408 break;
409 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].diffuse, 0x142 + 7 * 0x10):
410 SyncLightDiffuse(7);
411 break;
412
413 // Fragment lighting ambient color
414 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].ambient, 0x143 + 0 * 0x10):
415 SyncLightAmbient(0);
416 break;
417 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].ambient, 0x143 + 1 * 0x10):
418 SyncLightAmbient(1);
419 break;
420 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].ambient, 0x143 + 2 * 0x10):
421 SyncLightAmbient(2);
422 break;
423 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].ambient, 0x143 + 3 * 0x10):
424 SyncLightAmbient(3);
425 break;
426 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].ambient, 0x143 + 4 * 0x10):
427 SyncLightAmbient(4);
428 break;
429 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].ambient, 0x143 + 5 * 0x10):
430 SyncLightAmbient(5);
431 break;
432 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].ambient, 0x143 + 6 * 0x10):
433 SyncLightAmbient(6);
434 break;
435 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].ambient, 0x143 + 7 * 0x10):
436 SyncLightAmbient(7);
437 break;
438
439 // Fragment lighting position
440 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10):
441 case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10):
442 SyncLightPosition(0);
443 break;
444 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].x, 0x144 + 1 * 0x10):
445 case PICA_REG_INDEX_WORKAROUND(lighting.light[1].z, 0x145 + 1 * 0x10):
446 SyncLightPosition(1);
447 break;
448 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].x, 0x144 + 2 * 0x10):
449 case PICA_REG_INDEX_WORKAROUND(lighting.light[2].z, 0x145 + 2 * 0x10):
450 SyncLightPosition(2);
451 break;
452 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].x, 0x144 + 3 * 0x10):
453 case PICA_REG_INDEX_WORKAROUND(lighting.light[3].z, 0x145 + 3 * 0x10):
454 SyncLightPosition(3);
455 break;
456 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].x, 0x144 + 4 * 0x10):
457 case PICA_REG_INDEX_WORKAROUND(lighting.light[4].z, 0x145 + 4 * 0x10):
458 SyncLightPosition(4);
459 break;
460 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].x, 0x144 + 5 * 0x10):
461 case PICA_REG_INDEX_WORKAROUND(lighting.light[5].z, 0x145 + 5 * 0x10):
462 SyncLightPosition(5);
463 break;
464 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].x, 0x144 + 6 * 0x10):
465 case PICA_REG_INDEX_WORKAROUND(lighting.light[6].z, 0x145 + 6 * 0x10):
466 SyncLightPosition(6);
467 break;
468 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].x, 0x144 + 7 * 0x10):
469 case PICA_REG_INDEX_WORKAROUND(lighting.light[7].z, 0x145 + 7 * 0x10):
470 SyncLightPosition(7);
471 break;
472
473 // Fragment lighting global ambient color (emission + ambient * ambient)
474 case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0):
475 SyncGlobalAmbient();
476 break;
477
478 // Fragment lighting lookup tables
479 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8):
480 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
481 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
482 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
483 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
484 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
485 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
486 case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
487 {
488 auto& lut_config = regs.lighting.lut_config;
489 uniform_block_data.lut_dirty[lut_config.type / 4] = true;
490 break;
491 }
492
288 } 493 }
289} 494}
290 495
291void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { 496void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
292 const auto& regs = Pica::g_state.regs; 497 const auto& regs = Pica::g_state.regs;
293 498
294 PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); 499 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format)
295 u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) 500 * fb_color_texture.width * fb_color_texture.height;
296 * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
297 501
298 PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); 502 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format)
299 u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) 503 * fb_depth_texture.width * fb_depth_texture.height;
300 * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
301 504
302 // If source memory region overlaps 3DS framebuffers, commit them before the copy happens 505 // If source memory region overlaps 3DS framebuffers, commit them before the copy happens
303 if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) 506 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size))
304 CommitColorBuffer(); 507 CommitColorBuffer();
305 508
306 if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) 509 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size))
307 CommitDepthBuffer(); 510 CommitDepthBuffer();
308} 511}
309 512
310void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { 513void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) {
311 const auto& regs = Pica::g_state.regs; 514 const auto& regs = Pica::g_state.regs;
312 515
313 PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); 516 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format)
314 u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) 517 * fb_color_texture.width * fb_color_texture.height;
315 * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
316 518
317 PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); 519 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format)
318 u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) 520 * fb_depth_texture.width * fb_depth_texture.height;
319 * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
320 521
321 // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL 522 // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL
322 if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) 523 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size))
323 ReloadColorBuffer(); 524 ReloadColorBuffer();
324 525
325 if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) 526 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size))
326 ReloadDepthBuffer(); 527 ReloadDepthBuffer();
327 528
328 // Notify cache of flush in case the region touches a cached resource 529 // Notify cache of flush in case the region touches a cached resource
@@ -497,27 +698,48 @@ void RasterizerOpenGL::SetShader() {
497 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); 698 uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
498 if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } 699 if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); }
499 700
701 // Set the texture samplers to correspond to different lookup table texture units
702 GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]");
703 if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); }
704 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]");
705 if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); }
706 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]");
707 if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); }
708 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]");
709 if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); }
710 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]");
711 if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); }
712 uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
713 if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); }
714
500 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); 715 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
501 716
502 unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); 717 unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
503 glUniformBlockBinding(current_shader->shader.handle, block_index, 0); 718 glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
504 }
505 719
506 // Update uniforms 720 // Update uniforms
507 SyncAlphaTest(); 721 SyncAlphaTest();
508 SyncCombinerColor(); 722 SyncCombinerColor();
509 auto& tev_stages = Pica::g_state.regs.GetTevStages(); 723 auto& tev_stages = Pica::g_state.regs.GetTevStages();
510 for (int index = 0; index < tev_stages.size(); ++index) 724 for (int index = 0; index < tev_stages.size(); ++index)
511 SyncTevConstColor(index, tev_stages[index]); 725 SyncTevConstColor(index, tev_stages[index]);
726
727 SyncGlobalAmbient();
728 for (int light_index = 0; light_index < 8; light_index++) {
729 SyncLightDiffuse(light_index);
730 SyncLightAmbient(light_index);
731 SyncLightPosition(light_index);
732 }
733 }
512} 734}
513 735
514void RasterizerOpenGL::SyncFramebuffer() { 736void RasterizerOpenGL::SyncFramebuffer() {
515 const auto& regs = Pica::g_state.regs; 737 const auto& regs = Pica::g_state.regs;
516 738
517 PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); 739 PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
518 Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format; 740 Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format;
519 741
520 PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); 742 PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
521 Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format; 743 Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format;
522 744
523 bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) || 745 bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) ||
@@ -529,10 +751,10 @@ void RasterizerOpenGL::SyncFramebuffer() {
529 bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format || 751 bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format ||
530 fb_size_changed; 752 fb_size_changed;
531 753
532 bool color_fb_modified = last_fb_color_addr != cur_fb_color_addr || 754 bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr ||
533 color_fb_prop_changed; 755 color_fb_prop_changed;
534 756
535 bool depth_fb_modified = last_fb_depth_addr != cur_fb_depth_addr || 757 bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr ||
536 depth_fb_prop_changed; 758 depth_fb_prop_changed;
537 759
538 // Commit if framebuffer modified in any way 760 // Commit if framebuffer modified in any way
@@ -572,13 +794,13 @@ void RasterizerOpenGL::SyncFramebuffer() {
572 794
573 // Load buffer data again if fb modified in any way 795 // Load buffer data again if fb modified in any way
574 if (color_fb_modified) { 796 if (color_fb_modified) {
575 last_fb_color_addr = cur_fb_color_addr; 797 cached_fb_color_addr = new_fb_color_addr;
576 798
577 ReloadColorBuffer(); 799 ReloadColorBuffer();
578 } 800 }
579 801
580 if (depth_fb_modified) { 802 if (depth_fb_modified) {
581 last_fb_depth_addr = cur_fb_depth_addr; 803 cached_fb_depth_addr = new_fb_depth_addr;
582 804
583 ReloadDepthBuffer(); 805 ReloadDepthBuffer();
584 } 806 }
@@ -610,8 +832,8 @@ void RasterizerOpenGL::SyncCullMode() {
610} 832}
611 833
612void RasterizerOpenGL::SyncDepthModifiers() { 834void RasterizerOpenGL::SyncDepthModifiers() {
613 float depth_scale = -Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_range).ToFloat32(); 835 float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
614 float depth_offset = Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; 836 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f;
615 837
616 // TODO: Implement scale modifier 838 // TODO: Implement scale modifier
617 uniform_block_data.data.depth_offset = depth_offset; 839 uniform_block_data.data.depth_offset = depth_offset;
@@ -689,12 +911,81 @@ void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevS
689 } 911 }
690} 912}
691 913
914void RasterizerOpenGL::SyncGlobalAmbient() {
915 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient);
916 if (color != uniform_block_data.data.lighting_global_ambient) {
917 uniform_block_data.data.lighting_global_ambient = color;
918 uniform_block_data.dirty = true;
919 }
920}
921
922void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
923 std::array<GLvec4, 256> new_data;
924
925 for (unsigned offset = 0; offset < new_data.size(); ++offset) {
926 new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat();
927 new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat();
928 new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat();
929 new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat();
930 }
931
932 if (new_data != lighting_lut_data[lut_index]) {
933 lighting_lut_data[lut_index] = new_data;
934 glActiveTexture(GL_TEXTURE3 + lut_index);
935 glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data());
936 }
937}
938
939void RasterizerOpenGL::SyncLightSpecular0(int light_index) {
940 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
941 if (color != uniform_block_data.data.light_src[light_index].specular_0) {
942 uniform_block_data.data.light_src[light_index].specular_0 = color;
943 uniform_block_data.dirty = true;
944 }
945}
946
947void RasterizerOpenGL::SyncLightSpecular1(int light_index) {
948 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
949 if (color != uniform_block_data.data.light_src[light_index].specular_1) {
950 uniform_block_data.data.light_src[light_index].specular_1 = color;
951 uniform_block_data.dirty = true;
952 }
953}
954
955void RasterizerOpenGL::SyncLightDiffuse(int light_index) {
956 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
957 if (color != uniform_block_data.data.light_src[light_index].diffuse) {
958 uniform_block_data.data.light_src[light_index].diffuse = color;
959 uniform_block_data.dirty = true;
960 }
961}
962
963void RasterizerOpenGL::SyncLightAmbient(int light_index) {
964 auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
965 if (color != uniform_block_data.data.light_src[light_index].ambient) {
966 uniform_block_data.data.light_src[light_index].ambient = color;
967 uniform_block_data.dirty = true;
968 }
969}
970
971void RasterizerOpenGL::SyncLightPosition(int light_index) {
972 GLvec3 position = {
973 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
974 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
975 Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() };
976
977 if (position != uniform_block_data.data.light_src[light_index].position) {
978 uniform_block_data.data.light_src[light_index].position = position;
979 uniform_block_data.dirty = true;
980 }
981}
982
692void RasterizerOpenGL::SyncDrawState() { 983void RasterizerOpenGL::SyncDrawState() {
693 const auto& regs = Pica::g_state.regs; 984 const auto& regs = Pica::g_state.regs;
694 985
695 // Sync the viewport 986 // Sync the viewport
696 GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2; 987 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
697 GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2; 988 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
698 989
699 // OpenGL uses different y coordinates, so negate corner offset and flip origin 990 // OpenGL uses different y coordinates, so negate corner offset and flip origin
700 // TODO: Ensure viewport_corner.x should not be negated or origin flipped 991 // TODO: Ensure viewport_corner.x should not be negated or origin flipped
@@ -723,7 +1014,7 @@ void RasterizerOpenGL::SyncDrawState() {
723MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200)); 1014MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
724 1015
725void RasterizerOpenGL::ReloadColorBuffer() { 1016void RasterizerOpenGL::ReloadColorBuffer() {
726 u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress()); 1017 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
727 1018
728 if (color_buffer == nullptr) 1019 if (color_buffer == nullptr)
729 return; 1020 return;
@@ -758,13 +1049,11 @@ void RasterizerOpenGL::ReloadColorBuffer() {
758} 1049}
759 1050
760void RasterizerOpenGL::ReloadDepthBuffer() { 1051void RasterizerOpenGL::ReloadDepthBuffer() {
761 PAddr depth_buffer_addr = Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress(); 1052 if (cached_fb_depth_addr == 0)
762
763 if (depth_buffer_addr == 0)
764 return; 1053 return;
765 1054
766 // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil 1055 // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil
767 u8* depth_buffer = Memory::GetPhysicalPointer(depth_buffer_addr); 1056 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
768 1057
769 if (depth_buffer == nullptr) 1058 if (depth_buffer == nullptr)
770 return; 1059 return;
@@ -827,8 +1116,8 @@ Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
827MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200)); 1116MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
828 1117
829void RasterizerOpenGL::CommitColorBuffer() { 1118void RasterizerOpenGL::CommitColorBuffer() {
830 if (last_fb_color_addr != 0) { 1119 if (cached_fb_color_addr != 0) {
831 u8* color_buffer = Memory::GetPhysicalPointer(last_fb_color_addr); 1120 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
832 1121
833 if (color_buffer != nullptr) { 1122 if (color_buffer != nullptr) {
834 Common::Profiling::ScopeTimer timer(buffer_commit_category); 1123 Common::Profiling::ScopeTimer timer(buffer_commit_category);
@@ -863,9 +1152,9 @@ void RasterizerOpenGL::CommitColorBuffer() {
863} 1152}
864 1153
865void RasterizerOpenGL::CommitDepthBuffer() { 1154void RasterizerOpenGL::CommitDepthBuffer() {
866 if (last_fb_depth_addr != 0) { 1155 if (cached_fb_depth_addr != 0) {
867 // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong. 1156 // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong.
868 u8* depth_buffer = Memory::GetPhysicalPointer(last_fb_depth_addr); 1157 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
869 1158
870 if (depth_buffer != nullptr) { 1159 if (depth_buffer != nullptr) {
871 Common::Profiling::ScopeTimer timer(buffer_commit_category); 1160 Common::Profiling::ScopeTimer timer(buffer_commit_category);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index c8a2d8f16..fef5f5331 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -17,6 +17,7 @@
17#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
18#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 18#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
19#include "video_core/renderer_opengl/gl_state.h" 19#include "video_core/renderer_opengl/gl_state.h"
20#include "video_core/renderer_opengl/pica_to_gl.h"
20#include "video_core/shader/shader_interpreter.h" 21#include "video_core/shader/shader_interpreter.h"
21 22
22/** 23/**
@@ -71,6 +72,59 @@ struct PicaShaderConfig {
71 regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 72 regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
72 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; 73 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
73 74
75 // Fragment lighting
76
77 res.lighting.enable = !regs.lighting.disable;
78 res.lighting.src_num = regs.lighting.num_lights + 1;
79
80 for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) {
81 unsigned num = regs.lighting.light_enable.GetNum(light_index);
82 const auto& light = regs.lighting.light[num];
83 res.lighting.light[light_index].num = num;
84 res.lighting.light[light_index].directional = light.directional != 0;
85 res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
86 res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
87 res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
88 res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
89 }
90
91 res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
92 res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
93 res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
94 res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
95
96 res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
97 res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
98 res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
99 res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
100
101 res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
102 res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
103 res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
104 res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
105
106 res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
107 res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
108 res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
109 res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
110
111 res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
112 res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
113 res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
114 res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
115
116 res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
117 res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
118 res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
119 res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
120
121 res.lighting.config = regs.lighting.config;
122 res.lighting.fresnel_selector = regs.lighting.fresnel_selector;
123 res.lighting.bump_mode = regs.lighting.bump_mode;
124 res.lighting.bump_selector = regs.lighting.bump_selector;
125 res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
126 res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
127
74 return res; 128 return res;
75 } 129 }
76 130
@@ -86,9 +140,37 @@ struct PicaShaderConfig {
86 return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; 140 return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0;
87 }; 141 };
88 142
89 Pica::Regs::CompareFunc alpha_test_func; 143 Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never;
90 std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; 144 std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
91 u8 combiner_buffer_input; 145 u8 combiner_buffer_input = 0;
146
/// Fragment lighting state that selects which GLSL code WriteLighting() emits.
/// NOTE(review): the enclosing PicaShaderConfig is compared with std::memcmp, so any padding
/// bytes between these members take part in the comparison - confirm they are zeroed.
147 struct {
/// Per-slot light configuration; WriteLighting() processes the first src_num entries
148 struct {
149 unsigned num = 0; ///< Index into the light_src uniform array; also offsets the distance attenuation LUT sampler
150 bool directional = false; ///< Light vector is normalize(position) instead of normalize(position + view)
151 bool two_sided_diffuse = false; ///< Use abs() rather than max(..., 0.0) on dot(light_vector, normal)
152 bool dist_atten_enable = false; ///< Multiply the light's contribution by a distance attenuation LUT value
153 GLfloat dist_atten_scale = 0.0f; ///< Scale applied to length(-view - position) to form the LUT index
154 GLfloat dist_atten_bias = 0.0f; ///< Bias added to the scaled distance to form the LUT index
155 } light[8];
156
157 bool enable = false; ///< Lighting code is only generated when this is set
158 unsigned src_num = 0; ///< Number of entries in light[] that are processed
159 Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; ///< How the surface-local normal is obtained
160 unsigned bump_selector = 0; ///< Texture unit sampled for the normal map
161 bool bump_renorm = false; ///< Recompute the normal's Z component from X and Y
162 bool clamp_highlights = false; ///< Zero the specular term when dot(light_vector, normal) <= 0.0
163
164 Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; ///< Passed to IsLightingSamplerSupported() to decide which LUTs apply
165 Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; ///< Which alpha sums are multiplied by the fresnel LUT value
166
/// Configuration of one lighting lookup table (distribution 0/1, fresnel, reflect R/G/B)
167 struct {
168 bool enable = false; ///< Sample this LUT (if the lighting config supports it)
169 bool abs_input = false; ///< LUT index is clamped to [0.0, 1.0] rather than remapped from [-1.0, 1.0]
170 Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; ///< Which dot product is used as the LUT input
171 float scale = 1.0f; ///< Multiplier applied to the value read from the LUT
172 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
173 } lighting;
92}; 174};
93 175
94namespace std { 176namespace std {
@@ -167,7 +249,7 @@ private:
167 249
168 /// Structure that the hardware rendered vertices are composed of 250 /// Structure that the hardware rendered vertices are composed of
169 struct HardwareVertex { 251 struct HardwareVertex {
170 HardwareVertex(const Pica::Shader::OutputVertex& v) { 252 HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
171 position[0] = v.pos.x.ToFloat32(); 253 position[0] = v.pos.x.ToFloat32();
172 position[1] = v.pos.y.ToFloat32(); 254 position[1] = v.pos.y.ToFloat32();
173 position[2] = v.pos.z.ToFloat32(); 255 position[2] = v.pos.z.ToFloat32();
@@ -182,6 +264,19 @@ private:
182 tex_coord1[1] = v.tc1.y.ToFloat32(); 264 tex_coord1[1] = v.tc1.y.ToFloat32();
183 tex_coord2[0] = v.tc2.x.ToFloat32(); 265 tex_coord2[0] = v.tc2.x.ToFloat32();
184 tex_coord2[1] = v.tc2.y.ToFloat32(); 266 tex_coord2[1] = v.tc2.y.ToFloat32();
267 normquat[0] = v.quat.x.ToFloat32();
268 normquat[1] = v.quat.y.ToFloat32();
269 normquat[2] = v.quat.z.ToFloat32();
270 normquat[3] = v.quat.w.ToFloat32();
271 view[0] = v.view.x.ToFloat32();
272 view[1] = v.view.y.ToFloat32();
273 view[2] = v.view.z.ToFloat32();
274
275 if (flip_quaternion) {
276 for (float& x : normquat) {
277 x = -x;
278 }
279 }
185 } 280 }
186 281
187 GLfloat position[4]; 282 GLfloat position[4];
@@ -189,20 +284,31 @@ private:
189 GLfloat tex_coord0[2]; 284 GLfloat tex_coord0[2];
190 GLfloat tex_coord1[2]; 285 GLfloat tex_coord1[2];
191 GLfloat tex_coord2[2]; 286 GLfloat tex_coord2[2];
287 GLfloat normquat[4];
288 GLfloat view[3];
289 };
290
/// Per-light uniform data. The member order matches the LightSrc struct declared in the
/// generated fragment shader's std140 uniform block; alignas(16) starts every three-float
/// member on its own 16-byte boundary.
291 struct LightSrc {
292 alignas(16) GLvec3 specular_0;
293 alignas(16) GLvec3 specular_1;
294 alignas(16) GLvec3 diffuse;
295 alignas(16) GLvec3 ambient;
296 alignas(16) GLvec3 position;
192 }; 297 };
193 298
194 /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned 299 /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned
195 struct UniformData { 300 struct UniformData {
196 // A vec4 color for each of the six tev stages 301 // A vec4 color for each of the six tev stages
197 std::array<GLfloat, 4> const_color[6]; 302 GLvec4 const_color[6];
198 std::array<GLfloat, 4> tev_combiner_buffer_color; 303 GLvec4 tev_combiner_buffer_color;
199 GLint alphatest_ref; 304 GLint alphatest_ref;
200 GLfloat depth_offset; 305 GLfloat depth_offset;
201 INSERT_PADDING_BYTES(8); 306 alignas(16) GLvec3 lighting_global_ambient;
307 LightSrc light_src[8];
202 }; 308 };
203 309
204 static_assert(sizeof(UniformData) == 0x80, "The size of the UniformData structure has changed, update the structure in the shader"); 310 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
205 static_assert(sizeof(UniformData) < 16000, "UniformData structure must be less than 16kb as per the OpenGL spec"); 311 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
206 312
207 /// Reconfigure the OpenGL color texture to use the given format and dimensions 313 /// Reconfigure the OpenGL color texture to use the given format and dimensions
208 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); 314 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
@@ -249,6 +355,27 @@ private:
249 /// Syncs the TEV combiner color buffer to match the PICA register 355 /// Syncs the TEV combiner color buffer to match the PICA register
250 void SyncCombinerColor(); 356 void SyncCombinerColor();
251 357
358 /// Syncs the lighting global ambient color to match the PICA register
359 void SyncGlobalAmbient();
360
361 /// Syncs the lighting lookup tables
362 void SyncLightingLUT(unsigned index);
363
364 /// Syncs the specified light's diffuse color to match the PICA register
365 void SyncLightDiffuse(int light_index);
366
367 /// Syncs the specified light's ambient color to match the PICA register
368 void SyncLightAmbient(int light_index);
369
370 /// Syncs the specified light's position to match the PICA register
371 void SyncLightPosition(int light_index);
372
373 /// Syncs the specified light's specular 0 color to match the PICA register
374 void SyncLightSpecular0(int light_index);
375
376 /// Syncs the specified light's specular 1 color to match the PICA register
377 void SyncLightSpecular1(int light_index);
378
252 /// Syncs the remaining OpenGL drawing state to match the current PICA state 379 /// Syncs the remaining OpenGL drawing state to match the current PICA state
253 void SyncDrawState(); 380 void SyncDrawState();
254 381
@@ -278,8 +405,8 @@ private:
278 405
279 OpenGLState state; 406 OpenGLState state;
280 407
281 PAddr last_fb_color_addr; 408 PAddr cached_fb_color_addr;
282 PAddr last_fb_depth_addr; 409 PAddr cached_fb_depth_addr;
283 410
284 // Hardware rasterizer 411 // Hardware rasterizer
285 std::array<SamplerInfo, 3> texture_samplers; 412 std::array<SamplerInfo, 3> texture_samplers;
@@ -291,6 +418,7 @@ private:
291 418
292 struct { 419 struct {
293 UniformData data; 420 UniformData data;
421 bool lut_dirty[6];
294 bool dirty; 422 bool dirty;
295 } uniform_block_data; 423 } uniform_block_data;
296 424
@@ -298,4 +426,7 @@ private:
298 OGLBuffer vertex_buffer; 426 OGLBuffer vertex_buffer;
299 OGLBuffer uniform_buffer; 427 OGLBuffer uniform_buffer;
300 OGLFramebuffer framebuffer; 428 OGLFramebuffer framebuffer;
429
430 std::array<OGLTexture, 6> lighting_lut;
431 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
301}; 432};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 22022f7f4..ee4b54ab9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -32,12 +32,10 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
32 out += "primary_color"; 32 out += "primary_color";
33 break; 33 break;
34 case Source::PrimaryFragmentColor: 34 case Source::PrimaryFragmentColor:
35 // HACK: Until we implement fragment lighting, use primary_color 35 out += "primary_fragment_color";
36 out += "primary_color";
37 break; 36 break;
38 case Source::SecondaryFragmentColor: 37 case Source::SecondaryFragmentColor:
39 // HACK: Until we implement fragment lighting, use zero 38 out += "secondary_fragment_color";
40 out += "vec4(0.0)";
41 break; 39 break;
42 case Source::Texture0: 40 case Source::Texture0:
43 out += "texture(tex[0], texcoord[0])"; 41 out += "texture(tex[0], texcoord[0])";
@@ -320,26 +318,229 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
320 out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; 318 out += "next_combiner_buffer.a = last_tex_env_out.a;\n";
321} 319}
322 320
321/// Writes the code to emulate fragment lighting
322static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
323 // Define lighting globals
324 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
325 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
326 "vec3 light_vector = vec3(0.0);\n"
327 "vec3 refl_value = vec3(0.0);\n";
328
329 // Compute fragment normals
330 if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
331 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture
332 std::string bump_selector = std::to_string(config.lighting.bump_selector);
333 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n";
334
335 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result
336 if (config.lighting.bump_renorm) {
337 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
338 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
339 }
340 } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
341 // Bump mapping is enabled using a tangent map
342 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
343 UNIMPLEMENTED();
344 } else {
345 // No bump mapping - surface local normal is just a unit normal
346 out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
347 }
348
349 // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace
350 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
351
352 // Gets the index into the specified lookup table for specular lighting
353 auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) {
354 const std::string half_angle = "normalize(normalize(view) + light_vector)";
355 std::string index;
356 switch (input) {
357 case Regs::LightingLutInput::NH:
358 index = "dot(normal, " + half_angle + ")";
359 break;
360
361 case Regs::LightingLutInput::VH:
362 index = std::string("dot(normalize(view), " + half_angle + ")");
363 break;
364
365 case Regs::LightingLutInput::NV:
366 index = std::string("dot(normal, normalize(view))");
367 break;
368
369 case Regs::LightingLutInput::LN:
370 index = std::string("dot(light_vector, normal)");
371 break;
372
373 default:
374 LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input);
375 UNIMPLEMENTED();
376 break;
377 }
378
379 if (abs) {
380 // LUT index is in the range of (0.0, 1.0)
381 index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
382 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
383 } else {
384 // LUT index is in the range of (-1.0, 1.0)
385 index = "clamp(" + index + ", -1.0, 1.0)";
386 return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)";
387 }
388
389 return std::string();
390 };
391
392 // Gets the lighting lookup table value given the specified sampler and index
393 auto GetLutValue = [](Regs::LightingSampler sampler, std::string lut_index) {
394 return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " +
395 lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]");
396 };
397
398 // Write the code to emulate each enabled light
399 for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) {
400 const auto& light_config = config.lighting.light[light_index];
401 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]";
402
403 // Compute light vector (directional or positional)
404 if (light_config.directional)
405 out += "light_vector = normalize(" + light_src + ".position);\n";
406 else
407 out += "light_vector = normalize(" + light_src + ".position + view);\n";
408
409 // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided
410 std::string dot_product = light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)";
411
412 // If enabled, compute distance attenuation value
413 std::string dist_atten = "1.0";
414 if (light_config.dist_atten_enable) {
415 std::string scale = std::to_string(light_config.dist_atten_scale);
416 std::string bias = std::to_string(light_config.dist_atten_bias);
417 std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")";
418 index = "((clamp(" + index + ", 0.0, FLOAT_255)))";
419 const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num);
420 dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index);
421 }
422
423 // If enabled, clamp specular component if lighting result is negative
424 std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
425
426 // Specular 0 component
427 std::string d0_lut_value = "1.0";
428 if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
429 // Lookup specular "distribution 0" LUT value
430 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input);
431 d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
432 }
433 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
434
435 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
436 if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
437 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input);
438 std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
439 out += "refl_value.r = " + value + ";\n";
440 } else {
441 out += "refl_value.r = 1.0;\n";
442 }
443
444 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
445 if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
446 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input);
447 std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
448 out += "refl_value.g = " + value + ";\n";
449 } else {
450 out += "refl_value.g = refl_value.r;\n";
451 }
452
453 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
454 if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
455 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input);
456 std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
457 out += "refl_value.b = " + value + ";\n";
458 } else {
459 out += "refl_value.b = refl_value.r;\n";
460 }
461
462 // Specular 1 component
463 std::string d1_lut_value = "1.0";
464 if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
465 // Lookup specular "distribution 1" LUT value
466 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input);
467 d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
468 }
469 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
470
471 // Fresnel
472 if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
473 // Lookup fresnel LUT value
474 std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input);
475 std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
476
477 // Enabled for difffuse lighting alpha component
478 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
479 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
480 out += "diffuse_sum.a *= " + value + ";\n";
481
482 // Enabled for the specular lighting alpha component
483 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
484 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
485 out += "specular_sum.a *= " + value + ";\n";
486 }
487
488 // Compute primary fragment color (diffuse lighting) function
489 out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n";
490
491 // Compute secondary fragment color (specular lighting) function
492 out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n";
493 }
494
495 // Sum final lighting result
496 out += "diffuse_sum.rgb += lighting_global_ambient;\n";
497 out += "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n";
498 out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n";
499}
500
323std::string GenerateFragmentShader(const PicaShaderConfig& config) { 501std::string GenerateFragmentShader(const PicaShaderConfig& config) {
324 std::string out = R"( 502 std::string out = R"(
325#version 330 core 503#version 330 core
326#define NUM_TEV_STAGES 6 504#define NUM_TEV_STAGES 6
505#define NUM_LIGHTS 8
506#define LIGHTING_LUT_SIZE 256
507#define FLOAT_255 (255.0 / 256.0)
327 508
328in vec4 primary_color; 509in vec4 primary_color;
329in vec2 texcoord[3]; 510in vec2 texcoord[3];
511in vec4 normquat;
512in vec3 view;
330 513
331out vec4 color; 514out vec4 color;
332 515
516struct LightSrc {
517 vec3 specular_0;
518 vec3 specular_1;
519 vec3 diffuse;
520 vec3 ambient;
521 vec3 position;
522};
523
333layout (std140) uniform shader_data { 524layout (std140) uniform shader_data {
334 vec4 const_color[NUM_TEV_STAGES]; 525 vec4 const_color[NUM_TEV_STAGES];
335 vec4 tev_combiner_buffer_color; 526 vec4 tev_combiner_buffer_color;
336 int alphatest_ref; 527 int alphatest_ref;
337 float depth_offset; 528 float depth_offset;
529 vec3 lighting_global_ambient;
530 LightSrc light_src[NUM_LIGHTS];
338}; 531};
339 532
340uniform sampler2D tex[3]; 533uniform sampler2D tex[3];
534uniform sampler1D lut[6];
535
536// Rotate the vector v by the quaternion q
537vec3 quaternion_rotate(vec4 q, vec3 v) {
538 return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
539}
341 540
342void main() { 541void main() {
542vec4 primary_fragment_color = vec4(0.0);
543vec4 secondary_fragment_color = vec4(0.0);
343)"; 544)";
344 545
345 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test 546 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
@@ -348,6 +549,9 @@ void main() {
348 return out; 549 return out;
349 } 550 }
350 551
552 if (config.lighting.enable)
553 WriteLighting(out, config);
554
351 out += "vec4 combiner_buffer = vec4(0.0);\n"; 555 out += "vec4 combiner_buffer = vec4(0.0);\n";
352 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; 556 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
353 out += "vec4 last_tex_env_out = vec4(0.0);\n"; 557 out += "vec4 last_tex_env_out = vec4(0.0);\n";
@@ -369,21 +573,28 @@ void main() {
369 573
370std::string GenerateVertexShader() { 574std::string GenerateVertexShader() {
371 std::string out = "#version 330 core\n"; 575 std::string out = "#version 330 core\n";
576
372 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; 577 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n";
373 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; 578 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
374 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; 579 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
375 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; 580 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
376 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; 581 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
582 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n";
583 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
377 584
378 out += R"( 585 out += R"(
379out vec4 primary_color; 586out vec4 primary_color;
380out vec2 texcoord[3]; 587out vec2 texcoord[3];
588out vec4 normquat;
589out vec3 view;
381 590
382void main() { 591void main() {
383 primary_color = vert_color; 592 primary_color = vert_color;
384 texcoord[0] = vert_texcoord0; 593 texcoord[0] = vert_texcoord0;
385 texcoord[1] = vert_texcoord1; 594 texcoord[1] = vert_texcoord1;
386 texcoord[2] = vert_texcoord2; 595 texcoord[2] = vert_texcoord2;
596 normquat = vert_normquat;
597 view = vert_view;
387 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); 598 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
388} 599}
389)"; 600)";
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 046aae14f..097242f6f 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -14,6 +14,8 @@ enum Attributes {
14 ATTRIBUTE_TEXCOORD0, 14 ATTRIBUTE_TEXCOORD0,
15 ATTRIBUTE_TEXCOORD1, 15 ATTRIBUTE_TEXCOORD1,
16 ATTRIBUTE_TEXCOORD2, 16 ATTRIBUTE_TEXCOORD2,
17 ATTRIBUTE_NORMQUAT,
18 ATTRIBUTE_VIEW,
17}; 19};
18 20
19/** 21/**
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index a82372995..08e4d0b54 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -48,6 +48,10 @@ OpenGLState::OpenGLState() {
48 texture_unit.sampler = 0; 48 texture_unit.sampler = 0;
49 } 49 }
50 50
51 for (auto& lut : lighting_lut) {
52 lut.texture_1d = 0;
53 }
54
51 draw.framebuffer = 0; 55 draw.framebuffer = 0;
52 draw.vertex_array = 0; 56 draw.vertex_array = 0;
53 draw.vertex_buffer = 0; 57 draw.vertex_buffer = 0;
@@ -170,6 +174,14 @@ void OpenGLState::Apply() {
170 } 174 }
171 } 175 }
172 176
177 // Lighting LUTs
178 for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) {
179 if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) {
180 glActiveTexture(GL_TEXTURE3 + i);
181 glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d);
182 }
183 }
184
173 // Framebuffer 185 // Framebuffer
174 if (draw.framebuffer != cur_state.draw.framebuffer) { 186 if (draw.framebuffer != cur_state.draw.framebuffer) {
175 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); 187 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b8ab45bb8..e848058d7 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -62,6 +62,10 @@ public:
62 } texture_units[3]; 62 } texture_units[3];
63 63
64 struct { 64 struct {
65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D
66 } lighting_lut[6];
67
68 struct {
65 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 69 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
66 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 70 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
67 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 71 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 04c1d1a34..3d6c4e9e5 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -10,6 +10,9 @@
10 10
11#include "video_core/pica.h" 11#include "video_core/pica.h"
12 12
13using GLvec3 = std::array<GLfloat, 3>;
14using GLvec4 = std::array<GLfloat, 4>;
15
13namespace PicaToGL { 16namespace PicaToGL {
14 17
15inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { 18inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) {
@@ -175,7 +178,7 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) {
175 return stencil_op_table[(unsigned)action]; 178 return stencil_op_table[(unsigned)action];
176} 179}
177 180
178inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) { 181inline GLvec4 ColorRGBA8(const u32 color) {
179 return { { (color >> 0 & 0xFF) / 255.0f, 182 return { { (color >> 0 & 0xFF) / 255.0f,
180 (color >> 8 & 0xFF) / 255.0f, 183 (color >> 8 & 0xFF) / 255.0f,
181 (color >> 16 & 0xFF) / 255.0f, 184 (color >> 16 & 0xFF) / 255.0f,
@@ -183,4 +186,11 @@ inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) {
183 } }; 186 } };
184} 187}
185 188
189inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) {
190 return { { color.r / 255.0f,
191 color.g / 255.0f,
192 color.b / 255.0f
193 } };
194}
195
186} // namespace 196} // namespace
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a6a38f0af..ca3a6a6b4 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -81,8 +81,8 @@ struct ScreenRectVertex {
81 * The projection part of the matrix is trivial, hence these operations are represented 81 * The projection part of the matrix is trivial, hence these operations are represented
82 * by a 3x2 matrix. 82 * by a 3x2 matrix.
83 */ 83 */
84static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const float height) { 84static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
85 std::array<GLfloat, 3*2> matrix; 85 std::array<GLfloat, 3 * 2> matrix;
86 86
87 matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; 87 matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
88 matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; 88 matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 59f54236b..44c234ed8 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -134,11 +134,13 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
134 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); 134 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
135 } 135 }
136 136
137 LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", 137 LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
138 "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
138 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 139 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
139 ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), 140 ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
140 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), 141 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
141 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); 142 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
143 ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
142 144
143 return ret; 145 return ret;
144} 146}
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 1c6fa592c..f068cd93f 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -37,17 +37,19 @@ struct OutputVertex {
37 Math::Vec4<float24> color; 37 Math::Vec4<float24> color;
38 Math::Vec2<float24> tc0; 38 Math::Vec2<float24> tc0;
39 Math::Vec2<float24> tc1; 39 Math::Vec2<float24> tc1;
40 float24 pad[6]; 40 INSERT_PADDING_WORDS(2);
41 Math::Vec3<float24> view;
42 INSERT_PADDING_WORDS(1);
41 Math::Vec2<float24> tc2; 43 Math::Vec2<float24> tc2;
42 44
43 // Padding for optimal alignment 45 // Padding for optimal alignment
44 float24 pad2[4]; 46 INSERT_PADDING_WORDS(4);
45 47
46 // Attributes used to store intermediate results 48 // Attributes used to store intermediate results
47 49
48 // position after perspective divide 50 // position after perspective divide
49 Math::Vec3<float24> screenpos; 51 Math::Vec3<float24> screenpos;
50 float24 pad3; 52 INSERT_PADDING_WORDS(1);
51 53
52 // Linear interpolation 54 // Linear interpolation
53 // factor: 0=this, 1=vtx 55 // factor: 0=this, 1=vtx