diff options
Diffstat (limited to 'src')
44 files changed, 2151 insertions, 354 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cb09f3cd1..2bb411492 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt | |||
| @@ -4,6 +4,7 @@ include_directories(.) | |||
| 4 | add_subdirectory(common) | 4 | add_subdirectory(common) |
| 5 | add_subdirectory(core) | 5 | add_subdirectory(core) |
| 6 | add_subdirectory(video_core) | 6 | add_subdirectory(video_core) |
| 7 | add_subdirectory(audio_core) | ||
| 7 | if (ENABLE_GLFW) | 8 | if (ENABLE_GLFW) |
| 8 | add_subdirectory(citra) | 9 | add_subdirectory(citra) |
| 9 | endif() | 10 | endif() |
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt new file mode 100644 index 000000000..b0d1c7eb6 --- /dev/null +++ b/src/audio_core/CMakeLists.txt | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | set(SRCS | ||
| 2 | audio_core.cpp | ||
| 3 | hle/dsp.cpp | ||
| 4 | hle/pipe.cpp | ||
| 5 | ) | ||
| 6 | |||
| 7 | set(HEADERS | ||
| 8 | audio_core.h | ||
| 9 | hle/dsp.h | ||
| 10 | hle/pipe.h | ||
| 11 | sink.h | ||
| 12 | ) | ||
| 13 | |||
| 14 | create_directory_groups(${SRCS} ${HEADERS}) | ||
| 15 | |||
| 16 | add_library(audio_core STATIC ${SRCS} ${HEADERS}) \ No newline at end of file | ||
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp new file mode 100644 index 000000000..894f46990 --- /dev/null +++ b/src/audio_core/audio_core.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "audio_core/audio_core.h" | ||
| 6 | #include "audio_core/hle/dsp.h" | ||
| 7 | |||
| 8 | #include "core/core_timing.h" | ||
| 9 | #include "core/hle/kernel/vm_manager.h" | ||
| 10 | #include "core/hle/service/dsp_dsp.h" | ||
| 11 | |||
| 12 | namespace AudioCore { | ||
| 13 | |||
| 14 | // Audio Ticks occur about every 5 milliseconds. | ||
| 15 | static int tick_event; ///< CoreTiming event | ||
| 16 | static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles | ||
| 17 | |||
| 18 | static void AudioTickCallback(u64 /*userdata*/, int cycles_late) { /* CoreTiming callback registered for tick_event */ | ||
| 19 | if (DSP::HLE::Tick()) { | ||
| 20 | // HACK: We're not signaling the interrupts when they should be, but just firing them all off together. | ||
| 21 | // It should be only (interrupt_id = 2, channel_id = 2) that's signalled here. | ||
| 22 | // TODO(merry): Understand when the other interrupts are fired. | ||
| 23 | DSP_DSP::SignalAllInterrupts(); | ||
| 24 | } | ||
| 25 | |||
| 26 | // Reschedule recurrent event, shortening the delay by however many cycles late this callback ran | ||
| 27 | CoreTiming::ScheduleEvent(audio_frame_ticks - cycles_late, tick_event); | ||
| 28 | } | ||
| 29 | |||
| 30 | /// Initialise Audio: starts the HLE DSP, then registers and schedules the recurring audio tick event. | ||
| 31 | void Init() { | ||
| 32 | DSP::HLE::Init(); | ||
| 33 | |||
| 34 | tick_event = CoreTiming::RegisterEvent("AudioCore::tick_event", AudioTickCallback); | ||
| 35 | CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event); | ||
| 36 | } | ||
| 37 | |||
| 38 | /// Add DSP address spaces to Process's address space: both shared memory regions are mapped as IO memory and reprotected as read/write. | ||
| 39 | void AddAddressSpace(Kernel::VMManager& address_space) { | ||
| 40 | auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_region0), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); | ||
| 41 | address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite); | ||
| 42 | |||
| 43 | auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_region1), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); | ||
| 44 | address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite); | ||
| 45 | } | ||
| 46 | |||
| 47 | /// Shutdown Audio: unschedules the audio tick event, then shuts down the HLE DSP. | ||
| 48 | void Shutdown() { | ||
| 49 | CoreTiming::UnscheduleEvent(tick_event, 0); | ||
| 50 | DSP::HLE::Shutdown(); | ||
| 51 | } | ||
| 52 | |||
| 53 | } //namespace | ||
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h new file mode 100644 index 000000000..64c330914 --- /dev/null +++ b/src/audio_core/audio_core.h | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace Kernel { | ||
| 8 | class VMManager; | ||
| 9 | } | ||
| 10 | |||
| 11 | namespace AudioCore { | ||
| 12 | |||
| 13 | constexpr int num_sources = 24; /**< Number of audio sources; sizes the per-source arrays in the DSP structures */ | ||
| 14 | constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate | ||
| 15 | constexpr int native_sample_rate = 32728; ///< Native sample rate in Hz (roughly 32 kHz) | ||
| 16 | |||
| 17 | /// Initialise Audio Core | ||
| 18 | void Init(); | ||
| 19 | |||
| 20 | /// Add DSP address spaces to a Process. | ||
| 21 | void AddAddressSpace(Kernel::VMManager& vm_manager); | ||
| 22 | |||
| 23 | /// Shutdown Audio Core | ||
| 24 | void Shutdown(); | ||
| 25 | |||
| 26 | } // namespace | ||
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp new file mode 100644 index 000000000..c89356edc --- /dev/null +++ b/src/audio_core/hle/dsp.cpp | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "audio_core/hle/dsp.h" | ||
| 6 | #include "audio_core/hle/pipe.h" | ||
| 7 | |||
| 8 | namespace DSP { | ||
| 9 | namespace HLE { | ||
| 10 | |||
| 11 | SharedMemory g_region0; | ||
| 12 | SharedMemory g_region1; | ||
| 13 | |||
| 14 | void Init() { /* Initialisation currently consists solely of resetting the pipes. */ | ||
| 15 | DSP::HLE::ResetPipes(); | ||
| 16 | } | ||
| 17 | |||
| 18 | void Shutdown() { /* Empty body: this function performs no work. */ | ||
| 19 | } | ||
| 20 | |||
| 21 | bool Tick() { /* Stub: performs no processing. */ | ||
| 22 | return true; /* Always reports that an audio interrupt should be triggered. */ | ||
| 23 | } | ||
| 24 | |||
| 25 | SharedMemory& CurrentRegion() { | ||
| 26 | // The region with the higher frame counter is chosen unless there is wraparound. | ||
| 27 | |||
| 28 | if (g_region0.frame_counter == 0xFFFFu && g_region1.frame_counter != 0xFFFEu) { | ||
| 29 | // Wraparound has occurred. | ||
| 30 | return g_region1; | ||
| 31 | } | ||
| 32 | |||
| 33 | if (g_region1.frame_counter == 0xFFFFu && g_region0.frame_counter != 0xFFFEu) { | ||
| 34 | // Wraparound has occurred. | ||
| 35 | return g_region0; | ||
| 36 | } | ||
| 37 | |||
| 38 | return (g_region0.frame_counter > g_region1.frame_counter) ? g_region0 : g_region1; /* Equal counters select region 1. */ | ||
| 39 | } | ||
| 40 | |||
| 41 | } // namespace HLE | ||
| 42 | } // namespace DSP | ||
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h new file mode 100644 index 000000000..14c4000c6 --- /dev/null +++ b/src/audio_core/hle/dsp.h | |||
| @@ -0,0 +1,502 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 10 | #include "audio_core/audio_core.h" | ||
| 11 | |||
| 12 | #include "common/bit_field.h" | ||
| 13 | #include "common/common_funcs.h" | ||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "common/swap.h" | ||
| 16 | |||
| 17 | namespace DSP { | ||
| 18 | namespace HLE { | ||
| 19 | |||
| 20 | // The application-accessible region of DSP memory consists of two parts. | ||
| 21 | // Both are marked as IO and have Read/Write permissions. | ||
| 22 | // | ||
| 23 | // First Region: 0x1FF50000 (Size: 0x8000) | ||
| 24 | // Second Region: 0x1FF70000 (Size: 0x8000) | ||
| 25 | // | ||
| 26 | // The DSP reads from each region alternately based on the frame counter for each region much like a | ||
| 27 | // double-buffer. The frame counter is located as the very last u16 of each region and is incremented | ||
| 28 | // each audio tick. | ||
| 29 | |||
| 30 | struct SharedMemory; | ||
| 31 | |||
| 32 | constexpr VAddr region0_base = 0x1FF50000; | ||
| 33 | extern SharedMemory g_region0; | ||
| 34 | |||
| 35 | constexpr VAddr region1_base = 0x1FF70000; | ||
| 36 | extern SharedMemory g_region1; | ||
| 37 | |||
| 38 | /** | ||
| 39 | * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from | ||
| 40 | * its memory regions, the higher and lower 16-bit halves are swapped compared to the little-endian | ||
| 41 | * layout of the ARM11. Hence from the ARM11's point of view the memory space appears to be | ||
| 42 | * middle-endian. | ||
| 43 | * | ||
| 44 | * Unusually this does not appear to be an issue for floating point numbers. The DSP makes the more | ||
| 45 | * sensible choice of keeping that little-endian. There are also some exceptions such as the | ||
| 46 | * IntermediateMixSamples structure, which is little-endian. | ||
| 47 | * | ||
| 48 | * This struct implements the conversion to and from this middle-endianness. | ||
| 49 | */ | ||
| 50 | struct u32_dsp { | ||
| 51 | u32_dsp() = default; | ||
| 52 | operator u32() const { /* Read: returns the stored value with its 16-bit halves swapped back. */ | ||
| 53 | return Convert(storage); | ||
| 54 | } | ||
| 55 | void operator=(u32 new_value) { /* Write: stores new_value with its 16-bit halves swapped. */ | ||
| 56 | storage = Convert(new_value); | ||
| 57 | } | ||
| 58 | private: | ||
| 59 | static constexpr u32 Convert(u32 value) { | ||
| 60 | return (value << 16) | (value >> 16); /* Swaps the upper and lower 16-bit halves; applying it twice restores the input. */ | ||
| 61 | } | ||
| 62 | u32_le storage; | ||
| 63 | }; | ||
| 64 | #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) | ||
| 65 | static_assert(std::is_trivially_copyable<u32_dsp>::value, "u32_dsp isn't trivially copyable"); | ||
| 66 | #endif | ||
| 67 | |||
| 68 | // There are 15 structures in each memory region. A table of them in the order they appear in memory | ||
| 69 | // is presented below | ||
| 70 | // | ||
| 71 | // Pipe 2 # First Region DSP Address Purpose Control | ||
| 72 | // 5 0x8400 DSP Status DSP | ||
| 73 | // 9 0x8410 DSP Debug Info DSP | ||
| 74 | // 6 0x8540 Final Mix Samples DSP | ||
| 75 | // 2 0x8680 Source Status [24] DSP | ||
| 76 | // 8 0x8710 Compressor Table Application | ||
| 77 | // 4 0x9430 DSP Configuration Application | ||
| 78 | // 7 0x9492 Intermediate Mix Samples DSP + App | ||
| 79 | // 1 0x9E92 Source Configuration [24] Application | ||
| 80 | // 3 0xA792 Source ADPCM Coefficients [24] Application | ||
| 81 | // 10 0xA912 Surround Sound Related | ||
| 82 | // 11 0xAA12 Surround Sound Related | ||
| 83 | // 12 0xAAD2 Surround Sound Related | ||
| 84 | // 13 0xAC52 Surround Sound Related | ||
| 85 | // 14 0xAC5C Surround Sound Related | ||
| 86 | // 0 0xBFFF Frame Counter Application | ||
| 87 | // | ||
| 88 | // Note that the above addresses do vary slightly between audio firmwares observed; the addresses are | ||
| 89 | // not fixed in stone. The addresses above are only an exemplar; they're what this implementation | ||
| 90 | // does and provides to applications. | ||
| 91 | // | ||
| 92 | // Applications request the DSP service to convert DSP addresses into ARM11 virtual addresses using the | ||
| 93 | // ConvertProcessAddressFromDspDram service call. Applications seem to derive the addresses for the | ||
| 94 | // second region via: | ||
| 95 | // second_region_dsp_addr = first_region_dsp_addr | 0x10000 | ||
| 96 | // | ||
| 97 | // Applications maintain most of their own audio state; the memory region is used mainly for | ||
| 98 | // communication and not storage of state. | ||
| 99 | // | ||
| 100 | // In the documentation below, filter and effect transfer functions are specified in the z domain. | ||
| 101 | // (If you are more familiar with the Laplace transform, z = exp(sT). The z domain is the digital | ||
| 102 | // frequency domain, just like how the s domain is the analog frequency domain.) | ||
| 103 | |||
| 104 | #define INSERT_PADDING_DSPWORDS(num_words) INSERT_PADDING_BYTES(2 * (num_words)) | ||
| 105 | |||
| 106 | // GCC versions < 5.0 do not implement std::is_trivially_copyable. | ||
| 107 | // Excluding MSVC because it has weird behaviour for std::is_trivially_copyable. | ||
| 108 | #if (__GNUC__ >= 5) || defined(__clang__) | ||
| 109 | #define ASSERT_DSP_STRUCT(name, size) \ | ||
| 110 | static_assert(std::is_standard_layout<name>::value, "DSP structure " #name " doesn't use standard layout"); \ | ||
| 111 | static_assert(std::is_trivially_copyable<name>::value, "DSP structure " #name " isn't trivially copyable"); \ | ||
| 112 | static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name) | ||
| 113 | #else | ||
| 114 | #define ASSERT_DSP_STRUCT(name, size) \ | ||
| 115 | static_assert(std::is_standard_layout<name>::value, "DSP structure " #name " doesn't use standard layout"); \ | ||
| 116 | static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name) | ||
| 117 | #endif | ||
| 118 | |||
| 119 | struct SourceConfiguration { | ||
| 120 | struct Configuration { | ||
| 121 | /// These dirty flags are set by the application when it updates the fields in this struct. | ||
| 122 | /// The DSP clears these each audio frame. | ||
| 123 | union { | ||
| 124 | u32_le dirty_raw; | ||
| 125 | |||
| 126 | BitField<2, 1, u32_le> adpcm_coefficients_dirty; | ||
| 127 | BitField<3, 1, u32_le> partial_embedded_buffer_dirty; ///< Tends to be set when a looped buffer is queued. | ||
| 128 | |||
| 129 | BitField<16, 1, u32_le> enable_dirty; | ||
| 130 | BitField<17, 1, u32_le> interpolation_dirty; | ||
| 131 | BitField<18, 1, u32_le> rate_multiplier_dirty; | ||
| 132 | BitField<19, 1, u32_le> buffer_queue_dirty; | ||
| 133 | BitField<20, 1, u32_le> loop_related_dirty; | ||
| 134 | BitField<21, 1, u32_le> play_position_dirty; ///< Tends to also be set when embedded buffer is updated. | ||
| 135 | BitField<22, 1, u32_le> filters_enabled_dirty; | ||
| 136 | BitField<23, 1, u32_le> simple_filter_dirty; | ||
| 137 | BitField<24, 1, u32_le> biquad_filter_dirty; | ||
| 138 | BitField<25, 1, u32_le> gain_0_dirty; | ||
| 139 | BitField<26, 1, u32_le> gain_1_dirty; | ||
| 140 | BitField<27, 1, u32_le> gain_2_dirty; | ||
| 141 | BitField<28, 1, u32_le> sync_dirty; | ||
| 142 | BitField<29, 1, u32_le> reset_flag; | ||
| 143 | |||
| 144 | BitField<31, 1, u32_le> embedded_buffer_dirty; | ||
| 145 | }; | ||
| 146 | |||
| 147 | // Gain control | ||
| 148 | |||
| 149 | /** | ||
| 150 | * Gain is between 0.0-1.0. This determines how much will this source appear on | ||
| 151 | * each of the 12 channels that feed into the intermediate mixers. | ||
| 152 | * Each of the three intermediate mixers is fed two left and two right channels. | ||
| 153 | */ | ||
| 154 | float_le gain[3][4]; | ||
| 155 | |||
| 156 | // Interpolation | ||
| 157 | |||
| 158 | /// Multiplier for sample rate. Resampling occurs with the selected interpolation method. | ||
| 159 | float_le rate_multiplier; | ||
| 160 | |||
| 161 | enum class InterpolationMode : u8 { | ||
| 162 | None = 0, | ||
| 163 | Linear = 1, | ||
| 164 | Polyphase = 2 | ||
| 165 | }; | ||
| 166 | |||
| 167 | InterpolationMode interpolation_mode; | ||
| 168 | INSERT_PADDING_BYTES(1); ///< Interpolation related | ||
| 169 | |||
| 170 | // Filters | ||
| 171 | |||
| 172 | /** | ||
| 173 | * This is the simplest normalized first-order digital recursive filter. | ||
| 174 | * The transfer function of this filter is: | ||
| 175 | * H(z) = b0 / (1 + a1 z^-1) | ||
| 176 | * Values are signed fixed point with 15 fractional bits. | ||
| 177 | */ | ||
| 178 | struct SimpleFilter { | ||
| 179 | s16_le b0; | ||
| 180 | s16_le a1; | ||
| 181 | }; | ||
| 182 | |||
| 183 | /** | ||
| 184 | * This is a normalised biquad filter (second-order). | ||
| 185 | * The transfer function of this filter is: | ||
| 186 | * H(z) = (b0 + b1 z^-1 + b2 z^-2) / (1 - a1 z^-1 - a2 z^-2) | ||
| 187 | * Nintendo chose to negate the feedback coefficients. This differs from standard notation | ||
| 188 | * as in: https://ccrma.stanford.edu/~jos/filters/Direct_Form_I.html | ||
| 189 | * Values are signed fixed point with 14 fractional bits. | ||
| 190 | */ | ||
| 191 | struct BiquadFilter { | ||
| 192 | s16_le b0; | ||
| 193 | s16_le b1; | ||
| 194 | s16_le b2; | ||
| 195 | s16_le a1; | ||
| 196 | s16_le a2; | ||
| 197 | }; | ||
| 198 | |||
| 199 | union { | ||
| 200 | u16_le filters_enabled; | ||
| 201 | BitField<0, 1, u16_le> simple_filter_enabled; | ||
| 202 | BitField<1, 1, u16_le> biquad_filter_enabled; | ||
| 203 | }; | ||
| 204 | |||
| 205 | SimpleFilter simple_filter; | ||
| 206 | BiquadFilter biquad_filter; | ||
| 207 | |||
| 208 | // Buffer Queue | ||
| 209 | |||
| 210 | /// A buffer of audio data from the application, along with metadata about it. | ||
| 211 | struct Buffer { | ||
| 212 | /// Physical memory address of the start of the buffer | ||
| 213 | u32_dsp physical_address; | ||
| 214 | |||
| 215 | /// This is length in terms of samples. | ||
| 216 | /// Note that in different buffer formats a sample takes up different number of bytes. | ||
| 217 | u32_dsp length; | ||
| 218 | |||
| 219 | /// ADPCM Predictor (4 bits) and Scale (4 bits) | ||
| 220 | union { | ||
| 221 | u16_le adpcm_ps; | ||
| 222 | BitField<0, 4, u16_le> adpcm_scale; | ||
| 223 | BitField<4, 4, u16_le> adpcm_predictor; | ||
| 224 | }; | ||
| 225 | |||
| 226 | /// ADPCM Historical Samples (y[n-1] and y[n-2]) | ||
| 227 | u16_le adpcm_yn[2]; | ||
| 228 | |||
| 229 | /// This is non-zero when the ADPCM values above are to be updated. | ||
| 230 | u8 adpcm_dirty; | ||
| 231 | |||
| 232 | /// Is a looping buffer. | ||
| 233 | u8 is_looping; | ||
| 234 | |||
| 235 | /// This value is shown in SourceStatus::previous_buffer_id when this buffer has finished. | ||
| 236 | /// This allows the emulated application to tell what buffer is currently playing | ||
| 237 | u16_le buffer_id; | ||
| 238 | |||
| 239 | INSERT_PADDING_DSPWORDS(1); | ||
| 240 | }; | ||
| 241 | |||
| 242 | u16_le buffers_dirty; ///< Bitmap indicating which buffers are dirty (bit i -> buffers[i]) | ||
| 243 | Buffer buffers[4]; ///< Queued Buffers | ||
| 244 | |||
| 245 | // Playback controls | ||
| 246 | |||
| 247 | u32_dsp loop_related; | ||
| 248 | u8 enable; | ||
| 249 | INSERT_PADDING_BYTES(1); | ||
| 250 | u16_le sync; ///< Application-side sync (See also: SourceStatus::sync) | ||
| 251 | u32_dsp play_position; ///< Position. (Units: number of samples) | ||
| 252 | INSERT_PADDING_DSPWORDS(2); | ||
| 253 | |||
| 254 | // Embedded Buffer | ||
| 255 | // This buffer is often the first buffer to be used when initiating audio playback, | ||
| 256 | // after which the buffer queue is used. | ||
| 257 | |||
| 258 | u32_dsp physical_address; | ||
| 259 | |||
| 260 | /// This is length in terms of samples. | ||
| 261 | /// Note a sample takes up different number of bytes in different buffer formats. | ||
| 262 | u32_dsp length; | ||
| 263 | |||
| 264 | enum class MonoOrStereo : u16_le { | ||
| 265 | Mono = 1, | ||
| 266 | Stereo = 2 | ||
| 267 | }; | ||
| 268 | |||
| 269 | enum class Format : u16_le { | ||
| 270 | PCM8 = 0, | ||
| 271 | PCM16 = 1, | ||
| 272 | ADPCM = 2 | ||
| 273 | }; | ||
| 274 | |||
| 275 | union { | ||
| 276 | u16_le flags1_raw; | ||
| 277 | BitField<0, 2, MonoOrStereo> mono_or_stereo; | ||
| 278 | BitField<2, 2, Format> format; | ||
| 279 | BitField<5, 1, u16_le> fade_in; | ||
| 280 | }; | ||
| 281 | |||
| 282 | /// ADPCM Predictor (4 bit) and Scale (4 bit) | ||
| 283 | union { | ||
| 284 | u16_le adpcm_ps; | ||
| 285 | BitField<0, 4, u16_le> adpcm_scale; | ||
| 286 | BitField<4, 4, u16_le> adpcm_predictor; | ||
| 287 | }; | ||
| 288 | |||
| 289 | /// ADPCM Historical Samples (y[n-1] and y[n-2]) | ||
| 290 | u16_le adpcm_yn[2]; | ||
| 291 | |||
| 292 | union { | ||
| 293 | u16_le flags2_raw; | ||
| 294 | BitField<0, 1, u16_le> adpcm_dirty; ///< Has the ADPCM info above been changed? | ||
| 295 | BitField<1, 1, u16_le> is_looping; ///< Is this a looping buffer? | ||
| 296 | }; | ||
| 297 | |||
| 298 | /// Buffer id of embedded buffer (used as a buffer id in SourceStatus to reference this buffer). | ||
| 299 | u16_le buffer_id; | ||
| 300 | }; | ||
| 301 | |||
| 302 | Configuration config[AudioCore::num_sources]; | ||
| 303 | }; | ||
| 304 | ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192); | ||
| 305 | ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); | ||
| 306 | |||
| 307 | struct SourceStatus { | ||
| 308 | struct Status { | ||
| 309 | u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) | ||
| 310 | u8 previous_buffer_id_dirty; ///< Non-zero when previous_buffer_id changes | ||
| 311 | u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync | ||
| 312 | u32_dsp buffer_position; ///< Number of samples into the current buffer | ||
| 313 | u16_le previous_buffer_id; ///< Updated when a buffer finishes playing | ||
| 314 | INSERT_PADDING_DSPWORDS(1); | ||
| 315 | }; | ||
| 316 | |||
| 317 | Status status[AudioCore::num_sources]; | ||
| 318 | }; | ||
| 319 | ASSERT_DSP_STRUCT(SourceStatus::Status, 12); | ||
| 320 | |||
| 321 | struct DspConfiguration { | ||
| 322 | /// These dirty flags are set by the application when it updates the fields in this struct. | ||
| 323 | /// The DSP clears these each audio frame. | ||
| 324 | union { | ||
| 325 | u32_le dirty_raw; | ||
| 326 | |||
| 327 | BitField<8, 1, u32_le> mixer1_enabled_dirty; | ||
| 328 | BitField<9, 1, u32_le> mixer2_enabled_dirty; | ||
| 329 | BitField<10, 1, u32_le> delay_effect_0_dirty; | ||
| 330 | BitField<11, 1, u32_le> delay_effect_1_dirty; | ||
| 331 | BitField<12, 1, u32_le> reverb_effect_0_dirty; | ||
| 332 | BitField<13, 1, u32_le> reverb_effect_1_dirty; | ||
| 333 | |||
| 334 | BitField<16, 1, u32_le> volume_0_dirty; | ||
| 335 | |||
| 336 | BitField<24, 1, u32_le> volume_1_dirty; | ||
| 337 | BitField<25, 1, u32_le> volume_2_dirty; | ||
| 338 | BitField<26, 1, u32_le> output_format_dirty; | ||
| 339 | BitField<27, 1, u32_le> limiter_enabled_dirty; | ||
| 340 | BitField<28, 1, u32_le> headphones_connected_dirty; | ||
| 341 | }; | ||
| 342 | |||
| 343 | /// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for each at the final mixer | ||
| 344 | float_le volume[3]; | ||
| 345 | |||
| 346 | INSERT_PADDING_DSPWORDS(3); | ||
| 347 | |||
| 348 | enum class OutputFormat : u16_le { | ||
| 349 | Mono = 0, | ||
| 350 | Stereo = 1, | ||
| 351 | Surround = 2 | ||
| 352 | }; | ||
| 353 | |||
| 354 | OutputFormat output_format; | ||
| 355 | |||
| 356 | u16_le limiter_enabled; ///< Not sure of the exact gain equation for the limiter. | ||
| 357 | u16_le headphones_connected; ///< Application updates the DSP on headphone status. | ||
| 358 | INSERT_PADDING_DSPWORDS(4); ///< TODO: Surround sound related | ||
| 359 | INSERT_PADDING_DSPWORDS(2); ///< TODO: Intermediate mixer 1/2 related | ||
| 360 | u16_le mixer1_enabled; | ||
| 361 | u16_le mixer2_enabled; | ||
| 362 | |||
| 363 | /** | ||
| 364 | * This is delay with feedback. | ||
| 365 | * Transfer function: | ||
| 366 | * H(z) = a z^-N / (1 - b z^-1 + a g z^-N) | ||
| 367 | * where | ||
| 368 | * N = frame_count * samples_per_frame | ||
| 369 | * g, a and b are fixed point with 7 fractional bits | ||
| 370 | */ | ||
| 371 | struct DelayEffect { | ||
| 372 | /// These dirty flags are set by the application when it updates the fields in this struct. | ||
| 373 | /// The DSP clears these each audio frame. | ||
| 374 | union { | ||
| 375 | u16_le dirty_raw; | ||
| 376 | BitField<0, 1, u16_le> enable_dirty; | ||
| 377 | BitField<1, 1, u16_le> work_buffer_address_dirty; | ||
| 378 | BitField<2, 1, u16_le> other_dirty; ///< Set when anything else has been changed | ||
| 379 | }; | ||
| 380 | |||
| 381 | u16_le enable; | ||
| 382 | INSERT_PADDING_DSPWORDS(1); | ||
| 383 | u16_le outputs; | ||
| 384 | u32_dsp work_buffer_address; ///< The application allocates a block of memory for the DSP to use as a work buffer. | ||
| 385 | u16_le frame_count; ///< Frames to delay by | ||
| 386 | |||
| 387 | // Coefficients | ||
| 388 | s16_le g; ///< Fixed point with 7 fractional bits | ||
| 389 | s16_le a; ///< Fixed point with 7 fractional bits | ||
| 390 | s16_le b; ///< Fixed point with 7 fractional bits | ||
| 391 | }; | ||
| 392 | |||
| 393 | DelayEffect delay_effect[2]; | ||
| 394 | |||
| 395 | struct ReverbEffect { | ||
| 396 | INSERT_PADDING_DSPWORDS(26); ///< TODO | ||
| 397 | }; | ||
| 398 | |||
| 399 | ReverbEffect reverb_effect[2]; | ||
| 400 | |||
| 401 | INSERT_PADDING_DSPWORDS(4); | ||
| 402 | }; | ||
| 403 | ASSERT_DSP_STRUCT(DspConfiguration, 196); | ||
| 404 | ASSERT_DSP_STRUCT(DspConfiguration::DelayEffect, 20); | ||
| 405 | ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52); | ||
| 406 | |||
| 407 | struct AdpcmCoefficients { | ||
| 408 | /// Coefficients are signed fixed point with 11 fractional bits. | ||
| 409 | /// Each source has 16 coefficients associated with it. | ||
| 410 | s16_le coeff[AudioCore::num_sources][16]; | ||
| 411 | }; | ||
| 412 | ASSERT_DSP_STRUCT(AdpcmCoefficients, 768); | ||
| 413 | |||
| 414 | struct DspStatus { | ||
| 415 | u16_le unknown; | ||
| 416 | u16_le dropped_frames; | ||
| 417 | INSERT_PADDING_DSPWORDS(0xE); | ||
| 418 | }; | ||
| 419 | ASSERT_DSP_STRUCT(DspStatus, 32); | ||
| 420 | |||
| 421 | /// Final mixed output in PCM16 stereo format, what you hear out of the speakers. | ||
| 422 | /// When the application writes to this region it has no effect. | ||
| 423 | struct FinalMixSamples { | ||
| 424 | s16_le pcm16[2 * AudioCore::samples_per_frame]; | ||
| 425 | }; | ||
| 426 | ASSERT_DSP_STRUCT(FinalMixSamples, 640); | ||
| 427 | |||
| 428 | /// DSP writes output of intermediate mixers 1 and 2 here. | ||
| 429 | /// Writes to this region by the application edits the output of the intermediate mixers. | ||
| 430 | /// This seems to be intended to allow the application to do custom effects on the ARM11. | ||
| 431 | /// Values that exceed s16 range will be clipped by the DSP after further processing. | ||
| 432 | struct IntermediateMixSamples { | ||
| 433 | struct Samples { | ||
| 434 | s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian. | ||
| 435 | }; | ||
| 436 | |||
| 437 | Samples mix1; | ||
| 438 | Samples mix2; | ||
| 439 | }; | ||
| 440 | ASSERT_DSP_STRUCT(IntermediateMixSamples, 5120); | ||
| 441 | |||
| 442 | /// Compressor table | ||
| 443 | struct Compressor { | ||
| 444 | INSERT_PADDING_DSPWORDS(0xD20); ///< TODO | ||
| 445 | }; | ||
| 446 | |||
| 447 | /// There is no easy way to implement this in a HLE implementation. | ||
| 448 | struct DspDebug { | ||
| 449 | INSERT_PADDING_DSPWORDS(0x130); | ||
| 450 | }; | ||
| 451 | ASSERT_DSP_STRUCT(DspDebug, 0x260); | ||
| 452 | |||
| 453 | struct SharedMemory { | ||
| 454 | /// Padding | ||
| 455 | INSERT_PADDING_DSPWORDS(0x400); | ||
| 456 | |||
| 457 | DspStatus dsp_status; /**< First-region DSP address 0x8400 */ | ||
| 458 | |||
| 459 | DspDebug dsp_debug; /**< First-region DSP address 0x8410 */ | ||
| 460 | |||
| 461 | FinalMixSamples final_samples; /**< First-region DSP address 0x8540 */ | ||
| 462 | |||
| 463 | SourceStatus source_statuses; /**< First-region DSP address 0x8680 */ | ||
| 464 | |||
| 465 | Compressor compressor; /**< First-region DSP address 0x8710 */ | ||
| 466 | |||
| 467 | DspConfiguration dsp_configuration; /**< First-region DSP address 0x9430 */ | ||
| 468 | |||
| 469 | IntermediateMixSamples intermediate_mix_samples; /**< First-region DSP address 0x9492 */ | ||
| 470 | |||
| 471 | SourceConfiguration source_configurations; /**< First-region DSP address 0x9E92 */ | ||
| 472 | |||
| 473 | AdpcmCoefficients adpcm_coefficients; /**< First-region DSP address 0xA792 */ | ||
| 474 | |||
| 475 | /// Unknown 10-14 (Surround sound related); occupies first-region DSP addresses 0xA912 up to the frame counter | ||
| 476 | INSERT_PADDING_DSPWORDS(0x16ED); | ||
| 477 | |||
| 478 | u16_le frame_counter; /**< First-region DSP address 0xBFFF; the very last u16 of the region */ | ||
| 479 | }; | ||
| 480 | ASSERT_DSP_STRUCT(SharedMemory, 0x8000); | ||
| 481 | |||
| 482 | #undef INSERT_PADDING_DSPWORDS | ||
| 483 | #undef ASSERT_DSP_STRUCT | ||
| 484 | |||
| 485 | /// Initialize DSP hardware | ||
| 486 | void Init(); | ||
| 487 | |||
| 488 | /// Shutdown DSP hardware | ||
| 489 | void Shutdown(); | ||
| 490 | |||
| 491 | /** | ||
| 492 | * Perform processing and updates state of current shared memory buffer. | ||
| 493 | * This function is called every audio tick before triggering the audio interrupt. | ||
| 494 | * @return Whether an audio interrupt should be triggered this frame. | ||
| 495 | */ | ||
| 496 | bool Tick(); | ||
| 497 | |||
| 498 | /// Returns a mutable reference to the current region. Current region is selected based on the frame counter. | ||
| 499 | SharedMemory& CurrentRegion(); | ||
| 500 | |||
| 501 | } // namespace HLE | ||
| 502 | } // namespace DSP | ||
diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp new file mode 100644 index 000000000..6542c760c --- /dev/null +++ b/src/audio_core/hle/pipe.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "audio_core/hle/pipe.h" | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/logging/log.h" | ||
| 12 | |||
| 13 | namespace DSP { | ||
| 14 | namespace HLE { | ||
| 15 | |||
| 16 | static size_t pipe2position = 0; | ||
| 17 | |||
| 18 | void ResetPipes() { | ||
| 19 | pipe2position = 0; /* Rewind the read position used by PipeRead for pipe 2. */ | ||
| 20 | } | ||
| 21 | |||
| 22 | std::vector<u8> PipeRead(u32 pipe_number, u32 length) { /* Serves reads of pipe 2 from a canned response; any other pipe yields no data. */ | ||
| 23 | if (pipe_number != 2) { | ||
| 24 | LOG_WARNING(Audio_DSP, "pipe_number = %u (!= 2), unimplemented", pipe_number); | ||
| 25 | return {}; // We currently don't handle anything other than the audio pipe. | ||
| 26 | } | ||
| 27 | |||
| 28 | // Canned DSP responses that games expect. These were taken from HW by 3dmoo team. | ||
| 29 | // TODO: Our implementation will actually use a slightly different response than this one. | ||
| 30 | // TODO: Use offsetof on DSP structures instead for a proper response. | ||
| 31 | static const std::array<u8, 32> canned_response {{ | ||
| 32 | 0x0F, 0x00, 0xFF, 0xBF, 0x8E, 0x9E, 0x80, 0x86, 0x8E, 0xA7, 0x30, 0x94, 0x00, 0x84, 0x40, 0x85, | ||
| 33 | 0x8E, 0x94, 0x10, 0x87, 0x10, 0x84, 0x0E, 0xA9, 0x0E, 0xAA, 0xCE, 0xAA, 0x4E, 0xAC, 0x58, 0xAC | ||
| 34 | }}; | ||
| 35 | |||
| 36 | // TODO: Move this into dsp::DSP service since it happens on the service side. | ||
| 37 | // Hardware observation: No data is returned if requested length reads beyond the end of the data in-pipe. (Compared by subtraction so the check cannot wrap around when size_t is 32 bits wide; pipe2position never exceeds the response size.) | ||
| 38 | if (length > canned_response.size() - pipe2position) { | ||
| 39 | return {}; | ||
| 40 | } | ||
| 41 | |||
| 42 | // Copy the requested bytes in one go, then advance the read position past them. | ||
| 43 | const auto first = canned_response.begin() + pipe2position; | ||
| 44 | std::vector<u8> ret(first, first + length); | ||
| 45 | pipe2position += length; | ||
| 46 | |||
| 47 | return ret; | ||
| 48 | } | ||
| 49 | |||
| 50 | void PipeWrite(u32 pipe_number, const std::vector<u8>& buffer) { | ||
| 51 | // TODO: proper pipe behaviour. For now this is a no-op: both arguments are ignored. | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace HLE | ||
| 55 | } // namespace DSP | ||
diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h new file mode 100644 index 000000000..ff6536950 --- /dev/null +++ b/src/audio_core/hle/pipe.h | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace DSP { | ||
| 12 | namespace HLE { | ||
| 13 | |||
| 14 | /// Reset the pipes by setting pipe positions back to the beginning. | ||
| 15 | void ResetPipes(); | ||
| 16 | |||
| 17 | /** | ||
| 18 | * Read a DSP pipe. | ||
| 19 | * Pipe IDs: | ||
| 20 | * pipe_number = 0: Debug | ||
| 21 | * pipe_number = 1: P-DMA | ||
| 22 | * pipe_number = 2: Audio | ||
| 23 | * pipe_number = 3: Binary | ||
| 24 | * @param pipe_number The Pipe ID | ||
| 25 | * @param length How much data to request. | ||
| 26 | * @return The data read from the pipe. The size of this vector can be less than the length requested. | ||
| 27 | */ | ||
| 28 | std::vector<u8> PipeRead(u32 pipe_number, u32 length); | ||
| 29 | |||
| 30 | /** | ||
| 31 | * Write to a DSP pipe. | ||
| 32 | * @param pipe_number The Pipe ID | ||
| 33 | * @param buffer The data to write to the pipe. | ||
| 34 | */ | ||
| 35 | void PipeWrite(u32 pipe_number, const std::vector<u8>& buffer); | ||
| 36 | |||
| 37 | } // namespace HLE | ||
| 38 | } // namespace DSP | ||
diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h new file mode 100644 index 000000000..cad21a85e --- /dev/null +++ b/src/audio_core/sink.h | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace AudioCore { | ||
| 12 | |||
| 13 | /** | ||
| 14 | * This class is an interface for an audio sink. An audio sink accepts samples in stereo signed PCM16 format to be output. | ||
| 15 | * Sinks *do not* handle resampling and expect the correct sample rate. They are dumb outputs. | ||
| 16 | */ | ||
| 17 | class Sink { | ||
| 18 | public: | ||
| 19 | virtual ~Sink() = default; | ||
| 20 | |||
| 21 | /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec) | ||
| 22 | virtual unsigned GetNativeSampleRate() const = 0; | ||
| 23 | |||
| 24 | /** | ||
| 25 | * Feed stereo samples to sink. | ||
| 26 | * @param samples Samples in interleaved stereo PCM16 format. Size of vector must be a multiple of two. | ||
| 27 | */ | ||
| 28 | virtual void EnqueueSamples(const std::vector<s16>& samples) = 0; | ||
| 29 | |||
| 30 | /// Samples enqueued that have not been played yet. | ||
| 31 | virtual std::size_t SamplesInQueue() const = 0; | ||
| 32 | }; | ||
| 33 | |||
| 34 | } // namespace | ||
diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt index e7f8a17f9..b9abb818e 100644 --- a/src/citra/CMakeLists.txt +++ b/src/citra/CMakeLists.txt | |||
| @@ -17,7 +17,7 @@ include_directories(${GLFW_INCLUDE_DIRS}) | |||
| 17 | link_directories(${GLFW_LIBRARY_DIRS}) | 17 | link_directories(${GLFW_LIBRARY_DIRS}) |
| 18 | 18 | ||
| 19 | add_executable(citra ${SRCS} ${HEADERS}) | 19 | add_executable(citra ${SRCS} ${HEADERS}) |
| 20 | target_link_libraries(citra core video_core common) | 20 | target_link_libraries(citra core video_core audio_core common) |
| 21 | target_link_libraries(citra ${GLFW_LIBRARIES} ${OPENGL_gl_LIBRARY} inih glad) | 21 | target_link_libraries(citra ${GLFW_LIBRARIES} ${OPENGL_gl_LIBRARY} inih glad) |
| 22 | if (MSVC) | 22 | if (MSVC) |
| 23 | target_link_libraries(citra getopt) | 23 | target_link_libraries(citra getopt) |
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index bbf6ae001..b3d1205a4 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt | |||
| @@ -79,7 +79,7 @@ if (APPLE) | |||
| 79 | else() | 79 | else() |
| 80 | add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS}) | 80 | add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS}) |
| 81 | endif() | 81 | endif() |
| 82 | target_link_libraries(citra-qt core video_core common qhexedit) | 82 | target_link_libraries(citra-qt core video_core audio_core common qhexedit) |
| 83 | target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) | 83 | target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) |
| 84 | target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) | 84 | target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) |
| 85 | 85 | ||
diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 66689f398..371eb17a1 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h | |||
| @@ -115,29 +115,24 @@ template<std::size_t position, std::size_t bits, typename T> | |||
| 115 | struct BitField | 115 | struct BitField |
| 116 | { | 116 | { |
| 117 | private: | 117 | private: |
| 118 | // This constructor might be considered ambiguous: | 118 | // We hide the copy assignment operator here, because the default copy |
| 119 | // Would it initialize the storage or just the bitfield? | 119 | // assignment would copy the full storage value, rather than just the bits |
| 120 | // Hence, delete it. Use the assignment operator to set bitfield values! | 120 | // relevant to this particular bit field. |
| 121 | BitField(T val) = delete; | 121 | // We don't delete it because we want BitField to be trivially copyable. |
| 122 | BitField& operator=(const BitField&) = default; | ||
| 122 | 123 | ||
| 123 | public: | 124 | public: |
| 125 | // This constructor and assignment operator might be considered ambiguous: | ||
| 126 | // Would they initialize the storage or just the bitfield? | ||
| 127 | // Hence, delete them. Use the Assign method to set bitfield values! | ||
| 128 | BitField(T val) = delete; | ||
| 129 | BitField& operator=(T val) = delete; | ||
| 130 | |||
| 124 | // Force default constructor to be created | 131 | // Force default constructor to be created |
| 125 | // so that we can use this within unions | 132 | // so that we can use this within unions |
| 126 | BitField() = default; | 133 | BitField() = default; |
| 127 | 134 | ||
| 128 | // We explicitly delete the copy assigment operator here, because the | 135 | FORCE_INLINE operator T() const { |
| 129 | // default copy assignment would copy the full storage value, rather than | ||
| 130 | // just the bits relevant to this particular bit field. | ||
| 131 | BitField& operator=(const BitField&) = delete; | ||
| 132 | |||
| 133 | FORCE_INLINE BitField& operator=(T val) | ||
| 134 | { | ||
| 135 | Assign(val); | ||
| 136 | return *this; | ||
| 137 | } | ||
| 138 | |||
| 139 | FORCE_INLINE operator T() const | ||
| 140 | { | ||
| 141 | return Value(); | 136 | return Value(); |
| 142 | } | 137 | } |
| 143 | 138 | ||
| @@ -145,8 +140,7 @@ public: | |||
| 145 | storage = (storage & ~GetMask()) | (((StorageType)value << position) & GetMask()); | 140 | storage = (storage & ~GetMask()) | (((StorageType)value << position) & GetMask()); |
| 146 | } | 141 | } |
| 147 | 142 | ||
| 148 | FORCE_INLINE T Value() const | 143 | FORCE_INLINE T Value() const { |
| 149 | { | ||
| 150 | if (std::numeric_limits<T>::is_signed) | 144 | if (std::numeric_limits<T>::is_signed) |
| 151 | { | 145 | { |
| 152 | std::size_t shift = 8 * sizeof(T)-bits; | 146 | std::size_t shift = 8 * sizeof(T)-bits; |
| @@ -159,8 +153,7 @@ public: | |||
| 159 | } | 153 | } |
| 160 | 154 | ||
| 161 | // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015 | 155 | // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015 |
| 162 | FORCE_INLINE bool ToBool() const | 156 | FORCE_INLINE bool ToBool() const { |
| 163 | { | ||
| 164 | return Value() != 0; | 157 | return Value() != 0; |
| 165 | } | 158 | } |
| 166 | 159 | ||
| @@ -176,8 +169,7 @@ private: | |||
| 176 | // Unsigned version of StorageType | 169 | // Unsigned version of StorageType |
| 177 | typedef typename std::make_unsigned<StorageType>::type StorageTypeU; | 170 | typedef typename std::make_unsigned<StorageType>::type StorageTypeU; |
| 178 | 171 | ||
| 179 | FORCE_INLINE StorageType GetMask() const | 172 | FORCE_INLINE StorageType GetMask() const { |
| 180 | { | ||
| 181 | return (((StorageTypeU)~0) >> (8 * sizeof(T)-bits)) << position; | 173 | return (((StorageTypeU)~0) >> (8 * sizeof(T)-bits)) << position; |
| 182 | } | 174 | } |
| 183 | 175 | ||
| @@ -189,6 +181,10 @@ private: | |||
| 189 | static_assert(position < 8 * sizeof(T), "Invalid position"); | 181 | static_assert(position < 8 * sizeof(T), "Invalid position"); |
| 190 | static_assert(bits <= 8 * sizeof(T), "Invalid number of bits"); | 182 | static_assert(bits <= 8 * sizeof(T), "Invalid number of bits"); |
| 191 | static_assert(bits > 0, "Invalid number of bits"); | 183 | static_assert(bits > 0, "Invalid number of bits"); |
| 192 | static_assert(std::is_standard_layout<T>::value, "Invalid base type"); | 184 | static_assert(std::is_pod<T>::value, "Invalid base type"); |
| 193 | }; | 185 | }; |
| 194 | #pragma pack() | 186 | #pragma pack() |
| 187 | |||
| 188 | #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) | ||
| 189 | static_assert(std::is_trivially_copyable<BitField<0, 1, u32>>::value, "BitField must be trivially copyable"); | ||
| 190 | #endif | ||
diff --git a/src/common/emu_window.cpp b/src/common/emu_window.cpp index b69b05cb9..b2807354a 100644 --- a/src/common/emu_window.cpp +++ b/src/common/emu_window.cpp | |||
| @@ -55,14 +55,14 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) { | |||
| 55 | (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top); | 55 | (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top); |
| 56 | 56 | ||
| 57 | touch_pressed = true; | 57 | touch_pressed = true; |
| 58 | pad_state.touch = 1; | 58 | pad_state.touch.Assign(1); |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | void EmuWindow::TouchReleased() { | 61 | void EmuWindow::TouchReleased() { |
| 62 | touch_pressed = false; | 62 | touch_pressed = false; |
| 63 | touch_x = 0; | 63 | touch_x = 0; |
| 64 | touch_y = 0; | 64 | touch_y = 0; |
| 65 | pad_state.touch = 0; | 65 | pad_state.touch.Assign(0); |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) { | 68 | void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) { |
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index d186ba8f8..58819012d 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp | |||
| @@ -58,6 +58,8 @@ namespace Log { | |||
| 58 | CLS(Render) \ | 58 | CLS(Render) \ |
| 59 | SUB(Render, Software) \ | 59 | SUB(Render, Software) \ |
| 60 | SUB(Render, OpenGL) \ | 60 | SUB(Render, OpenGL) \ |
| 61 | CLS(Audio) \ | ||
| 62 | SUB(Audio, DSP) \ | ||
| 61 | CLS(Loader) | 63 | CLS(Loader) |
| 62 | 64 | ||
| 63 | // GetClassName is a macro defined by Windows.h, grrr... | 65 | // GetClassName is a macro defined by Windows.h, grrr... |
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h index c1f4d08e4..795d42ebd 100644 --- a/src/common/logging/backend.h +++ b/src/common/logging/backend.h | |||
| @@ -27,25 +27,9 @@ struct Entry { | |||
| 27 | std::string message; | 27 | std::string message; |
| 28 | 28 | ||
| 29 | Entry() = default; | 29 | Entry() = default; |
| 30 | Entry(Entry&& o) = default; | ||
| 30 | 31 | ||
| 31 | // TODO(yuriks) Use defaulted move constructors once MSVC supports them | 32 | Entry& operator=(Entry&& o) = default; |
| 32 | #define MOVE(member) member(std::move(o.member)) | ||
| 33 | Entry(Entry&& o) | ||
| 34 | : MOVE(timestamp), MOVE(log_class), MOVE(log_level), | ||
| 35 | MOVE(location), MOVE(message) | ||
| 36 | {} | ||
| 37 | #undef MOVE | ||
| 38 | |||
| 39 | Entry& operator=(const Entry&& o) { | ||
| 40 | #define MOVE(member) member = std::move(o.member) | ||
| 41 | MOVE(timestamp); | ||
| 42 | MOVE(log_class); | ||
| 43 | MOVE(log_level); | ||
| 44 | MOVE(location); | ||
| 45 | MOVE(message); | ||
| 46 | #undef MOVE | ||
| 47 | return *this; | ||
| 48 | } | ||
| 49 | }; | 33 | }; |
| 50 | 34 | ||
| 51 | /** | 35 | /** |
diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 2d9323a7b..ec7bb00b8 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h | |||
| @@ -73,6 +73,8 @@ enum class Class : ClassType { | |||
| 73 | Render, ///< Emulator video output and hardware acceleration | 73 | Render, ///< Emulator video output and hardware acceleration |
| 74 | Render_Software, ///< Software renderer backend | 74 | Render_Software, ///< Software renderer backend |
| 75 | Render_OpenGL, ///< OpenGL backend | 75 | Render_OpenGL, ///< OpenGL backend |
| 76 | Audio, ///< Emulator audio output | ||
| 77 | Audio_DSP, ///< The HLE implementation of the DSP | ||
| 76 | Loader, ///< ROM loader | 78 | Loader, ///< ROM loader |
| 77 | 79 | ||
| 78 | Count ///< Total number of logging classes | 80 | Count ///< Total number of logging classes |
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp index 0cfb43fc7..862643448 100644 --- a/src/core/hle/kernel/memory.cpp +++ b/src/core/hle/kernel/memory.cpp | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | #include <utility> | 7 | #include <utility> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "audio_core/audio_core.h" | ||
| 11 | |||
| 10 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 11 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 12 | 14 | ||
| @@ -107,7 +109,6 @@ struct MemoryArea { | |||
| 107 | static MemoryArea memory_areas[] = { | 109 | static MemoryArea memory_areas[] = { |
| 108 | {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory | 110 | {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory |
| 109 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) | 111 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) |
| 110 | {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory | ||
| 111 | {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory | 112 | {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory |
| 112 | }; | 113 | }; |
| 113 | 114 | ||
| @@ -133,6 +134,8 @@ void InitLegacyAddressSpace(Kernel::VMManager& address_space) { | |||
| 133 | auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, | 134 | auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, |
| 134 | (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); | 135 | (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); |
| 135 | address_space.Reprotect(shared_page_vma, VMAPermission::Read); | 136 | address_space.Reprotect(shared_page_vma, VMAPermission::Read); |
| 137 | |||
| 138 | AudioCore::AddAddressSpace(address_space); | ||
| 136 | } | 139 | } |
| 137 | 140 | ||
| 138 | } // namespace | 141 | } // namespace |
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index d148efde2..16eb972fb 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -35,7 +35,7 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) { | |||
| 35 | 35 | ||
| 36 | process->codeset = std::move(code_set); | 36 | process->codeset = std::move(code_set); |
| 37 | process->flags.raw = 0; | 37 | process->flags.raw = 0; |
| 38 | process->flags.memory_region = MemoryRegion::APPLICATION; | 38 | process->flags.memory_region.Assign(MemoryRegion::APPLICATION); |
| 39 | Memory::InitLegacyAddressSpace(process->vm_manager); | 39 | Memory::InitLegacyAddressSpace(process->vm_manager); |
| 40 | 40 | ||
| 41 | return process; | 41 | return process; |
diff --git a/src/core/hle/result.h b/src/core/hle/result.h index ea3abb5f6..0fce5988b 100644 --- a/src/core/hle/result.h +++ b/src/core/hle/result.h | |||
| @@ -193,10 +193,10 @@ union ResultCode { | |||
| 193 | explicit ResultCode(u32 raw) : raw(raw) {} | 193 | explicit ResultCode(u32 raw) : raw(raw) {} |
| 194 | ResultCode(ErrorDescription description_, ErrorModule module_, | 194 | ResultCode(ErrorDescription description_, ErrorModule module_, |
| 195 | ErrorSummary summary_, ErrorLevel level_) : raw(0) { | 195 | ErrorSummary summary_, ErrorLevel level_) : raw(0) { |
| 196 | description = description_; | 196 | description.Assign(description_); |
| 197 | module = module_; | 197 | module.Assign(module_); |
| 198 | summary = summary_; | 198 | summary.Assign(summary_); |
| 199 | level = level_; | 199 | level.Assign(level_); |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | ResultCode& operator=(const ResultCode& o) { raw = o.raw; return *this; } | 202 | ResultCode& operator=(const ResultCode& o) { raw = o.raw; return *this; } |
diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp index 633fe19eb..7556aa6a5 100644 --- a/src/core/hle/service/cfg/cfg.cpp +++ b/src/core/hle/service/cfg/cfg.cpp | |||
| @@ -293,8 +293,8 @@ ResultCode DeleteConfigNANDSaveFile() { | |||
| 293 | 293 | ||
| 294 | ResultCode UpdateConfigNANDSavegame() { | 294 | ResultCode UpdateConfigNANDSavegame() { |
| 295 | FileSys::Mode mode = {}; | 295 | FileSys::Mode mode = {}; |
| 296 | mode.write_flag = 1; | 296 | mode.write_flag.Assign(1); |
| 297 | mode.create_flag = 1; | 297 | mode.create_flag.Assign(1); |
| 298 | 298 | ||
| 299 | FileSys::Path path("config"); | 299 | FileSys::Path path("config"); |
| 300 | 300 | ||
| @@ -405,7 +405,7 @@ void Init() { | |||
| 405 | 405 | ||
| 406 | FileSys::Path config_path("config"); | 406 | FileSys::Path config_path("config"); |
| 407 | FileSys::Mode open_mode = {}; | 407 | FileSys::Mode open_mode = {}; |
| 408 | open_mode.read_flag = 1; | 408 | open_mode.read_flag.Assign(1); |
| 409 | 409 | ||
| 410 | auto config_result = Service::FS::OpenFileFromArchive(*archive_result, config_path, open_mode); | 410 | auto config_result = Service::FS::OpenFileFromArchive(*archive_result, config_path, open_mode); |
| 411 | 411 | ||
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp index f9f931f6d..15d3274ec 100644 --- a/src/core/hle/service/dsp_dsp.cpp +++ b/src/core/hle/service/dsp_dsp.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "audio_core/hle/pipe.h" | ||
| 6 | |||
| 5 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 6 | 8 | ||
| 7 | #include "core/hle/kernel/event.h" | 9 | #include "core/hle/kernel/event.h" |
| @@ -14,17 +16,30 @@ namespace DSP_DSP { | |||
| 14 | 16 | ||
| 15 | static u32 read_pipe_count; | 17 | static u32 read_pipe_count; |
| 16 | static Kernel::SharedPtr<Kernel::Event> semaphore_event; | 18 | static Kernel::SharedPtr<Kernel::Event> semaphore_event; |
| 17 | static Kernel::SharedPtr<Kernel::Event> interrupt_event; | ||
| 18 | 19 | ||
| 19 | void SignalInterrupt() { | 20 | struct PairHash { |
| 20 | // TODO(bunnei): This is just a stub, it does not do anything other than signal to the emulated | 21 | template <typename T, typename U> |
| 21 | // application that a DSP interrupt occurred, without specifying which one. Since we do not | 22 | std::size_t operator()(const std::pair<T, U> &x) const { |
| 22 | // emulate the DSP yet (and how it works is largely unknown), this is a work around to get games | 23 | // TODO(yuriks): Replace with better hash combining function. |
| 23 | // that check the DSP interrupt signal event to run. We should figure out the different types of | 24 | return std::hash<T>()(x.first) ^ std::hash<U>()(x.second); |
| 24 | // DSP interrupts, and trigger them at the appropriate times. | 25 | } |
| 26 | }; | ||
| 27 | |||
| 28 | /// Map of (audio interrupt number, channel number) to Kernel::Events. See: RegisterInterruptEvents | ||
| 29 | static std::unordered_map<std::pair<u32, u32>, Kernel::SharedPtr<Kernel::Event>, PairHash> interrupt_events; | ||
| 30 | |||
| 31 | // DSP Interrupts: | ||
| 32 | // Interrupt #2 occurs every frame tick. Userland programs normally have a thread that's waiting | ||
| 33 | // for an interrupt event. Immediately after this interrupt event, userland normally updates the | ||
| 34 | // state in the next region and increments the relevant frame counter by two. | ||
| 35 | void SignalAllInterrupts() { | ||
| 36 | // HACK: The other interrupts have currently unknown purpose, we trigger them each tick in any case. | ||
| 37 | for (auto& interrupt_event : interrupt_events) | ||
| 38 | interrupt_event.second->Signal(); | ||
| 39 | } | ||
| 25 | 40 | ||
| 26 | if (interrupt_event != 0) | 41 | void SignalInterrupt(u32 interrupt, u32 channel) { |
| 27 | interrupt_event->Signal(); | 42 | interrupt_events[std::make_pair(interrupt, channel)]->Signal(); |
| 28 | } | 43 | } |
| 29 | 44 | ||
| 30 | /** | 45 | /** |
| @@ -43,7 +58,7 @@ static void ConvertProcessAddressFromDspDram(Service::Interface* self) { | |||
| 43 | cmd_buff[1] = 0; // No error | 58 | cmd_buff[1] = 0; // No error |
| 44 | cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000); | 59 | cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000); |
| 45 | 60 | ||
| 46 | LOG_WARNING(Service_DSP, "(STUBBED) called with address 0x%08X", addr); | 61 | LOG_TRACE(Service_DSP, "addr=0x%08X", addr); |
| 47 | } | 62 | } |
| 48 | 63 | ||
| 49 | /** | 64 | /** |
| @@ -121,8 +136,8 @@ static void FlushDataCache(Service::Interface* self) { | |||
| 121 | /** | 136 | /** |
| 122 | * DSP_DSP::RegisterInterruptEvents service function | 137 | * DSP_DSP::RegisterInterruptEvents service function |
| 123 | * Inputs: | 138 | * Inputs: |
| 124 | * 1 : Parameter 0 (purpose unknown) | 139 | * 1 : Interrupt Number |
| 125 | * 2 : Parameter 1 (purpose unknown) | 140 | * 2 : Channel Number |
| 126 | * 4 : Interrupt event handle | 141 | * 4 : Interrupt event handle |
| 127 | * Outputs: | 142 | * Outputs: |
| 128 | * 1 : Result of function, 0 on success, otherwise error code | 143 | * 1 : Result of function, 0 on success, otherwise error code |
| @@ -130,22 +145,24 @@ static void FlushDataCache(Service::Interface* self) { | |||
| 130 | static void RegisterInterruptEvents(Service::Interface* self) { | 145 | static void RegisterInterruptEvents(Service::Interface* self) { |
| 131 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 146 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 132 | 147 | ||
| 133 | u32 param0 = cmd_buff[1]; | 148 | u32 interrupt = cmd_buff[1]; |
| 134 | u32 param1 = cmd_buff[2]; | 149 | u32 channel = cmd_buff[2]; |
| 135 | u32 event_handle = cmd_buff[4]; | 150 | u32 event_handle = cmd_buff[4]; |
| 136 | 151 | ||
| 137 | auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); | 152 | if (event_handle) { |
| 138 | if (evt != nullptr) { | 153 | auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); |
| 139 | interrupt_event = evt; | 154 | if (evt) { |
| 140 | cmd_buff[1] = 0; // No error | 155 | interrupt_events[std::make_pair(interrupt, channel)] = evt; |
| 156 | cmd_buff[1] = RESULT_SUCCESS.raw; | ||
| 157 | LOG_WARNING(Service_DSP, "Registered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); | ||
| 158 | } else { | ||
| 159 | cmd_buff[1] = -1; | ||
| 160 | LOG_ERROR(Service_DSP, "Invalid event handle! interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); | ||
| 161 | } | ||
| 141 | } else { | 162 | } else { |
| 142 | LOG_ERROR(Service_DSP, "called with invalid handle=%08X", cmd_buff[4]); | 163 | interrupt_events.erase(std::make_pair(interrupt, channel)); |
| 143 | 164 | LOG_WARNING(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); | |
| 144 | // TODO(yuriks): An error should be returned from SendSyncRequest, not in the cmdbuf | ||
| 145 | cmd_buff[1] = -1; | ||
| 146 | } | 165 | } |
| 147 | |||
| 148 | LOG_WARNING(Service_DSP, "(STUBBED) called param0=%u, param1=%u, event_handle=0x%08X", param0, param1, event_handle); | ||
| 149 | } | 166 | } |
| 150 | 167 | ||
| 151 | /** | 168 | /** |
| @@ -158,8 +175,6 @@ static void RegisterInterruptEvents(Service::Interface* self) { | |||
| 158 | static void SetSemaphore(Service::Interface* self) { | 175 | static void SetSemaphore(Service::Interface* self) { |
| 159 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 176 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 160 | 177 | ||
| 161 | SignalInterrupt(); | ||
| 162 | |||
| 163 | cmd_buff[1] = 0; // No error | 178 | cmd_buff[1] = 0; // No error |
| 164 | 179 | ||
| 165 | LOG_WARNING(Service_DSP, "(STUBBED) called"); | 180 | LOG_WARNING(Service_DSP, "(STUBBED) called"); |
| @@ -168,9 +183,9 @@ static void SetSemaphore(Service::Interface* self) { | |||
| 168 | /** | 183 | /** |
| 169 | * DSP_DSP::WriteProcessPipe service function | 184 | * DSP_DSP::WriteProcessPipe service function |
| 170 | * Inputs: | 185 | * Inputs: |
| 171 | * 1 : Number | 186 | * 1 : Channel |
| 172 | * 2 : Size | 187 | * 2 : Size |
| 173 | * 3 : (size <<14) | 0x402 | 188 | * 3 : (size << 14) | 0x402 |
| 174 | * 4 : Buffer | 189 | * 4 : Buffer |
| 175 | * Outputs: | 190 | * Outputs: |
| 176 | * 0 : Return header | 191 | * 0 : Return header |
| @@ -179,21 +194,42 @@ static void SetSemaphore(Service::Interface* self) { | |||
| 179 | static void WriteProcessPipe(Service::Interface* self) { | 194 | static void WriteProcessPipe(Service::Interface* self) { |
| 180 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 195 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 181 | 196 | ||
| 182 | u32 number = cmd_buff[1]; | 197 | u32 channel = cmd_buff[1]; |
| 183 | u32 size = cmd_buff[2]; | 198 | u32 size = cmd_buff[2]; |
| 184 | u32 new_size = cmd_buff[3]; | ||
| 185 | u32 buffer = cmd_buff[4]; | 199 | u32 buffer = cmd_buff[4]; |
| 186 | 200 | ||
| 201 | if (IPC::StaticBufferDesc(size, 1) != cmd_buff[3]) { | ||
| 202 | LOG_ERROR(Service_DSP, "IPC static buffer descriptor failed validation (0x%X). channel=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], channel, size, buffer); | ||
| 203 | cmd_buff[1] = -1; // TODO | ||
| 204 | return; | ||
| 205 | } | ||
| 206 | |||
| 207 | if (!Memory::GetPointer(buffer)) { | ||
| 208 | LOG_ERROR(Service_DSP, "Invalid Buffer: channel=%u, size=0x%X, buffer=0x%08X", channel, size, buffer); | ||
| 209 | cmd_buff[1] = -1; // TODO | ||
| 210 | return; | ||
| 211 | } | ||
| 212 | |||
| 213 | std::vector<u8> message(size); | ||
| 214 | |||
| 215 | for (size_t i = 0; i < size; i++) { | ||
| 216 | message[i] = Memory::Read8(buffer + i); | ||
| 217 | } | ||
| 218 | |||
| 219 | DSP::HLE::PipeWrite(channel, message); | ||
| 220 | |||
| 187 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | 221 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error |
| 188 | 222 | ||
| 189 | LOG_WARNING(Service_DSP, "(STUBBED) called number=%u, size=0x%X, new_size=0x%X, buffer=0x%08X", | 223 | LOG_TRACE(Service_DSP, "channel=%u, size=0x%X, buffer=0x%08X", channel, size, buffer); |
| 190 | number, size, new_size, buffer); | ||
| 191 | } | 224 | } |
| 192 | 225 | ||
| 193 | /** | 226 | /** |
| 194 | * DSP_DSP::ReadPipeIfPossible service function | 227 | * DSP_DSP::ReadPipeIfPossible service function |
| 228 | * A pipe is a means of communication between the ARM11 and DSP that occurs on | ||
| 229 | * hardware by writing to/reading from the DSP registers at 0x10203000. | ||
| 230 | * Pipes are used for initialisation. See also DSP::HLE::PipeRead. | ||
| 195 | * Inputs: | 231 | * Inputs: |
| 196 | * 1 : Unknown | 232 | * 1 : Pipe Number |
| 197 | * 2 : Unknown | 233 | * 2 : Unknown |
| 198 | * 3 : Size in bytes of read (observed only lower half word used) | 234 | * 3 : Size in bytes of read (observed only lower half word used) |
| 199 | * 0x41 : Virtual address to read from DSP pipe to in memory | 235 | * 0x41 : Virtual address to read from DSP pipe to in memory |
| @@ -204,35 +240,25 @@ static void WriteProcessPipe(Service::Interface* self) { | |||
| 204 | static void ReadPipeIfPossible(Service::Interface* self) { | 240 | static void ReadPipeIfPossible(Service::Interface* self) { |
| 205 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 241 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 206 | 242 | ||
| 207 | u32 unk1 = cmd_buff[1]; | 243 | u32 pipe = cmd_buff[1]; |
| 208 | u32 unk2 = cmd_buff[2]; | 244 | u32 unk2 = cmd_buff[2]; |
| 209 | u32 size = cmd_buff[3] & 0xFFFF;// Lower 16 bits are size | 245 | u32 size = cmd_buff[3] & 0xFFFF;// Lower 16 bits are size |
| 210 | VAddr addr = cmd_buff[0x41]; | 246 | VAddr addr = cmd_buff[0x41]; |
| 211 | 247 | ||
| 212 | // Canned DSP responses that games expect. These were taken from HW by 3dmoo team. | 248 | if (!Memory::GetPointer(addr)) { |
| 213 | // TODO: Remove this hack :) | 249 | LOG_ERROR(Service_DSP, "Invalid addr: pipe=0x%08X, unk2=0x%08X, size=0x%X, buffer=0x%08X", pipe, unk2, size, addr); |
| 214 | static const std::array<u16, 16> canned_read_pipe = {{ | 250 | cmd_buff[1] = -1; // TODO |
| 215 | 0x000F, 0xBFFF, 0x9E8E, 0x8680, 0xA78E, 0x9430, 0x8400, 0x8540, | 251 | return; |
| 216 | 0x948E, 0x8710, 0x8410, 0xA90E, 0xAA0E, 0xAACE, 0xAC4E, 0xAC58 | 252 | } |
| 217 | }}; | ||
| 218 | 253 | ||
| 219 | u32 initial_size = read_pipe_count; | 254 | std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); |
| 220 | 255 | ||
| 221 | for (unsigned offset = 0; offset < size; offset += sizeof(u16)) { | 256 | Memory::WriteBlock(addr, response.data(), response.size()); |
| 222 | if (read_pipe_count < canned_read_pipe.size()) { | ||
| 223 | Memory::Write16(addr + offset, canned_read_pipe[read_pipe_count]); | ||
| 224 | read_pipe_count++; | ||
| 225 | } else { | ||
| 226 | LOG_ERROR(Service_DSP, "canned read pipe log exceeded!"); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | 257 | ||
| 231 | cmd_buff[1] = 0; // No error | 258 | cmd_buff[1] = 0; // No error |
| 232 | cmd_buff[2] = (read_pipe_count - initial_size) * sizeof(u16); | 259 | cmd_buff[2] = (u32)response.size(); |
| 233 | 260 | ||
| 234 | LOG_WARNING(Service_DSP, "(STUBBED) called unk1=0x%08X, unk2=0x%08X, size=0x%X, buffer=0x%08X", | 261 | LOG_TRACE(Service_DSP, "pipe=0x%08X, unk2=0x%08X, size=0x%X, buffer=0x%08X", pipe, unk2, size, addr); |
| 235 | unk1, unk2, size, addr); | ||
| 236 | } | 262 | } |
| 237 | 263 | ||
| 238 | /** | 264 | /** |
| @@ -311,7 +337,6 @@ const Interface::FunctionInfo FunctionTable[] = { | |||
| 311 | 337 | ||
| 312 | Interface::Interface() { | 338 | Interface::Interface() { |
| 313 | semaphore_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "DSP_DSP::semaphore_event"); | 339 | semaphore_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "DSP_DSP::semaphore_event"); |
| 314 | interrupt_event = nullptr; | ||
| 315 | read_pipe_count = 0; | 340 | read_pipe_count = 0; |
| 316 | 341 | ||
| 317 | Register(FunctionTable); | 342 | Register(FunctionTable); |
| @@ -319,7 +344,7 @@ Interface::Interface() { | |||
| 319 | 344 | ||
| 320 | Interface::~Interface() { | 345 | Interface::~Interface() { |
| 321 | semaphore_event = nullptr; | 346 | semaphore_event = nullptr; |
| 322 | interrupt_event = nullptr; | 347 | interrupt_events.clear(); |
| 323 | } | 348 | } |
| 324 | 349 | ||
| 325 | } // namespace | 350 | } // namespace |
diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h index b6f611db5..32b89e9bb 100644 --- a/src/core/hle/service/dsp_dsp.h +++ b/src/core/hle/service/dsp_dsp.h | |||
| @@ -23,7 +23,15 @@ public: | |||
| 23 | } | 23 | } |
| 24 | }; | 24 | }; |
| 25 | 25 | ||
| 26 | /// Signals that a DSP interrupt has occurred to userland code | 26 | /// Signal all audio related interrupts. |
| 27 | void SignalInterrupt(); | 27 | void SignalAllInterrupts(); |
| 28 | |||
| 29 | /** | ||
| 30 | * Signal a specific audio related interrupt based on interrupt id and channel id. | ||
| 31 | * @param interrupt_id The interrupt id | ||
| 32 | * @param channel_id The channel id | ||
| 33 | * The significance of various values of interrupt_id and channel_id is not yet known. | ||
| 34 | */ | ||
| 35 | void SignalInterrupt(u32 interrupt_id, u32 channel_id); | ||
| 28 | 36 | ||
| 29 | } // namespace | 37 | } // namespace |
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 98b11c798..5838b6d71 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp | |||
| @@ -347,7 +347,7 @@ void SignalInterrupt(InterruptId interrupt_id) { | |||
| 347 | FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); | 347 | FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); |
| 348 | if (info->is_dirty) { | 348 | if (info->is_dirty) { |
| 349 | SetBufferSwap(screen_id, info->framebuffer_info[info->index]); | 349 | SetBufferSwap(screen_id, info->framebuffer_info[info->index]); |
| 350 | info->is_dirty = false; | 350 | info->is_dirty.Assign(false); |
| 351 | } | 351 | } |
| 352 | } | 352 | } |
| 353 | } | 353 | } |
| @@ -499,7 +499,7 @@ static void SetLcdForceBlack(Service::Interface* self) { | |||
| 499 | 499 | ||
| 500 | // Since data is already zeroed, there is no need to explicitly set | 500 | // Since data is already zeroed, there is no need to explicitly set |
| 501 | // the color to black (all zero). | 501 | // the color to black (all zero). |
| 502 | data.is_enabled = enable_black; | 502 | data.is_enabled.Assign(enable_black); |
| 503 | 503 | ||
| 504 | LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top), data.raw); // Top LCD | 504 | LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top), data.raw); // Top LCD |
| 505 | LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom), data.raw); // Bottom LCD | 505 | LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom), data.raw); // Bottom LCD |
| @@ -521,7 +521,7 @@ static void TriggerCmdReqQueue(Service::Interface* self) { | |||
| 521 | ExecuteCommand(command_buffer->commands[i], thread_id); | 521 | ExecuteCommand(command_buffer->commands[i], thread_id); |
| 522 | 522 | ||
| 523 | // Indicates that command has completed | 523 | // Indicates that command has completed |
| 524 | command_buffer->number_commands = command_buffer->number_commands - 1; | 524 | command_buffer->number_commands.Assign(command_buffer->number_commands - 1); |
| 525 | } | 525 | } |
| 526 | } | 526 | } |
| 527 | 527 | ||
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 0bed0ce36..11d7e69a1 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp | |||
| @@ -105,7 +105,7 @@ void Update() { | |||
| 105 | bool pressed = false; | 105 | bool pressed = false; |
| 106 | 106 | ||
| 107 | std::tie(touch_entry->x, touch_entry->y, pressed) = VideoCore::g_emu_window->GetTouchState(); | 107 | std::tie(touch_entry->x, touch_entry->y, pressed) = VideoCore::g_emu_window->GetTouchState(); |
| 108 | touch_entry->valid = pressed ? 1 : 0; | 108 | touch_entry->valid.Assign(pressed ? 1 : 0); |
| 109 | 109 | ||
| 110 | // TODO(bunnei): We're not doing anything with offset 0xA8 + 0x18 of HID SharedMemory, which | 110 | // TODO(bunnei): We're not doing anything with offset 0xA8 + 0x18 of HID SharedMemory, which |
| 111 | // supposedly is "Touch-screen entry, which contains the raw coordinate data prior to being | 111 | // supposedly is "Touch-screen entry, which contains the raw coordinate data prior to being |
diff --git a/src/core/hle/service/ptm/ptm.cpp b/src/core/hle/service/ptm/ptm.cpp index 22c1093ff..6bdee4d9e 100644 --- a/src/core/hle/service/ptm/ptm.cpp +++ b/src/core/hle/service/ptm/ptm.cpp | |||
| @@ -110,8 +110,8 @@ void Init() { | |||
| 110 | 110 | ||
| 111 | FileSys::Path gamecoin_path("gamecoin.dat"); | 111 | FileSys::Path gamecoin_path("gamecoin.dat"); |
| 112 | FileSys::Mode open_mode = {}; | 112 | FileSys::Mode open_mode = {}; |
| 113 | open_mode.write_flag = 1; | 113 | open_mode.write_flag.Assign(1); |
| 114 | open_mode.create_flag = 1; | 114 | open_mode.create_flag.Assign(1); |
| 115 | // Open the file and write the default gamecoin information | 115 | // Open the file and write the default gamecoin information |
| 116 | auto gamecoin_result = Service::FS::OpenFileFromArchive(*archive_result, gamecoin_path, open_mode); | 116 | auto gamecoin_result = Service::FS::OpenFileFromArchive(*archive_result, gamecoin_path, open_mode); |
| 117 | if (gamecoin_result.Succeeded()) { | 117 | if (gamecoin_result.Succeeded()) { |
diff --git a/src/core/hle/service/soc_u.cpp b/src/core/hle/service/soc_u.cpp index 822b093f4..e603bf794 100644 --- a/src/core/hle/service/soc_u.cpp +++ b/src/core/hle/service/soc_u.cpp | |||
| @@ -178,17 +178,17 @@ struct CTRPollFD { | |||
| 178 | static Events TranslateTo3DS(u32 input_event) { | 178 | static Events TranslateTo3DS(u32 input_event) { |
| 179 | Events ev = {}; | 179 | Events ev = {}; |
| 180 | if (input_event & POLLIN) | 180 | if (input_event & POLLIN) |
| 181 | ev.pollin = 1; | 181 | ev.pollin.Assign(1); |
| 182 | if (input_event & POLLPRI) | 182 | if (input_event & POLLPRI) |
| 183 | ev.pollpri = 1; | 183 | ev.pollpri.Assign(1); |
| 184 | if (input_event & POLLHUP) | 184 | if (input_event & POLLHUP) |
| 185 | ev.pollhup = 1; | 185 | ev.pollhup.Assign(1); |
| 186 | if (input_event & POLLERR) | 186 | if (input_event & POLLERR) |
| 187 | ev.pollerr = 1; | 187 | ev.pollerr.Assign(1); |
| 188 | if (input_event & POLLOUT) | 188 | if (input_event & POLLOUT) |
| 189 | ev.pollout = 1; | 189 | ev.pollout.Assign(1); |
| 190 | if (input_event & POLLNVAL) | 190 | if (input_event & POLLNVAL) |
| 191 | ev.pollnval = 1; | 191 | ev.pollnval.Assign(1); |
| 192 | return ev; | 192 | return ev; |
| 193 | } | 193 | } |
| 194 | 194 | ||
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 4bd3a632d..5312baa83 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | #include "core/core_timing.h" | 17 | #include "core/core_timing.h" |
| 18 | 18 | ||
| 19 | #include "core/hle/service/gsp_gpu.h" | 19 | #include "core/hle/service/gsp_gpu.h" |
| 20 | #include "core/hle/service/dsp_dsp.h" | ||
| 21 | #include "core/hle/service/hid/hid.h" | 20 | #include "core/hle/service/hid/hid.h" |
| 22 | 21 | ||
| 23 | #include "core/hw/hw.h" | 22 | #include "core/hw/hw.h" |
| @@ -146,8 +145,8 @@ inline void Write(u32 addr, const T data) { | |||
| 146 | 145 | ||
| 147 | // Reset "trigger" flag and set the "finish" flag | 146 | // Reset "trigger" flag and set the "finish" flag |
| 148 | // NOTE: This was confirmed to happen on hardware even if "address_start" is zero. | 147 | // NOTE: This was confirmed to happen on hardware even if "address_start" is zero. |
| 149 | config.trigger = 0; | 148 | config.trigger.Assign(0); |
| 150 | config.finished = 1; | 149 | config.finished.Assign(1); |
| 151 | } | 150 | } |
| 152 | break; | 151 | break; |
| 153 | } | 152 | } |
| @@ -414,11 +413,6 @@ static void VBlankCallback(u64 userdata, int cycles_late) { | |||
| 414 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); | 413 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); |
| 415 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); | 414 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); |
| 416 | 415 | ||
| 417 | // TODO(bunnei): Fake a DSP interrupt on each frame. This does not belong here, but | ||
| 418 | // until we can emulate DSP interrupts, this is probably the only reasonable place to do | ||
| 419 | // this. Certain games expect this to be periodically signaled. | ||
| 420 | DSP_DSP::SignalInterrupt(); | ||
| 421 | |||
| 422 | // Check for user input updates | 416 | // Check for user input updates |
| 423 | Service::HID::Update(); | 417 | Service::HID::Update(); |
| 424 | 418 | ||
| @@ -444,16 +438,16 @@ void Init() { | |||
| 444 | framebuffer_sub.address_left1 = 0x1848F000; | 438 | framebuffer_sub.address_left1 = 0x1848F000; |
| 445 | framebuffer_sub.address_left2 = 0x184C7800; | 439 | framebuffer_sub.address_left2 = 0x184C7800; |
| 446 | 440 | ||
| 447 | framebuffer_top.width = 240; | 441 | framebuffer_top.width.Assign(240); |
| 448 | framebuffer_top.height = 400; | 442 | framebuffer_top.height.Assign(400); |
| 449 | framebuffer_top.stride = 3 * 240; | 443 | framebuffer_top.stride = 3 * 240; |
| 450 | framebuffer_top.color_format = Regs::PixelFormat::RGB8; | 444 | framebuffer_top.color_format.Assign(Regs::PixelFormat::RGB8); |
| 451 | framebuffer_top.active_fb = 0; | 445 | framebuffer_top.active_fb = 0; |
| 452 | 446 | ||
| 453 | framebuffer_sub.width = 240; | 447 | framebuffer_sub.width.Assign(240); |
| 454 | framebuffer_sub.height = 320; | 448 | framebuffer_sub.height.Assign(320); |
| 455 | framebuffer_sub.stride = 3 * 240; | 449 | framebuffer_sub.stride = 3 * 240; |
| 456 | framebuffer_sub.color_format = Regs::PixelFormat::RGB8; | 450 | framebuffer_sub.color_format.Assign(Regs::PixelFormat::RGB8); |
| 457 | framebuffer_sub.active_fb = 0; | 451 | framebuffer_sub.active_fb = 0; |
| 458 | 452 | ||
| 459 | last_skip_frame = false; | 453 | last_skip_frame = false; |
diff --git a/src/core/system.cpp b/src/core/system.cpp index 7e9c56538..b62ebf69e 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp | |||
| @@ -2,9 +2,12 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "audio_core/audio_core.h" | ||
| 6 | |||
| 5 | #include "core/core.h" | 7 | #include "core/core.h" |
| 6 | #include "core/core_timing.h" | 8 | #include "core/core_timing.h" |
| 7 | #include "core/system.h" | 9 | #include "core/system.h" |
| 10 | #include "core/gdbstub/gdbstub.h" | ||
| 8 | #include "core/hw/hw.h" | 11 | #include "core/hw/hw.h" |
| 9 | #include "core/hle/hle.h" | 12 | #include "core/hle/hle.h" |
| 10 | #include "core/hle/kernel/kernel.h" | 13 | #include "core/hle/kernel/kernel.h" |
| @@ -12,8 +15,6 @@ | |||
| 12 | 15 | ||
| 13 | #include "video_core/video_core.h" | 16 | #include "video_core/video_core.h" |
| 14 | 17 | ||
| 15 | #include "core/gdbstub/gdbstub.h" | ||
| 16 | |||
| 17 | namespace System { | 18 | namespace System { |
| 18 | 19 | ||
| 19 | void Init(EmuWindow* emu_window) { | 20 | void Init(EmuWindow* emu_window) { |
| @@ -24,11 +25,13 @@ void Init(EmuWindow* emu_window) { | |||
| 24 | Kernel::Init(); | 25 | Kernel::Init(); |
| 25 | HLE::Init(); | 26 | HLE::Init(); |
| 26 | VideoCore::Init(emu_window); | 27 | VideoCore::Init(emu_window); |
| 28 | AudioCore::Init(); | ||
| 27 | GDBStub::Init(); | 29 | GDBStub::Init(); |
| 28 | } | 30 | } |
| 29 | 31 | ||
| 30 | void Shutdown() { | 32 | void Shutdown() { |
| 31 | GDBStub::Shutdown(); | 33 | GDBStub::Shutdown(); |
| 34 | AudioCore::Shutdown(); | ||
| 32 | VideoCore::Shutdown(); | 35 | VideoCore::Shutdown(); |
| 33 | HLE::Shutdown(); | 36 | HLE::Shutdown(); |
| 34 | Kernel::Shutdown(); | 37 | Kernel::Shutdown(); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c3d7294d5..4b5d298f3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -33,6 +33,7 @@ set(HEADERS | |||
| 33 | command_processor.h | 33 | command_processor.h |
| 34 | gpu_debugger.h | 34 | gpu_debugger.h |
| 35 | pica.h | 35 | pica.h |
| 36 | pica_types.h | ||
| 36 | primitive_assembly.h | 37 | primitive_assembly.h |
| 37 | rasterizer.h | 38 | rasterizer.h |
| 38 | rasterizer_interface.h | 39 | rasterizer_interface.h |
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 5d609da06..a385589d2 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp | |||
| @@ -59,15 +59,17 @@ static void InitScreenCoordinates(OutputVertex& vtx) | |||
| 59 | } viewport; | 59 | } viewport; |
| 60 | 60 | ||
| 61 | const auto& regs = g_state.regs; | 61 | const auto& regs = g_state.regs; |
| 62 | viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x); | 62 | viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); |
| 63 | viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y); | 63 | viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); |
| 64 | viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); | 64 | viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); |
| 65 | viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); | 65 | viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); |
| 66 | viewport.zscale = float24::FromRawFloat24(regs.viewport_depth_range); | 66 | viewport.zscale = float24::FromRaw(regs.viewport_depth_range); |
| 67 | viewport.offset_z = float24::FromRawFloat24(regs.viewport_depth_far_plane); | 67 | viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane); |
| 68 | 68 | ||
| 69 | float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; | 69 | float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; |
| 70 | vtx.color *= inv_w; | 70 | vtx.color *= inv_w; |
| 71 | vtx.view *= inv_w; | ||
| 72 | vtx.quat *= inv_w; | ||
| 71 | vtx.tc0 *= inv_w; | 73 | vtx.tc0 *= inv_w; |
| 72 | vtx.tc1 *= inv_w; | 74 | vtx.tc1 *= inv_w; |
| 73 | vtx.tc2 *= inv_w; | 75 | vtx.tc2 *= inv_w; |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 59c75042c..73fdfbe9c 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -98,10 +98,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 98 | Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; | 98 | Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; |
| 99 | 99 | ||
| 100 | // NOTE: The destination component order indeed is "backwards" | 100 | // NOTE: The destination component order indeed is "backwards" |
| 101 | attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); | 101 | attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); |
| 102 | attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); | 102 | attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); |
| 103 | attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); | 103 | attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); |
| 104 | attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); | 104 | attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); |
| 105 | 105 | ||
| 106 | LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, | 106 | LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, |
| 107 | attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), | 107 | attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), |
| @@ -157,15 +157,25 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 157 | 157 | ||
| 158 | // TODO: What happens if a loader overwrites a previous one's data? | 158 | // TODO: What happens if a loader overwrites a previous one's data? |
| 159 | for (unsigned component = 0; component < loader_config.component_count; ++component) { | 159 | for (unsigned component = 0; component < loader_config.component_count; ++component) { |
| 160 | if (component >= 12) | 160 | if (component >= 12) { |
| 161 | LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); | 161 | LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); |
| 162 | continue; | ||
| 163 | } | ||
| 164 | |||
| 162 | u32 attribute_index = loader_config.GetComponent(component); | 165 | u32 attribute_index = loader_config.GetComponent(component); |
| 163 | vertex_attribute_sources[attribute_index] = load_address; | 166 | if (attribute_index < 12) { |
| 164 | vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); | 167 | vertex_attribute_sources[attribute_index] = load_address; |
| 165 | vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); | 168 | vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); |
| 166 | vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | 169 | vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); |
| 167 | vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); | 170 | vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); |
| 168 | load_address += attribute_config.GetStride(attribute_index); | 171 | vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); |
| 172 | load_address += attribute_config.GetStride(attribute_index); | ||
| 173 | } else if (attribute_index < 16) { | ||
| 174 | // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively | ||
| 175 | load_address += (attribute_index - 11) * 4; | ||
| 176 | } else { | ||
| 177 | UNREACHABLE(); // This is truly unreachable due to the number of bits for each component | ||
| 178 | } | ||
| 169 | } | 179 | } |
| 170 | } | 180 | } |
| 171 | 181 | ||
| @@ -418,10 +428,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 418 | uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); | 428 | uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); |
| 419 | } else { | 429 | } else { |
| 420 | // TODO: Untested | 430 | // TODO: Untested |
| 421 | uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8); | 431 | uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); |
| 422 | uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); | 432 | uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); |
| 423 | uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); | 433 | uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); |
| 424 | uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF); | 434 | uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); |
| 425 | } | 435 | } |
| 426 | 436 | ||
| 427 | LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, | 437 | LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, |
| @@ -429,7 +439,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 429 | uniform.w.ToFloat32()); | 439 | uniform.w.ToFloat32()); |
| 430 | 440 | ||
| 431 | // TODO: Verify that this actually modifies the register! | 441 | // TODO: Verify that this actually modifies the register! |
| 432 | uniform_setup.index = uniform_setup.index + 1; | 442 | uniform_setup.index.Assign(uniform_setup.index + 1); |
| 433 | } | 443 | } |
| 434 | break; | 444 | break; |
| 435 | } | 445 | } |
| @@ -464,6 +474,24 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 464 | break; | 474 | break; |
| 465 | } | 475 | } |
| 466 | 476 | ||
| 477 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): | ||
| 478 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): | ||
| 479 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): | ||
| 480 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): | ||
| 481 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): | ||
| 482 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): | ||
| 483 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): | ||
| 484 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): | ||
| 485 | { | ||
| 486 | auto& lut_config = regs.lighting.lut_config; | ||
| 487 | |||
| 488 | ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); | ||
| 489 | |||
| 490 | g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; | ||
| 491 | lut_config.index.Assign(lut_config.index + 1); | ||
| 492 | break; | ||
| 493 | } | ||
| 494 | |||
| 467 | default: | 495 | default: |
| 468 | break; | 496 | break; |
| 469 | } | 497 | } |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 4f66dbd65..6e6fd7335 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -201,11 +201,11 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c | |||
| 201 | 201 | ||
| 202 | if (it == output_info_table.end()) { | 202 | if (it == output_info_table.end()) { |
| 203 | output_info_table.emplace_back(); | 203 | output_info_table.emplace_back(); |
| 204 | output_info_table.back().type = type; | 204 | output_info_table.back().type.Assign(type); |
| 205 | output_info_table.back().component_mask = component_mask; | 205 | output_info_table.back().component_mask.Assign(component_mask); |
| 206 | output_info_table.back().id = i; | 206 | output_info_table.back().id.Assign(i); |
| 207 | } else { | 207 | } else { |
| 208 | it->component_mask = it->component_mask | component_mask; | 208 | it->component_mask.Assign(it->component_mask | component_mask); |
| 209 | } | 209 | } |
| 210 | } catch (const std::out_of_range& ) { | 210 | } catch (const std::out_of_range& ) { |
| 211 | DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); | 211 | DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); |
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 2f1b2dec4..9077b1725 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -16,6 +16,8 @@ | |||
| 16 | #include "common/vector_math.h" | 16 | #include "common/vector_math.h" |
| 17 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| 18 | 18 | ||
| 19 | #include "pica_types.h" | ||
| 20 | |||
| 19 | namespace Pica { | 21 | namespace Pica { |
| 20 | 22 | ||
| 21 | // Returns index corresponding to the Regs member labeled by field_name | 23 | // Returns index corresponding to the Regs member labeled by field_name |
| @@ -239,7 +241,8 @@ struct Regs { | |||
| 239 | TextureConfig texture0; | 241 | TextureConfig texture0; |
| 240 | INSERT_PADDING_WORDS(0x8); | 242 | INSERT_PADDING_WORDS(0x8); |
| 241 | BitField<0, 4, TextureFormat> texture0_format; | 243 | BitField<0, 4, TextureFormat> texture0_format; |
| 242 | INSERT_PADDING_WORDS(0x2); | 244 | BitField<0, 1, u32> fragment_lighting_enable; |
| 245 | INSERT_PADDING_WORDS(0x1); | ||
| 243 | TextureConfig texture1; | 246 | TextureConfig texture1; |
| 244 | BitField<0, 4, TextureFormat> texture1_format; | 247 | BitField<0, 4, TextureFormat> texture1_format; |
| 245 | INSERT_PADDING_WORDS(0x2); | 248 | INSERT_PADDING_WORDS(0x2); |
| @@ -641,7 +644,268 @@ struct Regs { | |||
| 641 | } | 644 | } |
| 642 | } | 645 | } |
| 643 | 646 | ||
| 644 | INSERT_PADDING_WORDS(0xe0); | 647 | INSERT_PADDING_WORDS(0x20); |
| 648 | |||
| 649 | enum class LightingSampler { | ||
| 650 | Distribution0 = 0, | ||
| 651 | Distribution1 = 1, | ||
| 652 | Fresnel = 3, | ||
| 653 | ReflectBlue = 4, | ||
| 654 | ReflectGreen = 5, | ||
| 655 | ReflectRed = 6, | ||
| 656 | SpotlightAttenuation = 8, | ||
| 657 | DistanceAttenuation = 16, | ||
| 658 | }; | ||
| 659 | |||
| 660 | /** | ||
| 661 | * Pica fragment lighting supports using different LUTs for each lighting component: | ||
| 662 | * Reflectance R, G, and B channels, distribution function for specular components 0 and 1, | ||
| 663 | * fresnel factor, and spotlight attenuation. Furthermore, which LUTs are used for each channel | ||
| 664 | * (or whether a channel is enabled at all) is specified by various pre-defined lighting | ||
| 665 | * configurations. With configurations that require more LUTs, more cycles are required on HW to | ||
| 666 | * perform lighting computations. | ||
| 667 | */ | ||
| 668 | enum class LightingConfig { | ||
| 669 | Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight | ||
| 670 | Config1 = 1, ///< Reflect Red, Fresnel, Spotlight | ||
| 671 | Config2 = 2, ///< Reflect Red, Distribution 0/1 | ||
| 672 | Config3 = 3, ///< Distribution 0/1, Fresnel | ||
| 673 | Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight | ||
| 674 | Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight | ||
| 675 | Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight | ||
| 676 | Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight | ||
| 677 | ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration | ||
| 678 | }; | ||
| 679 | |||
| 680 | /// Selects which lighting components are affected by fresnel | ||
| 681 | enum class LightingFresnelSelector { | ||
| 682 | None = 0, ///< Fresnel is disabled | ||
| 683 | PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel | ||
| 684 | SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel | ||
| 685 | Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel | ||
| 686 | }; | ||
| 687 | |||
| 688 | /// Factor used to scale the output of a lighting LUT | ||
| 689 | enum class LightingScale { | ||
| 690 | Scale1 = 0, ///< Scale is 1x | ||
| 691 | Scale2 = 1, ///< Scale is 2x | ||
| 692 | Scale4 = 2, ///< Scale is 4x | ||
| 693 | Scale8 = 3, ///< Scale is 8x | ||
| 694 | Scale1_4 = 6, ///< Scale is 0.25x | ||
| 695 | Scale1_2 = 7, ///< Scale is 0.5x | ||
| 696 | }; | ||
| 697 | |||
| 698 | enum class LightingLutInput { | ||
| 699 | NH = 0, // Cosine of the angle between the normal and half-angle vectors | ||
| 700 | VH = 1, // Cosine of the angle between the view and half-angle vectors | ||
| 701 | NV = 2, // Cosine of the angle between the normal and the view vector | ||
| 702 | LN = 3, // Cosine of the angle between the light and the normal vectors | ||
| 703 | }; | ||
| 704 | |||
| 705 | enum class LightingBumpMode : u32 { | ||
| 706 | None = 0, | ||
| 707 | NormalMap = 1, | ||
| 708 | TangentMap = 2, | ||
| 709 | }; | ||
| 710 | |||
| 711 | union LightColor { | ||
| 712 | BitField< 0, 10, u32> b; | ||
| 713 | BitField<10, 10, u32> g; | ||
| 714 | BitField<20, 10, u32> r; | ||
| 715 | |||
| 716 | Math::Vec3f ToVec3f() const { | ||
| 717 | // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component | ||
| 718 | return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); | ||
| 719 | } | ||
| 720 | }; | ||
| 721 | |||
| 722 | /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration | ||
| 723 | static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { | ||
| 724 | switch (sampler) { | ||
| 725 | case LightingSampler::Distribution0: | ||
| 726 | return (config != LightingConfig::Config1); | ||
| 727 | |||
| 728 | case LightingSampler::Distribution1: | ||
| 729 | return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); | ||
| 730 | |||
| 731 | case LightingSampler::Fresnel: | ||
| 732 | return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); | ||
| 733 | |||
| 734 | case LightingSampler::ReflectRed: | ||
| 735 | return (config != LightingConfig::Config3); | ||
| 736 | |||
| 737 | case LightingSampler::ReflectGreen: | ||
| 738 | case LightingSampler::ReflectBlue: | ||
| 739 | return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); | ||
| 740 | } | ||
| 741 | return false; | ||
| 742 | } | ||
| 743 | |||
| 744 | struct { | ||
| 745 | struct LightSrc { | ||
| 746 | LightColor specular_0; // material.specular_0 * light.specular_0 | ||
| 747 | LightColor specular_1; // material.specular_1 * light.specular_1 | ||
| 748 | LightColor diffuse; // material.diffuse * light.diffuse | ||
| 749 | LightColor ambient; // material.ambient * light.ambient | ||
| 750 | |||
| 751 | struct { | ||
| 752 | // Encoded as 16-bit floating point | ||
| 753 | union { | ||
| 754 | BitField< 0, 16, u32> x; | ||
| 755 | BitField<16, 16, u32> y; | ||
| 756 | }; | ||
| 757 | union { | ||
| 758 | BitField< 0, 16, u32> z; | ||
| 759 | }; | ||
| 760 | |||
| 761 | INSERT_PADDING_WORDS(0x3); | ||
| 762 | |||
| 763 | union { | ||
| 764 | BitField<0, 1, u32> directional; | ||
| 765 | BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0 | ||
| 766 | }; | ||
| 767 | }; | ||
| 768 | |||
| 769 | BitField<0, 20, u32> dist_atten_bias; | ||
| 770 | BitField<0, 20, u32> dist_atten_scale; | ||
| 771 | |||
| 772 | INSERT_PADDING_WORDS(0x4); | ||
| 773 | }; | ||
| 774 | static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); | ||
| 775 | |||
| 776 | LightSrc light[8]; | ||
| 777 | LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) | ||
| 778 | INSERT_PADDING_WORDS(0x1); | ||
| 779 | BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 | ||
| 780 | |||
| 781 | union { | ||
| 782 | BitField< 2, 2, LightingFresnelSelector> fresnel_selector; | ||
| 783 | BitField< 4, 4, LightingConfig> config; | ||
| 784 | BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 | ||
| 785 | BitField<27, 1, u32> clamp_highlights; | ||
| 786 | BitField<28, 2, LightingBumpMode> bump_mode; | ||
| 787 | BitField<30, 1, u32> disable_bump_renorm; | ||
| 788 | }; | ||
| 789 | |||
| 790 | union { | ||
| 791 | BitField<16, 1, u32> disable_lut_d0; | ||
| 792 | BitField<17, 1, u32> disable_lut_d1; | ||
| 793 | BitField<19, 1, u32> disable_lut_fr; | ||
| 794 | BitField<20, 1, u32> disable_lut_rr; | ||
| 795 | BitField<21, 1, u32> disable_lut_rg; | ||
| 796 | BitField<22, 1, u32> disable_lut_rb; | ||
| 797 | |||
| 798 | // Each bit specifies whether distance attenuation should be applied for the | ||
| 799 | // corresponding light | ||
| 800 | |||
| 801 | BitField<24, 1, u32> disable_dist_atten_light_0; | ||
| 802 | BitField<25, 1, u32> disable_dist_atten_light_1; | ||
| 803 | BitField<26, 1, u32> disable_dist_atten_light_2; | ||
| 804 | BitField<27, 1, u32> disable_dist_atten_light_3; | ||
| 805 | BitField<28, 1, u32> disable_dist_atten_light_4; | ||
| 806 | BitField<29, 1, u32> disable_dist_atten_light_5; | ||
| 807 | BitField<30, 1, u32> disable_dist_atten_light_6; | ||
| 808 | BitField<31, 1, u32> disable_dist_atten_light_7; | ||
| 809 | }; | ||
| 810 | |||
| 811 | bool IsDistAttenDisabled(unsigned index) const { | ||
| 812 | const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1, | ||
| 813 | disable_dist_atten_light_2, disable_dist_atten_light_3, | ||
| 814 | disable_dist_atten_light_4, disable_dist_atten_light_5, | ||
| 815 | disable_dist_atten_light_6, disable_dist_atten_light_7 }; | ||
| 816 | return disable[index] != 0; | ||
| 817 | } | ||
| 818 | |||
| 819 | union { | ||
| 820 | BitField<0, 8, u32> index; ///< Index at which to set data in the LUT | ||
| 821 | BitField<8, 5, u32> type; ///< Type of LUT for which to set data | ||
| 822 | } lut_config; | ||
| 823 | |||
| 824 | BitField<0, 1, u32> disable; | ||
| 825 | INSERT_PADDING_WORDS(0x1); | ||
| 826 | |||
| 827 | // When data is written to any of these registers, it gets written to the lookup table of | ||
| 828 | // the selected type at the selected index, specified above in the `lut_config` register. | ||
| 829 | // With each write, `lut_config.index` is incremented. It does not matter which of these | ||
| 830 | // registers is written to, the behavior will be the same. | ||
| 831 | u32 lut_data[8]; | ||
| 832 | |||
| 833 | // These are used to specify if absolute (abs) value should be used for each LUT index. When | ||
| 834 | // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in | ||
| 835 | // the range of (0.0, 1.0). | ||
| 836 | union { | ||
| 837 | BitField< 1, 1, u32> disable_d0; | ||
| 838 | BitField< 5, 1, u32> disable_d1; | ||
| 839 | BitField< 9, 1, u32> disable_sp; | ||
| 840 | BitField<13, 1, u32> disable_fr; | ||
| 841 | BitField<17, 1, u32> disable_rb; | ||
| 842 | BitField<21, 1, u32> disable_rg; | ||
| 843 | BitField<25, 1, u32> disable_rr; | ||
| 844 | } abs_lut_input; | ||
| 845 | |||
| 846 | union { | ||
| 847 | BitField< 0, 3, LightingLutInput> d0; | ||
| 848 | BitField< 4, 3, LightingLutInput> d1; | ||
| 849 | BitField< 8, 3, LightingLutInput> sp; | ||
| 850 | BitField<12, 3, LightingLutInput> fr; | ||
| 851 | BitField<16, 3, LightingLutInput> rb; | ||
| 852 | BitField<20, 3, LightingLutInput> rg; | ||
| 853 | BitField<24, 3, LightingLutInput> rr; | ||
| 854 | } lut_input; | ||
| 855 | |||
| 856 | union { | ||
| 857 | BitField< 0, 3, LightingScale> d0; | ||
| 858 | BitField< 4, 3, LightingScale> d1; | ||
| 859 | BitField< 8, 3, LightingScale> sp; | ||
| 860 | BitField<12, 3, LightingScale> fr; | ||
| 861 | BitField<16, 3, LightingScale> rb; | ||
| 862 | BitField<20, 3, LightingScale> rg; | ||
| 863 | BitField<24, 3, LightingScale> rr; | ||
| 864 | |||
| 865 | static float GetScale(LightingScale scale) { | ||
| 866 | switch (scale) { | ||
| 867 | case LightingScale::Scale1: | ||
| 868 | return 1.0f; | ||
| 869 | case LightingScale::Scale2: | ||
| 870 | return 2.0f; | ||
| 871 | case LightingScale::Scale4: | ||
| 872 | return 4.0f; | ||
| 873 | case LightingScale::Scale8: | ||
| 874 | return 8.0f; | ||
| 875 | case LightingScale::Scale1_4: | ||
| 876 | return 0.25f; | ||
| 877 | case LightingScale::Scale1_2: | ||
| 878 | return 0.5f; | ||
| 879 | } | ||
| 880 | return 0.0f; | ||
| 881 | } | ||
| 882 | } lut_scale; | ||
| 883 | |||
| 884 | INSERT_PADDING_WORDS(0x6); | ||
| 885 | |||
| 886 | union { | ||
| 887 | // There are 8 light enable "slots", corresponding to the total number of lights | ||
| 888 | // supported by Pica. For N enabled lights (specified by register 0x1c2, or 'src_num' | ||
| 889 | // above), the first N slots below will be set to integers within the range of 0-7, | ||
| 890 | // corresponding to the actual light that is enabled for each slot. | ||
| 891 | |||
| 892 | BitField< 0, 3, u32> slot_0; | ||
| 893 | BitField< 4, 3, u32> slot_1; | ||
| 894 | BitField< 8, 3, u32> slot_2; | ||
| 895 | BitField<12, 3, u32> slot_3; | ||
| 896 | BitField<16, 3, u32> slot_4; | ||
| 897 | BitField<20, 3, u32> slot_5; | ||
| 898 | BitField<24, 3, u32> slot_6; | ||
| 899 | BitField<28, 3, u32> slot_7; | ||
| 900 | |||
| 901 | unsigned GetNum(unsigned index) const { | ||
| 902 | const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 }; | ||
| 903 | return enable_slots[index]; | ||
| 904 | } | ||
| 905 | } light_enable; | ||
| 906 | } lighting; | ||
| 907 | |||
| 908 | INSERT_PADDING_WORDS(0x26); | ||
| 645 | 909 | ||
| 646 | enum class VertexAttributeFormat : u64 { | 910 | enum class VertexAttributeFormat : u64 { |
| 647 | BYTE = 0, | 911 | BYTE = 0, |
| @@ -990,6 +1254,7 @@ ASSERT_REG_POSITION(viewport_corner, 0x68); | |||
| 990 | ASSERT_REG_POSITION(texture0_enable, 0x80); | 1254 | ASSERT_REG_POSITION(texture0_enable, 0x80); |
| 991 | ASSERT_REG_POSITION(texture0, 0x81); | 1255 | ASSERT_REG_POSITION(texture0, 0x81); |
| 992 | ASSERT_REG_POSITION(texture0_format, 0x8e); | 1256 | ASSERT_REG_POSITION(texture0_format, 0x8e); |
| 1257 | ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f); | ||
| 993 | ASSERT_REG_POSITION(texture1, 0x91); | 1258 | ASSERT_REG_POSITION(texture1, 0x91); |
| 994 | ASSERT_REG_POSITION(texture1_format, 0x96); | 1259 | ASSERT_REG_POSITION(texture1_format, 0x96); |
| 995 | ASSERT_REG_POSITION(texture2, 0x99); | 1260 | ASSERT_REG_POSITION(texture2, 0x99); |
| @@ -1004,6 +1269,7 @@ ASSERT_REG_POSITION(tev_stage5, 0xf8); | |||
| 1004 | ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); | 1269 | ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); |
| 1005 | ASSERT_REG_POSITION(output_merger, 0x100); | 1270 | ASSERT_REG_POSITION(output_merger, 0x100); |
| 1006 | ASSERT_REG_POSITION(framebuffer, 0x110); | 1271 | ASSERT_REG_POSITION(framebuffer, 0x110); |
| 1272 | ASSERT_REG_POSITION(lighting, 0x140); | ||
| 1007 | ASSERT_REG_POSITION(vertex_attributes, 0x200); | 1273 | ASSERT_REG_POSITION(vertex_attributes, 0x200); |
| 1008 | ASSERT_REG_POSITION(index_array, 0x227); | 1274 | ASSERT_REG_POSITION(index_array, 0x227); |
| 1009 | ASSERT_REG_POSITION(num_vertices, 0x228); | 1275 | ASSERT_REG_POSITION(num_vertices, 0x228); |
| @@ -1026,118 +1292,6 @@ static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig st | |||
| 1026 | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); | 1292 | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); |
| 1027 | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); | 1293 | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); |
| 1028 | 1294 | ||
| 1029 | struct float24 { | ||
| 1030 | static float24 FromFloat32(float val) { | ||
| 1031 | float24 ret; | ||
| 1032 | ret.value = val; | ||
| 1033 | return ret; | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | // 16 bit mantissa, 7 bit exponent, 1 bit sign | ||
| 1037 | // TODO: No idea if this works as intended | ||
| 1038 | static float24 FromRawFloat24(u32 hex) { | ||
| 1039 | float24 ret; | ||
| 1040 | if ((hex & 0xFFFFFF) == 0) { | ||
| 1041 | ret.value = 0; | ||
| 1042 | } else { | ||
| 1043 | u32 mantissa = hex & 0xFFFF; | ||
| 1044 | u32 exponent = (hex >> 16) & 0x7F; | ||
| 1045 | u32 sign = hex >> 23; | ||
| 1046 | ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f)); | ||
| 1047 | if (sign) | ||
| 1048 | ret.value = -ret.value; | ||
| 1049 | } | ||
| 1050 | return ret; | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | static float24 Zero() { | ||
| 1054 | return FromFloat32(0.f); | ||
| 1055 | } | ||
| 1056 | |||
| 1057 | // Not recommended for anything but logging | ||
| 1058 | float ToFloat32() const { | ||
| 1059 | return value; | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | float24 operator * (const float24& flt) const { | ||
| 1063 | if ((this->value == 0.f && !std::isnan(flt.value)) || | ||
| 1064 | (flt.value == 0.f && !std::isnan(this->value))) | ||
| 1065 | // PICA gives 0 instead of NaN when multiplying by inf | ||
| 1066 | return Zero(); | ||
| 1067 | return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | float24 operator / (const float24& flt) const { | ||
| 1071 | return float24::FromFloat32(ToFloat32() / flt.ToFloat32()); | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | float24 operator + (const float24& flt) const { | ||
| 1075 | return float24::FromFloat32(ToFloat32() + flt.ToFloat32()); | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | float24 operator - (const float24& flt) const { | ||
| 1079 | return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); | ||
| 1080 | } | ||
| 1081 | |||
| 1082 | float24& operator *= (const float24& flt) { | ||
| 1083 | if ((this->value == 0.f && !std::isnan(flt.value)) || | ||
| 1084 | (flt.value == 0.f && !std::isnan(this->value))) | ||
| 1085 | // PICA gives 0 instead of NaN when multiplying by inf | ||
| 1086 | *this = Zero(); | ||
| 1087 | else value *= flt.ToFloat32(); | ||
| 1088 | return *this; | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | float24& operator /= (const float24& flt) { | ||
| 1092 | value /= flt.ToFloat32(); | ||
| 1093 | return *this; | ||
| 1094 | } | ||
| 1095 | |||
| 1096 | float24& operator += (const float24& flt) { | ||
| 1097 | value += flt.ToFloat32(); | ||
| 1098 | return *this; | ||
| 1099 | } | ||
| 1100 | |||
| 1101 | float24& operator -= (const float24& flt) { | ||
| 1102 | value -= flt.ToFloat32(); | ||
| 1103 | return *this; | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | float24 operator - () const { | ||
| 1107 | return float24::FromFloat32(-ToFloat32()); | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | bool operator < (const float24& flt) const { | ||
| 1111 | return ToFloat32() < flt.ToFloat32(); | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | bool operator > (const float24& flt) const { | ||
| 1115 | return ToFloat32() > flt.ToFloat32(); | ||
| 1116 | } | ||
| 1117 | |||
| 1118 | bool operator >= (const float24& flt) const { | ||
| 1119 | return ToFloat32() >= flt.ToFloat32(); | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | bool operator <= (const float24& flt) const { | ||
| 1123 | return ToFloat32() <= flt.ToFloat32(); | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | bool operator == (const float24& flt) const { | ||
| 1127 | return ToFloat32() == flt.ToFloat32(); | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | bool operator != (const float24& flt) const { | ||
| 1131 | return ToFloat32() != flt.ToFloat32(); | ||
| 1132 | } | ||
| 1133 | |||
| 1134 | private: | ||
| 1135 | // Stored as a regular float, merely for convenience | ||
| 1136 | // TODO: Perform proper arithmetic on this! | ||
| 1137 | float value; | ||
| 1138 | }; | ||
| 1139 | static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float"); | ||
| 1140 | |||
| 1141 | /// Struct used to describe current Pica state | 1295 | /// Struct used to describe current Pica state |
| 1142 | struct State { | 1296 | struct State { |
| 1143 | /// Pica registers | 1297 | /// Pica registers |
| @@ -1163,6 +1317,25 @@ struct State { | |||
| 1163 | ShaderSetup vs; | 1317 | ShaderSetup vs; |
| 1164 | ShaderSetup gs; | 1318 | ShaderSetup gs; |
| 1165 | 1319 | ||
| 1320 | struct { | ||
| 1321 | union LutEntry { | ||
| 1322 | // Used for raw access | ||
| 1323 | u32 raw; | ||
| 1324 | |||
| 1325 | // LUT value, encoded as 12-bit fixed point, with 12 fraction bits | ||
| 1326 | BitField< 0, 12, u32> value; | ||
| 1327 | |||
| 1328 | // Used by HW for efficient interpolation; Citra does not use these | |
| 1329 | BitField<12, 12, u32> difference; | ||
| 1330 | |||
| 1331 | float ToFloat() { | ||
| 1332 | return static_cast<float>(value) / 4095.f; | ||
| 1333 | } | ||
| 1334 | }; | ||
| 1335 | |||
| 1336 | std::array<std::array<LutEntry, 256>, 24> luts; | ||
| 1337 | } lighting; | ||
| 1338 | |||
| 1166 | /// Current Pica command list | 1339 | /// Current Pica command list |
| 1167 | struct { | 1340 | struct { |
| 1168 | const u32* head_ptr; | 1341 | const u32* head_ptr; |
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h new file mode 100644 index 000000000..ecf45654b --- /dev/null +++ b/src/video_core/pica_types.h | |||
| @@ -0,0 +1,146 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstring> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace Pica { | ||
| 12 | |||
| 13 | /** | ||
| 14 | * Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision | ||
| 15 | * floating point. | ||
| 16 | * | ||
| 17 | * When decoding, format is as follows: | ||
| 18 | * - The lowest `M` bits are the mantissa | |
| 19 | * - The next `E` bits above them are the exponent | |
| 20 | * - The highest bit (bit `M + E`) is the sign bit | |
| 21 | * | ||
| 22 | * @todo Verify on HW if this conversion is sufficiently accurate. | ||
| 23 | */ | ||
| 24 | template<unsigned M, unsigned E> | ||
| 25 | struct Float { | ||
| 26 | public: | ||
| 27 | static Float<M, E> FromFloat32(float val) { | ||
| 28 | Float<M, E> ret; | ||
| 29 | ret.value = val; | ||
| 30 | return ret; | ||
| 31 | } | ||
| 32 | |||
| 33 | static Float<M, E> FromRaw(u32 hex) { | ||
| 34 | Float<M, E> res; | ||
| 35 | |||
| 36 | const int width = M + E + 1; | ||
| 37 | const int bias = 128 - (1 << (E - 1)); | ||
| 38 | const int exponent = (hex >> M) & ((1 << E) - 1); | ||
| 39 | const unsigned mantissa = hex & ((1 << M) - 1); | ||
| 40 | |||
| 41 | if (hex & ((1 << (width - 1)) - 1)) | ||
| 42 | hex = ((hex >> (E + M)) << 31) | (mantissa << (23 - M)) | ((exponent + bias) << 23); | ||
| 43 | else | ||
| 44 | hex = ((hex >> (E + M)) << 31); | ||
| 45 | |||
| 46 | std::memcpy(&res.value, &hex, sizeof(float)); | ||
| 47 | |||
| 48 | return res; | ||
| 49 | } | ||
| 50 | |||
| 51 | static Float<M, E> Zero() { | ||
| 52 | return FromFloat32(0.f); | ||
| 53 | } | ||
| 54 | |||
| 55 | // Not recommended for anything but logging | ||
| 56 | float ToFloat32() const { | ||
| 57 | return value; | ||
| 58 | } | ||
| 59 | |||
| 60 | Float<M, E> operator * (const Float<M, E>& flt) const { | ||
| 61 | if ((this->value == 0.f && !std::isnan(flt.value)) || | ||
| 62 | (flt.value == 0.f && !std::isnan(this->value))) | ||
| 63 | // PICA gives 0 instead of NaN when multiplying by inf | ||
| 64 | return Zero(); | ||
| 65 | return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32()); | ||
| 66 | } | ||
| 67 | |||
| 68 | Float<M, E> operator / (const Float<M, E>& flt) const { | ||
| 69 | return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); | ||
| 70 | } | ||
| 71 | |||
| 72 | Float<M, E> operator + (const Float<M, E>& flt) const { | ||
| 73 | return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); | ||
| 74 | } | ||
| 75 | |||
| 76 | Float<M, E> operator - (const Float<M, E>& flt) const { | ||
| 77 | return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); | ||
| 78 | } | ||
| 79 | |||
| 80 | Float<M, E>& operator *= (const Float<M, E>& flt) { | ||
| 81 | if ((this->value == 0.f && !std::isnan(flt.value)) || | ||
| 82 | (flt.value == 0.f && !std::isnan(this->value))) | ||
| 83 | // PICA gives 0 instead of NaN when multiplying by inf | ||
| 84 | *this = Zero(); | ||
| 85 | else value *= flt.ToFloat32(); | ||
| 86 | return *this; | ||
| 87 | } | ||
| 88 | |||
| 89 | Float<M, E>& operator /= (const Float<M, E>& flt) { | ||
| 90 | value /= flt.ToFloat32(); | ||
| 91 | return *this; | ||
| 92 | } | ||
| 93 | |||
| 94 | Float<M, E>& operator += (const Float<M, E>& flt) { | ||
| 95 | value += flt.ToFloat32(); | ||
| 96 | return *this; | ||
| 97 | } | ||
| 98 | |||
| 99 | Float<M, E>& operator -= (const Float<M, E>& flt) { | ||
| 100 | value -= flt.ToFloat32(); | ||
| 101 | return *this; | ||
| 102 | } | ||
| 103 | |||
| 104 | Float<M, E> operator - () const { | ||
| 105 | return Float<M, E>::FromFloat32(-ToFloat32()); | ||
| 106 | } | ||
| 107 | |||
| 108 | bool operator < (const Float<M, E>& flt) const { | ||
| 109 | return ToFloat32() < flt.ToFloat32(); | ||
| 110 | } | ||
| 111 | |||
| 112 | bool operator > (const Float<M, E>& flt) const { | ||
| 113 | return ToFloat32() > flt.ToFloat32(); | ||
| 114 | } | ||
| 115 | |||
| 116 | bool operator >= (const Float<M, E>& flt) const { | ||
| 117 | return ToFloat32() >= flt.ToFloat32(); | ||
| 118 | } | ||
| 119 | |||
| 120 | bool operator <= (const Float<M, E>& flt) const { | ||
| 121 | return ToFloat32() <= flt.ToFloat32(); | ||
| 122 | } | ||
| 123 | |||
| 124 | bool operator == (const Float<M, E>& flt) const { | ||
| 125 | return ToFloat32() == flt.ToFloat32(); | ||
| 126 | } | ||
| 127 | |||
| 128 | bool operator != (const Float<M, E>& flt) const { | ||
| 129 | return ToFloat32() != flt.ToFloat32(); | ||
| 130 | } | ||
| 131 | |||
| 132 | private: | ||
| 133 | static const unsigned MASK = (1 << (M + E + 1)) - 1; | ||
| 134 | static const unsigned MANTISSA_MASK = (1 << M) - 1; | ||
| 135 | static const unsigned EXPONENT_MASK = (1 << E) - 1; | ||
| 136 | |||
| 137 | // Stored as a regular float, merely for convenience | ||
| 138 | // TODO: Perform proper arithmetic on this! | ||
| 139 | float value; | ||
| 140 | }; | ||
| 141 | |||
| 142 | using float24 = Float<16, 7>; | ||
| 143 | using float20 = Float<12, 7>; | ||
| 144 | using float16 = Float<10, 5>; | ||
| 145 | |||
| 146 | } // namespace Pica | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 291ef737d..b7d19bf94 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -36,7 +36,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { | |||
| 36 | stage.GetAlphaMultiplier() == 1); | 36 | stage.GetAlphaMultiplier() == 1); |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | RasterizerOpenGL::RasterizerOpenGL() : last_fb_color_addr(0), last_fb_depth_addr(0) { } | 39 | RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } |
| 40 | RasterizerOpenGL::~RasterizerOpenGL() { } | 40 | RasterizerOpenGL::~RasterizerOpenGL() { } |
| 41 | 41 | ||
| 42 | void RasterizerOpenGL::InitObjects() { | 42 | void RasterizerOpenGL::InitObjects() { |
| @@ -75,6 +75,12 @@ void RasterizerOpenGL::InitObjects() { | |||
| 75 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); | 75 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); |
| 76 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); | 76 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); |
| 77 | 77 | ||
| 78 | glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); | ||
| 79 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); | ||
| 80 | |||
| 81 | glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); | ||
| 82 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); | ||
| 83 | |||
| 78 | SetShader(); | 84 | SetShader(); |
| 79 | 85 | ||
| 80 | // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation | 86 | // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation |
| @@ -120,6 +126,19 @@ void RasterizerOpenGL::InitObjects() { | |||
| 120 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); | 126 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); |
| 121 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); | 127 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); |
| 122 | 128 | ||
| 129 | for (size_t i = 0; i < lighting_lut.size(); ++i) { | ||
| 130 | lighting_lut[i].Create(); | ||
| 131 | state.lighting_lut[i].texture_1d = lighting_lut[i].handle; | ||
| 132 | |||
| 133 | glActiveTexture(GL_TEXTURE3 + i); | ||
| 134 | glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d); | ||
| 135 | |||
| 136 | glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); | ||
| 137 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | ||
| 138 | glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | ||
| 139 | } | ||
| 140 | state.Apply(); | ||
| 141 | |||
| 123 | ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, | 142 | ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, |
| 124 | "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); | 143 | "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); |
| 125 | } | 144 | } |
| @@ -139,12 +158,34 @@ void RasterizerOpenGL::Reset() { | |||
| 139 | res_cache.InvalidateAll(); | 158 | res_cache.InvalidateAll(); |
| 140 | } | 159 | } |
| 141 | 160 | ||
| 161 | /** | ||
| 162 | * This is a helper function to resolve an issue with opposite quaternions being interpolated by | ||
| 163 | * OpenGL. See below for a detailed description of this issue (yuriks): | ||
| 164 | * | ||
| 165 | * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you | ||
| 166 | * interpolate two quaternions that are opposite, instead of going from one rotation to another | ||
| 167 | * using the shortest path, you'll go around the longest path. You can test if two quaternions are | ||
| 168 | * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore | |
| 169 | * making Dot(-Q1, Q2) positive. | |
| 170 | * | ||
| 171 | * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This | ||
| 172 | * should be correct for nearly all cases; however, a more correct implementation (but less trivial | |
| 173 | * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions | ||
| 174 | * manually using two Lerps, and doing this correction before each Lerp. | ||
| 175 | */ | ||
| 176 | static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { | ||
| 177 | Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; | ||
| 178 | Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; | ||
| 179 | |||
| 180 | return (Math::Dot(a, b) < 0.f); | ||
| 181 | } | ||
| 182 | |||
| 142 | void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, | 183 | void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, |
| 143 | const Pica::Shader::OutputVertex& v1, | 184 | const Pica::Shader::OutputVertex& v1, |
| 144 | const Pica::Shader::OutputVertex& v2) { | 185 | const Pica::Shader::OutputVertex& v2) { |
| 145 | vertex_batch.emplace_back(v0); | 186 | vertex_batch.emplace_back(v0, false); |
| 146 | vertex_batch.emplace_back(v1); | 187 | vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); |
| 147 | vertex_batch.emplace_back(v2); | 188 | vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); |
| 148 | } | 189 | } |
| 149 | 190 | ||
| 150 | void RasterizerOpenGL::DrawTriangles() { | 191 | void RasterizerOpenGL::DrawTriangles() { |
| @@ -156,6 +197,13 @@ void RasterizerOpenGL::DrawTriangles() { | |||
| 156 | state.draw.shader_dirty = false; | 197 | state.draw.shader_dirty = false; |
| 157 | } | 198 | } |
| 158 | 199 | ||
| 200 | for (unsigned index = 0; index < lighting_lut.size(); index++) { | ||
| 201 | if (uniform_block_data.lut_dirty[index]) { | ||
| 202 | SyncLightingLUT(index); | ||
| 203 | uniform_block_data.lut_dirty[index] = false; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 159 | if (uniform_block_data.dirty) { | 207 | if (uniform_block_data.dirty) { |
| 160 | glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); | 208 | glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); |
| 161 | uniform_block_data.dirty = false; | 209 | uniform_block_data.dirty = false; |
| @@ -169,16 +217,14 @@ void RasterizerOpenGL::DrawTriangles() { | |||
| 169 | // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture | 217 | // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture |
| 170 | const auto& regs = Pica::g_state.regs; | 218 | const auto& regs = Pica::g_state.regs; |
| 171 | 219 | ||
| 172 | PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); | 220 | u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) |
| 173 | u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) | 221 | * fb_color_texture.width * fb_color_texture.height; |
| 174 | * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||
| 175 | 222 | ||
| 176 | PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); | 223 | u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) |
| 177 | u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) | 224 | * fb_depth_texture.width * fb_depth_texture.height; |
| 178 | * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||
| 179 | 225 | ||
| 180 | res_cache.InvalidateInRange(cur_fb_color_addr, cur_fb_color_size, true); | 226 | res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); |
| 181 | res_cache.InvalidateInRange(cur_fb_depth_addr, cur_fb_depth_size, true); | 227 | res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true); |
| 182 | } | 228 | } |
| 183 | 229 | ||
| 184 | void RasterizerOpenGL::FlushFramebuffer() { | 230 | void RasterizerOpenGL::FlushFramebuffer() { |
| @@ -285,44 +331,199 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 285 | case PICA_REG_INDEX(tev_combiner_buffer_color): | 331 | case PICA_REG_INDEX(tev_combiner_buffer_color): |
| 286 | SyncCombinerColor(); | 332 | SyncCombinerColor(); |
| 287 | break; | 333 | break; |
| 334 | |||
| 335 | // Fragment lighting specular 0 color | ||
| 336 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10): | ||
| 337 | SyncLightSpecular0(0); | ||
| 338 | break; | ||
| 339 | case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_0, 0x140 + 1 * 0x10): | ||
| 340 | SyncLightSpecular0(1); | ||
| 341 | break; | ||
| 342 | case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_0, 0x140 + 2 * 0x10): | ||
| 343 | SyncLightSpecular0(2); | ||
| 344 | break; | ||
| 345 | case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_0, 0x140 + 3 * 0x10): | ||
| 346 | SyncLightSpecular0(3); | ||
| 347 | break; | ||
| 348 | case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_0, 0x140 + 4 * 0x10): | ||
| 349 | SyncLightSpecular0(4); | ||
| 350 | break; | ||
| 351 | case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_0, 0x140 + 5 * 0x10): | ||
| 352 | SyncLightSpecular0(5); | ||
| 353 | break; | ||
| 354 | case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_0, 0x140 + 6 * 0x10): | ||
| 355 | SyncLightSpecular0(6); | ||
| 356 | break; | ||
| 357 | case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_0, 0x140 + 7 * 0x10): | ||
| 358 | SyncLightSpecular0(7); | ||
| 359 | break; | ||
| 360 | |||
| 361 | // Fragment lighting specular 1 color | ||
| 362 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_1, 0x141 + 0 * 0x10): | ||
| 363 | SyncLightSpecular1(0); | ||
| 364 | break; | ||
| 365 | case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_1, 0x141 + 1 * 0x10): | ||
| 366 | SyncLightSpecular1(1); | ||
| 367 | break; | ||
| 368 | case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_1, 0x141 + 2 * 0x10): | ||
| 369 | SyncLightSpecular1(2); | ||
| 370 | break; | ||
| 371 | case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_1, 0x141 + 3 * 0x10): | ||
| 372 | SyncLightSpecular1(3); | ||
| 373 | break; | ||
| 374 | case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_1, 0x141 + 4 * 0x10): | ||
| 375 | SyncLightSpecular1(4); | ||
| 376 | break; | ||
| 377 | case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_1, 0x141 + 5 * 0x10): | ||
| 378 | SyncLightSpecular1(5); | ||
| 379 | break; | ||
| 380 | case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_1, 0x141 + 6 * 0x10): | ||
| 381 | SyncLightSpecular1(6); | ||
| 382 | break; | ||
| 383 | case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_1, 0x141 + 7 * 0x10): | ||
| 384 | SyncLightSpecular1(7); | ||
| 385 | break; | ||
| 386 | |||
| 387 | // Fragment lighting diffuse color | ||
| 388 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10): | ||
| 389 | SyncLightDiffuse(0); | ||
| 390 | break; | ||
| 391 | case PICA_REG_INDEX_WORKAROUND(lighting.light[1].diffuse, 0x142 + 1 * 0x10): | ||
| 392 | SyncLightDiffuse(1); | ||
| 393 | break; | ||
| 394 | case PICA_REG_INDEX_WORKAROUND(lighting.light[2].diffuse, 0x142 + 2 * 0x10): | ||
| 395 | SyncLightDiffuse(2); | ||
| 396 | break; | ||
| 397 | case PICA_REG_INDEX_WORKAROUND(lighting.light[3].diffuse, 0x142 + 3 * 0x10): | ||
| 398 | SyncLightDiffuse(3); | ||
| 399 | break; | ||
| 400 | case PICA_REG_INDEX_WORKAROUND(lighting.light[4].diffuse, 0x142 + 4 * 0x10): | ||
| 401 | SyncLightDiffuse(4); | ||
| 402 | break; | ||
| 403 | case PICA_REG_INDEX_WORKAROUND(lighting.light[5].diffuse, 0x142 + 5 * 0x10): | ||
| 404 | SyncLightDiffuse(5); | ||
| 405 | break; | ||
| 406 | case PICA_REG_INDEX_WORKAROUND(lighting.light[6].diffuse, 0x142 + 6 * 0x10): | ||
| 407 | SyncLightDiffuse(6); | ||
| 408 | break; | ||
| 409 | case PICA_REG_INDEX_WORKAROUND(lighting.light[7].diffuse, 0x142 + 7 * 0x10): | ||
| 410 | SyncLightDiffuse(7); | ||
| 411 | break; | ||
| 412 | |||
| 413 | // Fragment lighting ambient color | ||
| 414 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].ambient, 0x143 + 0 * 0x10): | ||
| 415 | SyncLightAmbient(0); | ||
| 416 | break; | ||
| 417 | case PICA_REG_INDEX_WORKAROUND(lighting.light[1].ambient, 0x143 + 1 * 0x10): | ||
| 418 | SyncLightAmbient(1); | ||
| 419 | break; | ||
| 420 | case PICA_REG_INDEX_WORKAROUND(lighting.light[2].ambient, 0x143 + 2 * 0x10): | ||
| 421 | SyncLightAmbient(2); | ||
| 422 | break; | ||
| 423 | case PICA_REG_INDEX_WORKAROUND(lighting.light[3].ambient, 0x143 + 3 * 0x10): | ||
| 424 | SyncLightAmbient(3); | ||
| 425 | break; | ||
| 426 | case PICA_REG_INDEX_WORKAROUND(lighting.light[4].ambient, 0x143 + 4 * 0x10): | ||
| 427 | SyncLightAmbient(4); | ||
| 428 | break; | ||
| 429 | case PICA_REG_INDEX_WORKAROUND(lighting.light[5].ambient, 0x143 + 5 * 0x10): | ||
| 430 | SyncLightAmbient(5); | ||
| 431 | break; | ||
| 432 | case PICA_REG_INDEX_WORKAROUND(lighting.light[6].ambient, 0x143 + 6 * 0x10): | ||
| 433 | SyncLightAmbient(6); | ||
| 434 | break; | ||
| 435 | case PICA_REG_INDEX_WORKAROUND(lighting.light[7].ambient, 0x143 + 7 * 0x10): | ||
| 436 | SyncLightAmbient(7); | ||
| 437 | break; | ||
| 438 | |||
| 439 | // Fragment lighting position | ||
| 440 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): | ||
| 441 | case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): | ||
| 442 | SyncLightPosition(0); | ||
| 443 | break; | ||
| 444 | case PICA_REG_INDEX_WORKAROUND(lighting.light[1].x, 0x144 + 1 * 0x10): | ||
| 445 | case PICA_REG_INDEX_WORKAROUND(lighting.light[1].z, 0x145 + 1 * 0x10): | ||
| 446 | SyncLightPosition(1); | ||
| 447 | break; | ||
| 448 | case PICA_REG_INDEX_WORKAROUND(lighting.light[2].x, 0x144 + 2 * 0x10): | ||
| 449 | case PICA_REG_INDEX_WORKAROUND(lighting.light[2].z, 0x145 + 2 * 0x10): | ||
| 450 | SyncLightPosition(2); | ||
| 451 | break; | ||
| 452 | case PICA_REG_INDEX_WORKAROUND(lighting.light[3].x, 0x144 + 3 * 0x10): | ||
| 453 | case PICA_REG_INDEX_WORKAROUND(lighting.light[3].z, 0x145 + 3 * 0x10): | ||
| 454 | SyncLightPosition(3); | ||
| 455 | break; | ||
| 456 | case PICA_REG_INDEX_WORKAROUND(lighting.light[4].x, 0x144 + 4 * 0x10): | ||
| 457 | case PICA_REG_INDEX_WORKAROUND(lighting.light[4].z, 0x145 + 4 * 0x10): | ||
| 458 | SyncLightPosition(4); | ||
| 459 | break; | ||
| 460 | case PICA_REG_INDEX_WORKAROUND(lighting.light[5].x, 0x144 + 5 * 0x10): | ||
| 461 | case PICA_REG_INDEX_WORKAROUND(lighting.light[5].z, 0x145 + 5 * 0x10): | ||
| 462 | SyncLightPosition(5); | ||
| 463 | break; | ||
| 464 | case PICA_REG_INDEX_WORKAROUND(lighting.light[6].x, 0x144 + 6 * 0x10): | ||
| 465 | case PICA_REG_INDEX_WORKAROUND(lighting.light[6].z, 0x145 + 6 * 0x10): | ||
| 466 | SyncLightPosition(6); | ||
| 467 | break; | ||
| 468 | case PICA_REG_INDEX_WORKAROUND(lighting.light[7].x, 0x144 + 7 * 0x10): | ||
| 469 | case PICA_REG_INDEX_WORKAROUND(lighting.light[7].z, 0x145 + 7 * 0x10): | ||
| 470 | SyncLightPosition(7); | ||
| 471 | break; | ||
| 472 | |||
| 473 | // Fragment lighting global ambient color (emission + ambient * ambient) | ||
| 474 | case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0): | ||
| 475 | SyncGlobalAmbient(); | ||
| 476 | break; | ||
| 477 | |||
| 478 | // Fragment lighting lookup tables | ||
| 479 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): | ||
| 480 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): | ||
| 481 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): | ||
| 482 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): | ||
| 483 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): | ||
| 484 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): | ||
| 485 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): | ||
| 486 | case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): | ||
| 487 | { | ||
| 488 | auto& lut_config = regs.lighting.lut_config; | ||
| 489 | uniform_block_data.lut_dirty[lut_config.type / 4] = true; | ||
| 490 | break; | ||
| 491 | } | ||
| 492 | |||
| 288 | } | 493 | } |
| 289 | } | 494 | } |
| 290 | 495 | ||
| 291 | void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { | 496 | void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { |
| 292 | const auto& regs = Pica::g_state.regs; | 497 | const auto& regs = Pica::g_state.regs; |
| 293 | 498 | ||
| 294 | PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); | 499 | u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) |
| 295 | u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) | 500 | * fb_color_texture.width * fb_color_texture.height; |
| 296 | * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||
| 297 | 501 | ||
| 298 | PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); | 502 | u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) |
| 299 | u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) | 503 | * fb_depth_texture.width * fb_depth_texture.height; |
| 300 | * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||
| 301 | 504 | ||
| 302 | // If source memory region overlaps 3DS framebuffers, commit them before the copy happens | 505 | // If source memory region overlaps 3DS framebuffers, commit them before the copy happens |
| 303 | if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) | 506 | if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) |
| 304 | CommitColorBuffer(); | 507 | CommitColorBuffer(); |
| 305 | 508 | ||
| 306 | if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) | 509 | if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) |
| 307 | CommitDepthBuffer(); | 510 | CommitDepthBuffer(); |
| 308 | } | 511 | } |
| 309 | 512 | ||
| 310 | void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { | 513 | void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { |
| 311 | const auto& regs = Pica::g_state.regs; | 514 | const auto& regs = Pica::g_state.regs; |
| 312 | 515 | ||
| 313 | PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); | 516 | u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) |
| 314 | u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) | 517 | * fb_color_texture.width * fb_color_texture.height; |
| 315 | * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||
| 316 | 518 | ||
| 317 | PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); | 519 | u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) |
| 318 | u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) | 520 | * fb_depth_texture.width * fb_depth_texture.height; |
| 319 | * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); | ||
| 320 | 521 | ||
| 321 | // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL | 522 | // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL |
| 322 | if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) | 523 | if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) |
| 323 | ReloadColorBuffer(); | 524 | ReloadColorBuffer(); |
| 324 | 525 | ||
| 325 | if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) | 526 | if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) |
| 326 | ReloadDepthBuffer(); | 527 | ReloadDepthBuffer(); |
| 327 | 528 | ||
| 328 | // Notify cache of flush in case the region touches a cached resource | 529 | // Notify cache of flush in case the region touches a cached resource |
| @@ -497,27 +698,48 @@ void RasterizerOpenGL::SetShader() { | |||
| 497 | uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); | 698 | uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); |
| 498 | if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } | 699 | if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } |
| 499 | 700 | ||
| 701 | // Set the texture samplers to correspond to different lookup table texture units | ||
| 702 | GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); | ||
| 703 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); } | ||
| 704 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); | ||
| 705 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); } | ||
| 706 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); | ||
| 707 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); } | ||
| 708 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); | ||
| 709 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); } | ||
| 710 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); | ||
| 711 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); } | ||
| 712 | uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); | ||
| 713 | if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } | ||
| 714 | |||
| 500 | current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); | 715 | current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); |
| 501 | 716 | ||
| 502 | unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); | 717 | unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); |
| 503 | glUniformBlockBinding(current_shader->shader.handle, block_index, 0); | 718 | glUniformBlockBinding(current_shader->shader.handle, block_index, 0); |
| 504 | } | ||
| 505 | 719 | ||
| 506 | // Update uniforms | 720 | // Update uniforms |
| 507 | SyncAlphaTest(); | 721 | SyncAlphaTest(); |
| 508 | SyncCombinerColor(); | 722 | SyncCombinerColor(); |
| 509 | auto& tev_stages = Pica::g_state.regs.GetTevStages(); | 723 | auto& tev_stages = Pica::g_state.regs.GetTevStages(); |
| 510 | for (int index = 0; index < tev_stages.size(); ++index) | 724 | for (int index = 0; index < tev_stages.size(); ++index) |
| 511 | SyncTevConstColor(index, tev_stages[index]); | 725 | SyncTevConstColor(index, tev_stages[index]); |
| 726 | |||
| 727 | SyncGlobalAmbient(); | ||
| 728 | for (int light_index = 0; light_index < 8; light_index++) { | ||
| 729 | SyncLightDiffuse(light_index); | ||
| 730 | SyncLightAmbient(light_index); | ||
| 731 | SyncLightPosition(light_index); | ||
| 732 | } | ||
| 733 | } | ||
| 512 | } | 734 | } |
| 513 | 735 | ||
| 514 | void RasterizerOpenGL::SyncFramebuffer() { | 736 | void RasterizerOpenGL::SyncFramebuffer() { |
| 515 | const auto& regs = Pica::g_state.regs; | 737 | const auto& regs = Pica::g_state.regs; |
| 516 | 738 | ||
| 517 | PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); | 739 | PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); |
| 518 | Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format; | 740 | Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format; |
| 519 | 741 | ||
| 520 | PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); | 742 | PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); |
| 521 | Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format; | 743 | Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format; |
| 522 | 744 | ||
| 523 | bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) || | 745 | bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) || |
| @@ -529,10 +751,10 @@ void RasterizerOpenGL::SyncFramebuffer() { | |||
| 529 | bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format || | 751 | bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format || |
| 530 | fb_size_changed; | 752 | fb_size_changed; |
| 531 | 753 | ||
| 532 | bool color_fb_modified = last_fb_color_addr != cur_fb_color_addr || | 754 | bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr || |
| 533 | color_fb_prop_changed; | 755 | color_fb_prop_changed; |
| 534 | 756 | ||
| 535 | bool depth_fb_modified = last_fb_depth_addr != cur_fb_depth_addr || | 757 | bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr || |
| 536 | depth_fb_prop_changed; | 758 | depth_fb_prop_changed; |
| 537 | 759 | ||
| 538 | // Commit if framebuffer modified in any way | 760 | // Commit if framebuffer modified in any way |
| @@ -572,13 +794,13 @@ void RasterizerOpenGL::SyncFramebuffer() { | |||
| 572 | 794 | ||
| 573 | // Load buffer data again if fb modified in any way | 795 | // Load buffer data again if fb modified in any way |
| 574 | if (color_fb_modified) { | 796 | if (color_fb_modified) { |
| 575 | last_fb_color_addr = cur_fb_color_addr; | 797 | cached_fb_color_addr = new_fb_color_addr; |
| 576 | 798 | ||
| 577 | ReloadColorBuffer(); | 799 | ReloadColorBuffer(); |
| 578 | } | 800 | } |
| 579 | 801 | ||
| 580 | if (depth_fb_modified) { | 802 | if (depth_fb_modified) { |
| 581 | last_fb_depth_addr = cur_fb_depth_addr; | 803 | cached_fb_depth_addr = new_fb_depth_addr; |
| 582 | 804 | ||
| 583 | ReloadDepthBuffer(); | 805 | ReloadDepthBuffer(); |
| 584 | } | 806 | } |
| @@ -610,8 +832,8 @@ void RasterizerOpenGL::SyncCullMode() { | |||
| 610 | } | 832 | } |
| 611 | 833 | ||
| 612 | void RasterizerOpenGL::SyncDepthModifiers() { | 834 | void RasterizerOpenGL::SyncDepthModifiers() { |
| 613 | float depth_scale = -Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_range).ToFloat32(); | 835 | float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); |
| 614 | float depth_offset = Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; | 836 | float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; |
| 615 | 837 | ||
| 616 | // TODO: Implement scale modifier | 838 | // TODO: Implement scale modifier |
| 617 | uniform_block_data.data.depth_offset = depth_offset; | 839 | uniform_block_data.data.depth_offset = depth_offset; |
| @@ -689,12 +911,81 @@ void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevS | |||
| 689 | } | 911 | } |
| 690 | } | 912 | } |
| 691 | 913 | ||
| 914 | void RasterizerOpenGL::SyncGlobalAmbient() { | ||
| 915 | auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient); | ||
| 916 | if (color != uniform_block_data.data.lighting_global_ambient) { | ||
| 917 | uniform_block_data.data.lighting_global_ambient = color; | ||
| 918 | uniform_block_data.dirty = true; | ||
| 919 | } | ||
| 920 | } | ||
| 921 | |||
| 922 | void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { | ||
| 923 | std::array<GLvec4, 256> new_data; | ||
| 924 | |||
| 925 | for (unsigned offset = 0; offset < new_data.size(); ++offset) { | ||
| 926 | new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat(); | ||
| 927 | new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat(); | ||
| 928 | new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat(); | ||
| 929 | new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat(); | ||
| 930 | } | ||
| 931 | |||
| 932 | if (new_data != lighting_lut_data[lut_index]) { | ||
| 933 | lighting_lut_data[lut_index] = new_data; | ||
| 934 | glActiveTexture(GL_TEXTURE3 + lut_index); | ||
| 935 | glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data()); | ||
| 936 | } | ||
| 937 | } | ||
| 938 | |||
| 939 | void RasterizerOpenGL::SyncLightSpecular0(int light_index) { | ||
| 940 | auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); | ||
| 941 | if (color != uniform_block_data.data.light_src[light_index].specular_0) { | ||
| 942 | uniform_block_data.data.light_src[light_index].specular_0 = color; | ||
| 943 | uniform_block_data.dirty = true; | ||
| 944 | } | ||
| 945 | } | ||
| 946 | |||
| 947 | void RasterizerOpenGL::SyncLightSpecular1(int light_index) { | ||
| 948 | auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); | ||
| 949 | if (color != uniform_block_data.data.light_src[light_index].specular_1) { | ||
| 950 | uniform_block_data.data.light_src[light_index].specular_1 = color; | ||
| 951 | uniform_block_data.dirty = true; | ||
| 952 | } | ||
| 953 | } | ||
| 954 | |||
| 955 | void RasterizerOpenGL::SyncLightDiffuse(int light_index) { | ||
| 956 | auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); | ||
| 957 | if (color != uniform_block_data.data.light_src[light_index].diffuse) { | ||
| 958 | uniform_block_data.data.light_src[light_index].diffuse = color; | ||
| 959 | uniform_block_data.dirty = true; | ||
| 960 | } | ||
| 961 | } | ||
| 962 | |||
| 963 | void RasterizerOpenGL::SyncLightAmbient(int light_index) { | ||
| 964 | auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); | ||
| 965 | if (color != uniform_block_data.data.light_src[light_index].ambient) { | ||
| 966 | uniform_block_data.data.light_src[light_index].ambient = color; | ||
| 967 | uniform_block_data.dirty = true; | ||
| 968 | } | ||
| 969 | } | ||
| 970 | |||
| 971 | void RasterizerOpenGL::SyncLightPosition(int light_index) { | ||
| 972 | GLvec3 position = { | ||
| 973 | Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), | ||
| 974 | Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), | ||
| 975 | Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; | ||
| 976 | |||
| 977 | if (position != uniform_block_data.data.light_src[light_index].position) { | ||
| 978 | uniform_block_data.data.light_src[light_index].position = position; | ||
| 979 | uniform_block_data.dirty = true; | ||
| 980 | } | ||
| 981 | } | ||
| 982 | |||
| 692 | void RasterizerOpenGL::SyncDrawState() { | 983 | void RasterizerOpenGL::SyncDrawState() { |
| 693 | const auto& regs = Pica::g_state.regs; | 984 | const auto& regs = Pica::g_state.regs; |
| 694 | 985 | ||
| 695 | // Sync the viewport | 986 | // Sync the viewport |
| 696 | GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2; | 987 | GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2; |
| 697 | GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2; | 988 | GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2; |
| 698 | 989 | ||
| 699 | // OpenGL uses different y coordinates, so negate corner offset and flip origin | 990 | // OpenGL uses different y coordinates, so negate corner offset and flip origin |
| 700 | // TODO: Ensure viewport_corner.x should not be negated or origin flipped | 991 | // TODO: Ensure viewport_corner.x should not be negated or origin flipped |
| @@ -723,7 +1014,7 @@ void RasterizerOpenGL::SyncDrawState() { | |||
| 723 | MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200)); | 1014 | MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200)); |
| 724 | 1015 | ||
| 725 | void RasterizerOpenGL::ReloadColorBuffer() { | 1016 | void RasterizerOpenGL::ReloadColorBuffer() { |
| 726 | u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress()); | 1017 | u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); |
| 727 | 1018 | ||
| 728 | if (color_buffer == nullptr) | 1019 | if (color_buffer == nullptr) |
| 729 | return; | 1020 | return; |
| @@ -758,13 +1049,11 @@ void RasterizerOpenGL::ReloadColorBuffer() { | |||
| 758 | } | 1049 | } |
| 759 | 1050 | ||
| 760 | void RasterizerOpenGL::ReloadDepthBuffer() { | 1051 | void RasterizerOpenGL::ReloadDepthBuffer() { |
| 761 | PAddr depth_buffer_addr = Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress(); | 1052 | if (cached_fb_depth_addr == 0) |
| 762 | |||
| 763 | if (depth_buffer_addr == 0) | ||
| 764 | return; | 1053 | return; |
| 765 | 1054 | ||
| 766 | // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil | 1055 | // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil |
| 767 | u8* depth_buffer = Memory::GetPhysicalPointer(depth_buffer_addr); | 1056 | u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); |
| 768 | 1057 | ||
| 769 | if (depth_buffer == nullptr) | 1058 | if (depth_buffer == nullptr) |
| 770 | return; | 1059 | return; |
| @@ -827,8 +1116,8 @@ Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit"); | |||
| 827 | MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200)); | 1116 | MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200)); |
| 828 | 1117 | ||
| 829 | void RasterizerOpenGL::CommitColorBuffer() { | 1118 | void RasterizerOpenGL::CommitColorBuffer() { |
| 830 | if (last_fb_color_addr != 0) { | 1119 | if (cached_fb_color_addr != 0) { |
| 831 | u8* color_buffer = Memory::GetPhysicalPointer(last_fb_color_addr); | 1120 | u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); |
| 832 | 1121 | ||
| 833 | if (color_buffer != nullptr) { | 1122 | if (color_buffer != nullptr) { |
| 834 | Common::Profiling::ScopeTimer timer(buffer_commit_category); | 1123 | Common::Profiling::ScopeTimer timer(buffer_commit_category); |
| @@ -863,9 +1152,9 @@ void RasterizerOpenGL::CommitColorBuffer() { | |||
| 863 | } | 1152 | } |
| 864 | 1153 | ||
| 865 | void RasterizerOpenGL::CommitDepthBuffer() { | 1154 | void RasterizerOpenGL::CommitDepthBuffer() { |
| 866 | if (last_fb_depth_addr != 0) { | 1155 | if (cached_fb_depth_addr != 0) { |
| 867 | // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong. | 1156 | // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong. |
| 868 | u8* depth_buffer = Memory::GetPhysicalPointer(last_fb_depth_addr); | 1157 | u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); |
| 869 | 1158 | ||
| 870 | if (depth_buffer != nullptr) { | 1159 | if (depth_buffer != nullptr) { |
| 871 | Common::Profiling::ScopeTimer timer(buffer_commit_category); | 1160 | Common::Profiling::ScopeTimer timer(buffer_commit_category); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index c8a2d8f16..fef5f5331 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 18 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 18 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 19 | #include "video_core/renderer_opengl/gl_state.h" | 19 | #include "video_core/renderer_opengl/gl_state.h" |
| 20 | #include "video_core/renderer_opengl/pica_to_gl.h" | ||
| 20 | #include "video_core/shader/shader_interpreter.h" | 21 | #include "video_core/shader/shader_interpreter.h" |
| 21 | 22 | ||
| 22 | /** | 23 | /** |
| @@ -71,6 +72,59 @@ struct PicaShaderConfig { | |||
| 71 | regs.tev_combiner_buffer_input.update_mask_rgb.Value() | | 72 | regs.tev_combiner_buffer_input.update_mask_rgb.Value() | |
| 72 | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; | 73 | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; |
| 73 | 74 | ||
| 75 | // Fragment lighting | ||
| 76 | |||
| 77 | res.lighting.enable = !regs.lighting.disable; | ||
| 78 | res.lighting.src_num = regs.lighting.num_lights + 1; | ||
| 79 | |||
| 80 | for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { | ||
| 81 | unsigned num = regs.lighting.light_enable.GetNum(light_index); | ||
| 82 | const auto& light = regs.lighting.light[num]; | ||
| 83 | res.lighting.light[light_index].num = num; | ||
| 84 | res.lighting.light[light_index].directional = light.directional != 0; | ||
| 85 | res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; | ||
| 86 | res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); | ||
| 87 | res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); | ||
| 88 | res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); | ||
| 89 | } | ||
| 90 | |||
| 91 | res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; | ||
| 92 | res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; | ||
| 93 | res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); | ||
| 94 | res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); | ||
| 95 | |||
| 96 | res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; | ||
| 97 | res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; | ||
| 98 | res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); | ||
| 99 | res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); | ||
| 100 | |||
| 101 | res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; | ||
| 102 | res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; | ||
| 103 | res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); | ||
| 104 | res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); | ||
| 105 | |||
| 106 | res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; | ||
| 107 | res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; | ||
| 108 | res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); | ||
| 109 | res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); | ||
| 110 | |||
| 111 | res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; | ||
| 112 | res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; | ||
| 113 | res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); | ||
| 114 | res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); | ||
| 115 | |||
| 116 | res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; | ||
| 117 | res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; | ||
| 118 | res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); | ||
| 119 | res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); | ||
| 120 | |||
| 121 | res.lighting.config = regs.lighting.config; | ||
| 122 | res.lighting.fresnel_selector = regs.lighting.fresnel_selector; | ||
| 123 | res.lighting.bump_mode = regs.lighting.bump_mode; | ||
| 124 | res.lighting.bump_selector = regs.lighting.bump_selector; | ||
| 125 | res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; | ||
| 126 | res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; | ||
| 127 | |||
| 74 | return res; | 128 | return res; |
| 75 | } | 129 | } |
| 76 | 130 | ||
| @@ -86,9 +140,37 @@ struct PicaShaderConfig { | |||
| 86 | return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; | 140 | return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; |
| 87 | }; | 141 | }; |
| 88 | 142 | ||
| 89 | Pica::Regs::CompareFunc alpha_test_func; | 143 | Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; |
| 90 | std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; | 144 | std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; |
| 91 | u8 combiner_buffer_input; | 145 | u8 combiner_buffer_input = 0; |
| 146 | |||
| 147 | struct { | ||
| 148 | struct { | ||
| 149 | unsigned num = 0; | ||
| 150 | bool directional = false; | ||
| 151 | bool two_sided_diffuse = false; | ||
| 152 | bool dist_atten_enable = false; | ||
| 153 | GLfloat dist_atten_scale = 0.0f; | ||
| 154 | GLfloat dist_atten_bias = 0.0f; | ||
| 155 | } light[8]; | ||
| 156 | |||
| 157 | bool enable = false; | ||
| 158 | unsigned src_num = 0; | ||
| 159 | Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; | ||
| 160 | unsigned bump_selector = 0; | ||
| 161 | bool bump_renorm = false; | ||
| 162 | bool clamp_highlights = false; | ||
| 163 | |||
| 164 | Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; | ||
| 165 | Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; | ||
| 166 | |||
| 167 | struct { | ||
| 168 | bool enable = false; | ||
| 169 | bool abs_input = false; | ||
| 170 | Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; | ||
| 171 | float scale = 1.0f; | ||
| 172 | } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; | ||
| 173 | } lighting; | ||
| 92 | }; | 174 | }; |
| 93 | 175 | ||
| 94 | namespace std { | 176 | namespace std { |
| @@ -167,7 +249,7 @@ private: | |||
| 167 | 249 | ||
| 168 | /// Structure that the hardware rendered vertices are composed of | 250 | /// Structure that the hardware rendered vertices are composed of |
| 169 | struct HardwareVertex { | 251 | struct HardwareVertex { |
| 170 | HardwareVertex(const Pica::Shader::OutputVertex& v) { | 252 | HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { |
| 171 | position[0] = v.pos.x.ToFloat32(); | 253 | position[0] = v.pos.x.ToFloat32(); |
| 172 | position[1] = v.pos.y.ToFloat32(); | 254 | position[1] = v.pos.y.ToFloat32(); |
| 173 | position[2] = v.pos.z.ToFloat32(); | 255 | position[2] = v.pos.z.ToFloat32(); |
| @@ -182,6 +264,19 @@ private: | |||
| 182 | tex_coord1[1] = v.tc1.y.ToFloat32(); | 264 | tex_coord1[1] = v.tc1.y.ToFloat32(); |
| 183 | tex_coord2[0] = v.tc2.x.ToFloat32(); | 265 | tex_coord2[0] = v.tc2.x.ToFloat32(); |
| 184 | tex_coord2[1] = v.tc2.y.ToFloat32(); | 266 | tex_coord2[1] = v.tc2.y.ToFloat32(); |
| 267 | normquat[0] = v.quat.x.ToFloat32(); | ||
| 268 | normquat[1] = v.quat.y.ToFloat32(); | ||
| 269 | normquat[2] = v.quat.z.ToFloat32(); | ||
| 270 | normquat[3] = v.quat.w.ToFloat32(); | ||
| 271 | view[0] = v.view.x.ToFloat32(); | ||
| 272 | view[1] = v.view.y.ToFloat32(); | ||
| 273 | view[2] = v.view.z.ToFloat32(); | ||
| 274 | |||
| 275 | if (flip_quaternion) { | ||
| 276 | for (float& x : normquat) { | ||
| 277 | x = -x; | ||
| 278 | } | ||
| 279 | } | ||
| 185 | } | 280 | } |
| 186 | 281 | ||
| 187 | GLfloat position[4]; | 282 | GLfloat position[4]; |
| @@ -189,20 +284,31 @@ private: | |||
| 189 | GLfloat tex_coord0[2]; | 284 | GLfloat tex_coord0[2]; |
| 190 | GLfloat tex_coord1[2]; | 285 | GLfloat tex_coord1[2]; |
| 191 | GLfloat tex_coord2[2]; | 286 | GLfloat tex_coord2[2]; |
| 287 | GLfloat normquat[4]; | ||
| 288 | GLfloat view[3]; | ||
| 289 | }; | ||
| 290 | |||
| 291 | struct LightSrc { | ||
| 292 | alignas(16) GLvec3 specular_0; | ||
| 293 | alignas(16) GLvec3 specular_1; | ||
| 294 | alignas(16) GLvec3 diffuse; | ||
| 295 | alignas(16) GLvec3 ambient; | ||
| 296 | alignas(16) GLvec3 position; | ||
| 192 | }; | 297 | }; |
| 193 | 298 | ||
| 194 | /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned | 299 | /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned |
| 195 | struct UniformData { | 300 | struct UniformData { |
| 196 | // A vec4 color for each of the six tev stages | 301 | // A vec4 color for each of the six tev stages |
| 197 | std::array<GLfloat, 4> const_color[6]; | 302 | GLvec4 const_color[6]; |
| 198 | std::array<GLfloat, 4> tev_combiner_buffer_color; | 303 | GLvec4 tev_combiner_buffer_color; |
| 199 | GLint alphatest_ref; | 304 | GLint alphatest_ref; |
| 200 | GLfloat depth_offset; | 305 | GLfloat depth_offset; |
| 201 | INSERT_PADDING_BYTES(8); | 306 | alignas(16) GLvec3 lighting_global_ambient; |
| 307 | LightSrc light_src[8]; | ||
| 202 | }; | 308 | }; |
| 203 | 309 | ||
| 204 | static_assert(sizeof(UniformData) == 0x80, "The size of the UniformData structure has changed, update the structure in the shader"); | 310 | static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); |
| 205 | static_assert(sizeof(UniformData) < 16000, "UniformData structure must be less than 16kb as per the OpenGL spec"); | 311 | static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); |
| 206 | 312 | ||
| 207 | /// Reconfigure the OpenGL color texture to use the given format and dimensions | 313 | /// Reconfigure the OpenGL color texture to use the given format and dimensions |
| 208 | void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); | 314 | void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); |
| @@ -249,6 +355,27 @@ private: | |||
| 249 | /// Syncs the TEV combiner color buffer to match the PICA register | 355 | /// Syncs the TEV combiner color buffer to match the PICA register |
| 250 | void SyncCombinerColor(); | 356 | void SyncCombinerColor(); |
| 251 | 357 | ||
| 358 | /// Syncs the lighting global ambient color to match the PICA register | ||
| 359 | void SyncGlobalAmbient(); | ||
| 360 | |||
| 361 | /// Syncs the lighting lookup tables | ||
| 362 | void SyncLightingLUT(unsigned index); | ||
| 363 | |||
| 364 | /// Syncs the specified light's diffuse color to match the PICA register | ||
| 365 | void SyncLightDiffuse(int light_index); | ||
| 366 | |||
| 367 | /// Syncs the specified light's ambient color to match the PICA register | ||
| 368 | void SyncLightAmbient(int light_index); | ||
| 369 | |||
| 370 | /// Syncs the specified light's position to match the PICA register | ||
| 371 | void SyncLightPosition(int light_index); | ||
| 372 | |||
| 373 | /// Syncs the specified light's specular 0 color to match the PICA register | ||
| 374 | void SyncLightSpecular0(int light_index); | ||
| 375 | |||
| 376 | /// Syncs the specified light's specular 1 color to match the PICA register | ||
| 377 | void SyncLightSpecular1(int light_index); | ||
| 378 | |||
| 252 | /// Syncs the remaining OpenGL drawing state to match the current PICA state | 379 | /// Syncs the remaining OpenGL drawing state to match the current PICA state |
| 253 | void SyncDrawState(); | 380 | void SyncDrawState(); |
| 254 | 381 | ||
| @@ -278,8 +405,8 @@ private: | |||
| 278 | 405 | ||
| 279 | OpenGLState state; | 406 | OpenGLState state; |
| 280 | 407 | ||
| 281 | PAddr last_fb_color_addr; | 408 | PAddr cached_fb_color_addr; |
| 282 | PAddr last_fb_depth_addr; | 409 | PAddr cached_fb_depth_addr; |
| 283 | 410 | ||
| 284 | // Hardware rasterizer | 411 | // Hardware rasterizer |
| 285 | std::array<SamplerInfo, 3> texture_samplers; | 412 | std::array<SamplerInfo, 3> texture_samplers; |
| @@ -291,6 +418,7 @@ private: | |||
| 291 | 418 | ||
| 292 | struct { | 419 | struct { |
| 293 | UniformData data; | 420 | UniformData data; |
| 421 | bool lut_dirty[6]; | ||
| 294 | bool dirty; | 422 | bool dirty; |
| 295 | } uniform_block_data; | 423 | } uniform_block_data; |
| 296 | 424 | ||
| @@ -298,4 +426,7 @@ private: | |||
| 298 | OGLBuffer vertex_buffer; | 426 | OGLBuffer vertex_buffer; |
| 299 | OGLBuffer uniform_buffer; | 427 | OGLBuffer uniform_buffer; |
| 300 | OGLFramebuffer framebuffer; | 428 | OGLFramebuffer framebuffer; |
| 429 | |||
| 430 | std::array<OGLTexture, 6> lighting_lut; | ||
| 431 | std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; | ||
| 301 | }; | 432 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 22022f7f4..ee4b54ab9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -32,12 +32,10 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, | |||
| 32 | out += "primary_color"; | 32 | out += "primary_color"; |
| 33 | break; | 33 | break; |
| 34 | case Source::PrimaryFragmentColor: | 34 | case Source::PrimaryFragmentColor: |
| 35 | // HACK: Until we implement fragment lighting, use primary_color | 35 | out += "primary_fragment_color"; |
| 36 | out += "primary_color"; | ||
| 37 | break; | 36 | break; |
| 38 | case Source::SecondaryFragmentColor: | 37 | case Source::SecondaryFragmentColor: |
| 39 | // HACK: Until we implement fragment lighting, use zero | 38 | out += "secondary_fragment_color"; |
| 40 | out += "vec4(0.0)"; | ||
| 41 | break; | 39 | break; |
| 42 | case Source::Texture0: | 40 | case Source::Texture0: |
| 43 | out += "texture(tex[0], texcoord[0])"; | 41 | out += "texture(tex[0], texcoord[0])"; |
| @@ -320,26 +318,229 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi | |||
| 320 | out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; | 318 | out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; |
| 321 | } | 319 | } |
| 322 | 320 | ||
| 321 | /// Writes the code to emulate fragment lighting | ||
| 322 | static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | ||
| 323 | // Define lighting globals | ||
| 324 | out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" | ||
| 325 | "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" | ||
| 326 | "vec3 light_vector = vec3(0.0);\n" | ||
| 327 | "vec3 refl_value = vec3(0.0);\n"; | ||
| 328 | |||
| 329 | // Compute fragment normals | ||
| 330 | if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { | ||
| 331 | // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture | ||
| 332 | std::string bump_selector = std::to_string(config.lighting.bump_selector); | ||
| 333 | out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; | ||
| 334 | |||
| 335 | // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result | ||
| 336 | if (config.lighting.bump_renorm) { | ||
| 337 | std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; | ||
| 338 | out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; | ||
| 339 | } | ||
| 340 | } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { | ||
| 341 | // Bump mapping is enabled using a tangent map | ||
| 342 | LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); | ||
| 343 | UNIMPLEMENTED(); | ||
| 344 | } else { | ||
| 345 | // No bump mapping - surface local normal is just a unit normal | ||
| 346 | out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; | ||
| 347 | } | ||
| 348 | |||
| 349 | // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace | ||
| 350 | out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; | ||
| 351 | |||
| 352 | // Gets the index into the specified lookup table for specular lighting | ||
| 353 | auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { | ||
| 354 | const std::string half_angle = "normalize(normalize(view) + light_vector)"; | ||
| 355 | std::string index; | ||
| 356 | switch (input) { | ||
| 357 | case Regs::LightingLutInput::NH: | ||
| 358 | index = "dot(normal, " + half_angle + ")"; | ||
| 359 | break; | ||
| 360 | |||
| 361 | case Regs::LightingLutInput::VH: | ||
| 362 | index = std::string("dot(normalize(view), " + half_angle + ")"); | ||
| 363 | break; | ||
| 364 | |||
| 365 | case Regs::LightingLutInput::NV: | ||
| 366 | index = std::string("dot(normal, normalize(view))"); | ||
| 367 | break; | ||
| 368 | |||
| 369 | case Regs::LightingLutInput::LN: | ||
| 370 | index = std::string("dot(light_vector, normal)"); | ||
| 371 | break; | ||
| 372 | |||
| 373 | default: | ||
| 374 | LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); | ||
| 375 | UNIMPLEMENTED(); | ||
| 376 | break; | ||
| 377 | } | ||
| 378 | |||
| 379 | if (abs) { | ||
| 380 | // LUT index is in the range of (0.0, 1.0) | ||
| 381 | index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; | ||
| 382 | return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; | ||
| 383 | } else { | ||
| 384 | // LUT index is in the range of (-1.0, 1.0) | ||
| 385 | index = "clamp(" + index + ", -1.0, 1.0)"; | ||
| 386 | return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)"; | ||
| 387 | } | ||
| 388 | |||
| 389 | return std::string(); | ||
| 390 | }; | ||
| 391 | |||
| 392 | // Gets the lighting lookup table value given the specified sampler and index | ||
| 393 | auto GetLutValue = [](Regs::LightingSampler sampler, std::string lut_index) { | ||
| 394 | return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " + | ||
| 395 | lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]"); | ||
| 396 | }; | ||
| 397 | |||
| 398 | // Write the code to emulate each enabled light | ||
| 399 | for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { | ||
| 400 | const auto& light_config = config.lighting.light[light_index]; | ||
| 401 | std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; | ||
| 402 | |||
| 403 | // Compute light vector (directional or positional) | ||
| 404 | if (light_config.directional) | ||
| 405 | out += "light_vector = normalize(" + light_src + ".position);\n"; | ||
| 406 | else | ||
| 407 | out += "light_vector = normalize(" + light_src + ".position + view);\n"; | ||
| 408 | |||
| 409 | // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided | ||
| 410 | std::string dot_product = light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)"; | ||
| 411 | |||
| 412 | // If enabled, compute distance attenuation value | ||
| 413 | std::string dist_atten = "1.0"; | ||
| 414 | if (light_config.dist_atten_enable) { | ||
| 415 | std::string scale = std::to_string(light_config.dist_atten_scale); | ||
| 416 | std::string bias = std::to_string(light_config.dist_atten_bias); | ||
| 417 | std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")"; | ||
| 418 | index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; | ||
| 419 | const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); | ||
| 420 | dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); | ||
| 421 | } | ||
| 422 | |||
| 423 | // If enabled, clamp specular component if lighting result is negative | ||
| 424 | std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; | ||
| 425 | |||
| 426 | // Specular 0 component | ||
| 427 | std::string d0_lut_value = "1.0"; | ||
| 428 | if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { | ||
| 429 | // Lookup specular "distribution 0" LUT value | ||
| 430 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); | ||
| 431 | d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; | ||
| 432 | } | ||
| 433 | std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; | ||
| 434 | |||
| 435 | // If enabled, lookup ReflectRed value, otherwise, 1.0 is used | ||
| 436 | if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { | ||
| 437 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); | ||
| 438 | std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; | ||
| 439 | out += "refl_value.r = " + value + ";\n"; | ||
| 440 | } else { | ||
| 441 | out += "refl_value.r = 1.0;\n"; | ||
| 442 | } | ||
| 443 | |||
| 444 | // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used | ||
| 445 | if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { | ||
| 446 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); | ||
| 447 | std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; | ||
| 448 | out += "refl_value.g = " + value + ";\n"; | ||
| 449 | } else { | ||
| 450 | out += "refl_value.g = refl_value.r;\n"; | ||
| 451 | } | ||
| 452 | |||
| 453 | // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used | ||
| 454 | if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { | ||
| 455 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); | ||
| 456 | std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; | ||
| 457 | out += "refl_value.b = " + value + ";\n"; | ||
| 458 | } else { | ||
| 459 | out += "refl_value.b = refl_value.r;\n"; | ||
| 460 | } | ||
| 461 | |||
| 462 | // Specular 1 component | ||
| 463 | std::string d1_lut_value = "1.0"; | ||
| 464 | if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { | ||
| 465 | // Lookup specular "distribution 1" LUT value | ||
| 466 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); | ||
| 467 | d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; | ||
| 468 | } | ||
| 469 | std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; | ||
| 470 | |||
| 471 | // Fresnel | ||
| 472 | if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { | ||
| 473 | // Lookup fresnel LUT value | ||
| 474 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); | ||
| 475 | std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; | ||
| 476 | |||
| 477 | // Enabled for difffuse lighting alpha component | ||
| 478 | if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || | ||
| 479 | config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) | ||
| 480 | out += "diffuse_sum.a *= " + value + ";\n"; | ||
| 481 | |||
| 482 | // Enabled for the specular lighting alpha component | ||
| 483 | if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || | ||
| 484 | config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) | ||
| 485 | out += "specular_sum.a *= " + value + ";\n"; | ||
| 486 | } | ||
| 487 | |||
| 488 | // Compute primary fragment color (diffuse lighting) function | ||
| 489 | out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; | ||
| 490 | |||
| 491 | // Compute secondary fragment color (specular lighting) function | ||
| 492 | out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; | ||
| 493 | } | ||
| 494 | |||
| 495 | // Sum final lighting result | ||
| 496 | out += "diffuse_sum.rgb += lighting_global_ambient;\n"; | ||
| 497 | out += "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n"; | ||
| 498 | out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; | ||
| 499 | } | ||
| 500 | |||
| 323 | std::string GenerateFragmentShader(const PicaShaderConfig& config) { | 501 | std::string GenerateFragmentShader(const PicaShaderConfig& config) { |
| 324 | std::string out = R"( | 502 | std::string out = R"( |
| 325 | #version 330 core | 503 | #version 330 core |
| 326 | #define NUM_TEV_STAGES 6 | 504 | #define NUM_TEV_STAGES 6 |
| 505 | #define NUM_LIGHTS 8 | ||
| 506 | #define LIGHTING_LUT_SIZE 256 | ||
| 507 | #define FLOAT_255 (255.0 / 256.0) | ||
| 327 | 508 | ||
| 328 | in vec4 primary_color; | 509 | in vec4 primary_color; |
| 329 | in vec2 texcoord[3]; | 510 | in vec2 texcoord[3]; |
| 511 | in vec4 normquat; | ||
| 512 | in vec3 view; | ||
| 330 | 513 | ||
| 331 | out vec4 color; | 514 | out vec4 color; |
| 332 | 515 | ||
| 516 | struct LightSrc { | ||
| 517 | vec3 specular_0; | ||
| 518 | vec3 specular_1; | ||
| 519 | vec3 diffuse; | ||
| 520 | vec3 ambient; | ||
| 521 | vec3 position; | ||
| 522 | }; | ||
| 523 | |||
| 333 | layout (std140) uniform shader_data { | 524 | layout (std140) uniform shader_data { |
| 334 | vec4 const_color[NUM_TEV_STAGES]; | 525 | vec4 const_color[NUM_TEV_STAGES]; |
| 335 | vec4 tev_combiner_buffer_color; | 526 | vec4 tev_combiner_buffer_color; |
| 336 | int alphatest_ref; | 527 | int alphatest_ref; |
| 337 | float depth_offset; | 528 | float depth_offset; |
| 529 | vec3 lighting_global_ambient; | ||
| 530 | LightSrc light_src[NUM_LIGHTS]; | ||
| 338 | }; | 531 | }; |
| 339 | 532 | ||
| 340 | uniform sampler2D tex[3]; | 533 | uniform sampler2D tex[3]; |
| 534 | uniform sampler1D lut[6]; | ||
| 535 | |||
| 536 | // Rotate the vector v by the quaternion q | ||
| 537 | vec3 quaternion_rotate(vec4 q, vec3 v) { | ||
| 538 | return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); | ||
| 539 | } | ||
| 341 | 540 | ||
| 342 | void main() { | 541 | void main() { |
| 542 | vec4 primary_fragment_color = vec4(0.0); | ||
| 543 | vec4 secondary_fragment_color = vec4(0.0); | ||
| 343 | )"; | 544 | )"; |
| 344 | 545 | ||
| 345 | // Do not do any sort of processing if it's obvious we're not going to pass the alpha test | 546 | // Do not do any sort of processing if it's obvious we're not going to pass the alpha test |
| @@ -348,6 +549,9 @@ void main() { | |||
| 348 | return out; | 549 | return out; |
| 349 | } | 550 | } |
| 350 | 551 | ||
| 552 | if (config.lighting.enable) | ||
| 553 | WriteLighting(out, config); | ||
| 554 | |||
| 351 | out += "vec4 combiner_buffer = vec4(0.0);\n"; | 555 | out += "vec4 combiner_buffer = vec4(0.0);\n"; |
| 352 | out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; | 556 | out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; |
| 353 | out += "vec4 last_tex_env_out = vec4(0.0);\n"; | 557 | out += "vec4 last_tex_env_out = vec4(0.0);\n"; |
| @@ -369,21 +573,28 @@ void main() { | |||
| 369 | 573 | ||
| 370 | std::string GenerateVertexShader() { | 574 | std::string GenerateVertexShader() { |
| 371 | std::string out = "#version 330 core\n"; | 575 | std::string out = "#version 330 core\n"; |
| 576 | |||
| 372 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; | 577 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; |
| 373 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; | 578 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; |
| 374 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; | 579 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; |
| 375 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; | 580 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; |
| 376 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; | 581 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; |
| 582 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; | ||
| 583 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; | ||
| 377 | 584 | ||
| 378 | out += R"( | 585 | out += R"( |
| 379 | out vec4 primary_color; | 586 | out vec4 primary_color; |
| 380 | out vec2 texcoord[3]; | 587 | out vec2 texcoord[3]; |
| 588 | out vec4 normquat; | ||
| 589 | out vec3 view; | ||
| 381 | 590 | ||
| 382 | void main() { | 591 | void main() { |
| 383 | primary_color = vert_color; | 592 | primary_color = vert_color; |
| 384 | texcoord[0] = vert_texcoord0; | 593 | texcoord[0] = vert_texcoord0; |
| 385 | texcoord[1] = vert_texcoord1; | 594 | texcoord[1] = vert_texcoord1; |
| 386 | texcoord[2] = vert_texcoord2; | 595 | texcoord[2] = vert_texcoord2; |
| 596 | normquat = vert_normquat; | ||
| 597 | view = vert_view; | ||
| 387 | gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); | 598 | gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); |
| 388 | } | 599 | } |
| 389 | )"; | 600 | )"; |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 046aae14f..097242f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h | |||
| @@ -14,6 +14,8 @@ enum Attributes { | |||
| 14 | ATTRIBUTE_TEXCOORD0, | 14 | ATTRIBUTE_TEXCOORD0, |
| 15 | ATTRIBUTE_TEXCOORD1, | 15 | ATTRIBUTE_TEXCOORD1, |
| 16 | ATTRIBUTE_TEXCOORD2, | 16 | ATTRIBUTE_TEXCOORD2, |
| 17 | ATTRIBUTE_NORMQUAT, | ||
| 18 | ATTRIBUTE_VIEW, | ||
| 17 | }; | 19 | }; |
| 18 | 20 | ||
| 19 | /** | 21 | /** |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index a82372995..08e4d0b54 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -48,6 +48,10 @@ OpenGLState::OpenGLState() { | |||
| 48 | texture_unit.sampler = 0; | 48 | texture_unit.sampler = 0; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | for (auto& lut : lighting_lut) { | ||
| 52 | lut.texture_1d = 0; | ||
| 53 | } | ||
| 54 | |||
| 51 | draw.framebuffer = 0; | 55 | draw.framebuffer = 0; |
| 52 | draw.vertex_array = 0; | 56 | draw.vertex_array = 0; |
| 53 | draw.vertex_buffer = 0; | 57 | draw.vertex_buffer = 0; |
| @@ -170,6 +174,14 @@ void OpenGLState::Apply() { | |||
| 170 | } | 174 | } |
| 171 | } | 175 | } |
| 172 | 176 | ||
| 177 | // Lighting LUTs | ||
| 178 | for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { | ||
| 179 | if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { | ||
| 180 | glActiveTexture(GL_TEXTURE3 + i); | ||
| 181 | glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
| 173 | // Framebuffer | 185 | // Framebuffer |
| 174 | if (draw.framebuffer != cur_state.draw.framebuffer) { | 186 | if (draw.framebuffer != cur_state.draw.framebuffer) { |
| 175 | glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); | 187 | glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b8ab45bb8..e848058d7 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -62,6 +62,10 @@ public: | |||
| 62 | } texture_units[3]; | 62 | } texture_units[3]; |
| 63 | 63 | ||
| 64 | struct { | 64 | struct { |
| 65 | GLuint texture_1d; // GL_TEXTURE_BINDING_1D | ||
| 66 | } lighting_lut[6]; | ||
| 67 | |||
| 68 | struct { | ||
| 65 | GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING | 69 | GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING |
| 66 | GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING | 70 | GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING |
| 67 | GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING | 71 | GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING |
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 04c1d1a34..3d6c4e9e5 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h | |||
| @@ -10,6 +10,9 @@ | |||
| 10 | 10 | ||
| 11 | #include "video_core/pica.h" | 11 | #include "video_core/pica.h" |
| 12 | 12 | ||
| 13 | using GLvec3 = std::array<GLfloat, 3>; | ||
| 14 | using GLvec4 = std::array<GLfloat, 4>; | ||
| 15 | |||
| 13 | namespace PicaToGL { | 16 | namespace PicaToGL { |
| 14 | 17 | ||
| 15 | inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { | 18 | inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { |
| @@ -175,7 +178,7 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) { | |||
| 175 | return stencil_op_table[(unsigned)action]; | 178 | return stencil_op_table[(unsigned)action]; |
| 176 | } | 179 | } |
| 177 | 180 | ||
| 178 | inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) { | 181 | inline GLvec4 ColorRGBA8(const u32 color) { |
| 179 | return { { (color >> 0 & 0xFF) / 255.0f, | 182 | return { { (color >> 0 & 0xFF) / 255.0f, |
| 180 | (color >> 8 & 0xFF) / 255.0f, | 183 | (color >> 8 & 0xFF) / 255.0f, |
| 181 | (color >> 16 & 0xFF) / 255.0f, | 184 | (color >> 16 & 0xFF) / 255.0f, |
| @@ -183,4 +186,11 @@ inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) { | |||
| 183 | } }; | 186 | } }; |
| 184 | } | 187 | } |
| 185 | 188 | ||
| 189 | inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) { | ||
| 190 | return { { color.r / 255.0f, | ||
| 191 | color.g / 255.0f, | ||
| 192 | color.b / 255.0f | ||
| 193 | } }; | ||
| 194 | } | ||
| 195 | |||
| 186 | } // namespace | 196 | } // namespace |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a6a38f0af..ca3a6a6b4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -81,8 +81,8 @@ struct ScreenRectVertex { | |||
| 81 | * The projection part of the matrix is trivial, hence these operations are represented | 81 | * The projection part of the matrix is trivial, hence these operations are represented |
| 82 | * by a 3x2 matrix. | 82 | * by a 3x2 matrix. |
| 83 | */ | 83 | */ |
| 84 | static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const float height) { | 84 | static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) { |
| 85 | std::array<GLfloat, 3*2> matrix; | 85 | std::array<GLfloat, 3 * 2> matrix; |
| 86 | 86 | ||
| 87 | matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; | 87 | matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; |
| 88 | matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; | 88 | matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 59f54236b..44c234ed8 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -134,11 +134,13 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr | |||
| 134 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | 134 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); |
| 135 | } | 135 | } |
| 136 | 136 | ||
| 137 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | 137 | LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " |
| 138 | "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", | ||
| 138 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | 139 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |
| 139 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | 140 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), |
| 140 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | 141 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |
| 141 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | 142 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), |
| 143 | ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); | ||
| 142 | 144 | ||
| 143 | return ret; | 145 | return ret; |
| 144 | } | 146 | } |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 1c6fa592c..f068cd93f 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -37,17 +37,19 @@ struct OutputVertex { | |||
| 37 | Math::Vec4<float24> color; | 37 | Math::Vec4<float24> color; |
| 38 | Math::Vec2<float24> tc0; | 38 | Math::Vec2<float24> tc0; |
| 39 | Math::Vec2<float24> tc1; | 39 | Math::Vec2<float24> tc1; |
| 40 | float24 pad[6]; | 40 | INSERT_PADDING_WORDS(2); |
| 41 | Math::Vec3<float24> view; | ||
| 42 | INSERT_PADDING_WORDS(1); | ||
| 41 | Math::Vec2<float24> tc2; | 43 | Math::Vec2<float24> tc2; |
| 42 | 44 | ||
| 43 | // Padding for optimal alignment | 45 | // Padding for optimal alignment |
| 44 | float24 pad2[4]; | 46 | INSERT_PADDING_WORDS(4); |
| 45 | 47 | ||
| 46 | // Attributes used to store intermediate results | 48 | // Attributes used to store intermediate results |
| 47 | 49 | ||
| 48 | // position after perspective divide | 50 | // position after perspective divide |
| 49 | Math::Vec3<float24> screenpos; | 51 | Math::Vec3<float24> screenpos; |
| 50 | float24 pad3; | 52 | INSERT_PADDING_WORDS(1); |
| 51 | 53 | ||
| 52 | // Linear interpolation | 54 | // Linear interpolation |
| 53 | // factor: 0=this, 1=vtx | 55 | // factor: 0=this, 1=vtx |