diff options
Diffstat (limited to 'src')
56 files changed, 1771 insertions, 1279 deletions
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp index ba6acf28e..84f9c03a7 100644 --- a/src/audio_core/audio_core.cpp +++ b/src/audio_core/audio_core.cpp | |||
| @@ -56,20 +56,8 @@ void AddAddressSpace(Kernel::VMManager& address_space) { | |||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | void SelectSink(std::string sink_id) { | 58 | void SelectSink(std::string sink_id) { |
| 59 | auto iter = | 59 | const SinkDetails& sink_details = GetSinkDetails(sink_id); |
| 60 | std::find_if(g_sink_details.begin(), g_sink_details.end(), | 60 | DSP::HLE::SetSink(sink_details.factory()); |
| 61 | [sink_id](const auto& sink_detail) { return sink_detail.id == sink_id; }); | ||
| 62 | |||
| 63 | if (sink_id == "auto" || iter == g_sink_details.end()) { | ||
| 64 | if (sink_id != "auto") { | ||
| 65 | LOG_ERROR(Audio, "AudioCore::SelectSink given invalid sink_id %s", sink_id.c_str()); | ||
| 66 | } | ||
| 67 | // Auto-select. | ||
| 68 | // g_sink_details is ordered in terms of desirability, with the best choice at the front. | ||
| 69 | iter = g_sink_details.begin(); | ||
| 70 | } | ||
| 71 | |||
| 72 | DSP::HLE::SetSink(iter->factory()); | ||
| 73 | } | 61 | } |
| 74 | 62 | ||
| 75 | void EnableStretching(bool enable) { | 63 | void EnableStretching(bool enable) { |
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h index e7668438c..c732926a2 100644 --- a/src/audio_core/null_sink.h +++ b/src/audio_core/null_sink.h | |||
| @@ -23,6 +23,12 @@ public: | |||
| 23 | size_t SamplesInQueue() const override { | 23 | size_t SamplesInQueue() const override { |
| 24 | return 0; | 24 | return 0; |
| 25 | } | 25 | } |
| 26 | |||
| 27 | void SetDevice(int device_id) override {} | ||
| 28 | |||
| 29 | std::vector<std::string> GetDeviceList() const override { | ||
| 30 | return {}; | ||
| 31 | } | ||
| 26 | }; | 32 | }; |
| 27 | 33 | ||
| 28 | } // namespace AudioCore | 34 | } // namespace AudioCore |
diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp index 4b66cd826..933c5f16d 100644 --- a/src/audio_core/sdl2_sink.cpp +++ b/src/audio_core/sdl2_sink.cpp | |||
| @@ -4,12 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #include <list> | 5 | #include <list> |
| 6 | #include <numeric> | 6 | #include <numeric> |
| 7 | #include <vector> | ||
| 8 | #include <SDL.h> | 7 | #include <SDL.h> |
| 9 | #include "audio_core/audio_core.h" | 8 | #include "audio_core/audio_core.h" |
| 10 | #include "audio_core/sdl2_sink.h" | 9 | #include "audio_core/sdl2_sink.h" |
| 11 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 12 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 12 | #include "core/settings.h" | ||
| 13 | 13 | ||
| 14 | namespace AudioCore { | 14 | namespace AudioCore { |
| 15 | 15 | ||
| @@ -42,10 +42,24 @@ SDL2Sink::SDL2Sink() : impl(std::make_unique<Impl>()) { | |||
| 42 | SDL_AudioSpec obtained_audiospec; | 42 | SDL_AudioSpec obtained_audiospec; |
| 43 | SDL_zero(obtained_audiospec); | 43 | SDL_zero(obtained_audiospec); |
| 44 | 44 | ||
| 45 | impl->audio_device_id = | 45 | int device_count = SDL_GetNumAudioDevices(0); |
| 46 | SDL_OpenAudioDevice(nullptr, false, &desired_audiospec, &obtained_audiospec, 0); | 46 | device_list.clear(); |
| 47 | for (int i = 0; i < device_count; ++i) { | ||
| 48 | device_list.push_back(SDL_GetAudioDeviceName(i, 0)); | ||
| 49 | } | ||
| 50 | |||
| 51 | const char* device = nullptr; | ||
| 52 | |||
| 53 | if (device_count >= 1 && Settings::values.audio_device_id != "auto" && | ||
| 54 | !Settings::values.audio_device_id.empty()) { | ||
| 55 | device = Settings::values.audio_device_id.c_str(); | ||
| 56 | } | ||
| 57 | |||
| 58 | impl->audio_device_id = SDL_OpenAudioDevice(device, false, &desired_audiospec, | ||
| 59 | &obtained_audiospec, SDL_AUDIO_ALLOW_ANY_CHANGE); | ||
| 47 | if (impl->audio_device_id <= 0) { | 60 | if (impl->audio_device_id <= 0) { |
| 48 | LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed with: %s", SDL_GetError()); | 61 | LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed with code %d for device \"%s\"", |
| 62 | impl->audio_device_id, Settings::values.audio_device_id.c_str()); | ||
| 49 | return; | 63 | return; |
| 50 | } | 64 | } |
| 51 | 65 | ||
| @@ -69,6 +83,10 @@ unsigned int SDL2Sink::GetNativeSampleRate() const { | |||
| 69 | return impl->sample_rate; | 83 | return impl->sample_rate; |
| 70 | } | 84 | } |
| 71 | 85 | ||
| 86 | std::vector<std::string> SDL2Sink::GetDeviceList() const { | ||
| 87 | return device_list; | ||
| 88 | } | ||
| 89 | |||
| 72 | void SDL2Sink::EnqueueSamples(const s16* samples, size_t sample_count) { | 90 | void SDL2Sink::EnqueueSamples(const s16* samples, size_t sample_count) { |
| 73 | if (impl->audio_device_id <= 0) | 91 | if (impl->audio_device_id <= 0) |
| 74 | return; | 92 | return; |
| @@ -96,6 +114,10 @@ size_t SDL2Sink::SamplesInQueue() const { | |||
| 96 | return total_size; | 114 | return total_size; |
| 97 | } | 115 | } |
| 98 | 116 | ||
| 117 | void SDL2Sink::SetDevice(int device_id) { | ||
| 118 | this->device_id = device_id; | ||
| 119 | } | ||
| 120 | |||
| 99 | void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) { | 121 | void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) { |
| 100 | Impl* impl = reinterpret_cast<Impl*>(impl_); | 122 | Impl* impl = reinterpret_cast<Impl*>(impl_); |
| 101 | 123 | ||
diff --git a/src/audio_core/sdl2_sink.h b/src/audio_core/sdl2_sink.h index ccd0f7c7e..bcc725369 100644 --- a/src/audio_core/sdl2_sink.h +++ b/src/audio_core/sdl2_sink.h | |||
| @@ -21,9 +21,14 @@ public: | |||
| 21 | 21 | ||
| 22 | size_t SamplesInQueue() const override; | 22 | size_t SamplesInQueue() const override; |
| 23 | 23 | ||
| 24 | std::vector<std::string> GetDeviceList() const override; | ||
| 25 | void SetDevice(int device_id) override; | ||
| 26 | |||
| 24 | private: | 27 | private: |
| 25 | struct Impl; | 28 | struct Impl; |
| 26 | std::unique_ptr<Impl> impl; | 29 | std::unique_ptr<Impl> impl; |
| 30 | int device_id; | ||
| 31 | std::vector<std::string> device_list; | ||
| 27 | }; | 32 | }; |
| 28 | 33 | ||
| 29 | } // namespace AudioCore | 34 | } // namespace AudioCore |
diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h index 08f3bab5b..558c8c0fe 100644 --- a/src/audio_core/sink.h +++ b/src/audio_core/sink.h | |||
| @@ -31,6 +31,15 @@ public: | |||
| 31 | 31 | ||
| 32 | /// Samples enqueued that have not been played yet. | 32 | /// Samples enqueued that have not been played yet. |
| 33 | virtual std::size_t SamplesInQueue() const = 0; | 33 | virtual std::size_t SamplesInQueue() const = 0; |
| 34 | |||
| 35 | /** | ||
| 36 | * Sets the desired output device. | ||
| 37 | * @param device_id Id of the desired device. | ||
| 38 | */ | ||
| 39 | virtual void SetDevice(int device_id) = 0; | ||
| 40 | |||
| 41 | /// Returns the list of available devices. | ||
| 42 | virtual std::vector<std::string> GetDeviceList() const = 0; | ||
| 34 | }; | 43 | }; |
| 35 | 44 | ||
| 36 | } // namespace | 45 | } // namespace |
diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp index 95ccc9e9d..6972395af 100644 --- a/src/audio_core/sink_details.cpp +++ b/src/audio_core/sink_details.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <memory> | 6 | #include <memory> |
| 6 | #include <vector> | 7 | #include <vector> |
| 7 | #include "audio_core/null_sink.h" | 8 | #include "audio_core/null_sink.h" |
| @@ -9,6 +10,7 @@ | |||
| 9 | #ifdef HAVE_SDL2 | 10 | #ifdef HAVE_SDL2 |
| 10 | #include "audio_core/sdl2_sink.h" | 11 | #include "audio_core/sdl2_sink.h" |
| 11 | #endif | 12 | #endif |
| 13 | #include "common/logging/log.h" | ||
| 12 | 14 | ||
| 13 | namespace AudioCore { | 15 | namespace AudioCore { |
| 14 | 16 | ||
| @@ -20,4 +22,21 @@ const std::vector<SinkDetails> g_sink_details = { | |||
| 20 | {"null", []() { return std::make_unique<NullSink>(); }}, | 22 | {"null", []() { return std::make_unique<NullSink>(); }}, |
| 21 | }; | 23 | }; |
| 22 | 24 | ||
| 25 | const SinkDetails& GetSinkDetails(std::string sink_id) { | ||
| 26 | auto iter = | ||
| 27 | std::find_if(g_sink_details.begin(), g_sink_details.end(), | ||
| 28 | [sink_id](const auto& sink_detail) { return sink_detail.id == sink_id; }); | ||
| 29 | |||
| 30 | if (sink_id == "auto" || iter == g_sink_details.end()) { | ||
| 31 | if (sink_id != "auto") { | ||
| 32 | LOG_ERROR(Audio, "AudioCore::SelectSink given invalid sink_id %s", sink_id.c_str()); | ||
| 33 | } | ||
| 34 | // Auto-select. | ||
| 35 | // g_sink_details is ordered in terms of desirability, with the best choice at the front. | ||
| 36 | iter = g_sink_details.begin(); | ||
| 37 | } | ||
| 38 | |||
| 39 | return *iter; | ||
| 40 | } | ||
| 41 | |||
| 23 | } // namespace AudioCore | 42 | } // namespace AudioCore |
diff --git a/src/audio_core/sink_details.h b/src/audio_core/sink_details.h index 4b30cf835..9d3735171 100644 --- a/src/audio_core/sink_details.h +++ b/src/audio_core/sink_details.h | |||
| @@ -24,4 +24,6 @@ struct SinkDetails { | |||
| 24 | 24 | ||
| 25 | extern const std::vector<SinkDetails> g_sink_details; | 25 | extern const std::vector<SinkDetails> g_sink_details; |
| 26 | 26 | ||
| 27 | const SinkDetails& GetSinkDetails(std::string sink_id); | ||
| 28 | |||
| 27 | } // namespace AudioCore | 29 | } // namespace AudioCore |
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp index 99c096ac7..76f5caeb1 100644 --- a/src/citra/citra.cpp +++ b/src/citra/citra.cpp | |||
| @@ -141,6 +141,26 @@ int main(int argc, char** argv) { | |||
| 141 | case Core::System::ResultStatus::ErrorLoader: | 141 | case Core::System::ResultStatus::ErrorLoader: |
| 142 | LOG_CRITICAL(Frontend, "Failed to load ROM!"); | 142 | LOG_CRITICAL(Frontend, "Failed to load ROM!"); |
| 143 | return -1; | 143 | return -1; |
| 144 | case Core::System::ResultStatus::ErrorLoader_ErrorEncrypted: | ||
| 145 | LOG_CRITICAL(Frontend, "The game that you are trying to load must be decrypted before " | ||
| 146 | "being used with Citra. \n\n For more information on dumping and " | ||
| 147 | "decrypting games, please refer to: " | ||
| 148 | "https://citra-emu.org/wiki/Dumping-Game-Cartridges"); | ||
| 149 | return -1; | ||
| 150 | case Core::System::ResultStatus::ErrorLoader_ErrorInvalidFormat: | ||
| 151 | LOG_CRITICAL(Frontend, "Error while loading ROM: The ROM format is not supported."); | ||
| 152 | return -1; | ||
| 153 | case Core::System::ResultStatus::ErrorNotInitialized: | ||
| 154 | LOG_CRITICAL(Frontend, "CPUCore not initialized"); | ||
| 155 | return -1; | ||
| 156 | case Core::System::ResultStatus::ErrorSystemMode: | ||
| 157 | LOG_CRITICAL(Frontend, "Failed to determine system mode!"); | ||
| 158 | return -1; | ||
| 159 | case Core::System::ResultStatus::ErrorVideoCore: | ||
| 160 | LOG_CRITICAL(Frontend, "VideoCore not initialized"); | ||
| 161 | return -1; | ||
| 162 | case Core::System::ResultStatus::Success: | ||
| 163 | break; // Expected case | ||
| 144 | } | 164 | } |
| 145 | 165 | ||
| 146 | while (emu_window->IsOpen()) { | 166 | while (emu_window->IsOpen()) { |
diff --git a/src/citra/config.cpp b/src/citra/config.cpp index bd8ac563b..827c90e55 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp | |||
| @@ -63,7 +63,8 @@ void Config::ReadValues() { | |||
| 63 | // Renderer | 63 | // Renderer |
| 64 | Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); | 64 | Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); |
| 65 | Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); | 65 | Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); |
| 66 | Settings::values.resolution_factor = sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); | 66 | Settings::values.resolution_factor = |
| 67 | (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); | ||
| 67 | Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false); | 68 | Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false); |
| 68 | Settings::values.toggle_framelimit = | 69 | Settings::values.toggle_framelimit = |
| 69 | sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); | 70 | sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); |
| @@ -81,6 +82,7 @@ void Config::ReadValues() { | |||
| 81 | Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); | 82 | Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); |
| 82 | Settings::values.enable_audio_stretching = | 83 | Settings::values.enable_audio_stretching = |
| 83 | sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); | 84 | sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); |
| 85 | Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); | ||
| 84 | 86 | ||
| 85 | // Data Storage | 87 | // Data Storage |
| 86 | Settings::values.use_virtual_sd = | 88 | Settings::values.use_virtual_sd = |
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index 7996813b4..d728fb9e8 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h | |||
| @@ -91,6 +91,10 @@ output_engine = | |||
| 91 | # 0: No, 1 (default): Yes | 91 | # 0: No, 1 (default): Yes |
| 92 | enable_audio_stretching = | 92 | enable_audio_stretching = |
| 93 | 93 | ||
| 94 | # Which audio device to use. | ||
| 95 | # auto (default): Auto-select | ||
| 96 | output_device = | ||
| 97 | |||
| 94 | [Data Storage] | 98 | [Data Storage] |
| 95 | # Whether to create a virtual SD card. | 99 | # Whether to create a virtual SD card. |
| 96 | # 1 (default): Yes, 0: No | 100 | # 1 (default): Yes, 0: No |
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index 93f1c339d..d4460bf01 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt | |||
| @@ -14,7 +14,6 @@ set(SRCS | |||
| 14 | debugger/graphics/graphics_tracing.cpp | 14 | debugger/graphics/graphics_tracing.cpp |
| 15 | debugger/graphics/graphics_vertex_shader.cpp | 15 | debugger/graphics/graphics_vertex_shader.cpp |
| 16 | debugger/profiler.cpp | 16 | debugger/profiler.cpp |
| 17 | debugger/ramview.cpp | ||
| 18 | debugger/registers.cpp | 17 | debugger/registers.cpp |
| 19 | debugger/wait_tree.cpp | 18 | debugger/wait_tree.cpp |
| 20 | util/spinbox.cpp | 19 | util/spinbox.cpp |
| @@ -48,7 +47,6 @@ set(HEADERS | |||
| 48 | debugger/graphics/graphics_tracing.h | 47 | debugger/graphics/graphics_tracing.h |
| 49 | debugger/graphics/graphics_vertex_shader.h | 48 | debugger/graphics/graphics_vertex_shader.h |
| 50 | debugger/profiler.h | 49 | debugger/profiler.h |
| 51 | debugger/ramview.h | ||
| 52 | debugger/registers.h | 50 | debugger/registers.h |
| 53 | debugger/wait_tree.h | 51 | debugger/wait_tree.h |
| 54 | util/spinbox.h | 52 | util/spinbox.h |
| @@ -100,7 +98,7 @@ if (APPLE) | |||
| 100 | else() | 98 | else() |
| 101 | add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS}) | 99 | add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS}) |
| 102 | endif() | 100 | endif() |
| 103 | target_link_libraries(citra-qt core video_core audio_core common qhexedit) | 101 | target_link_libraries(citra-qt core video_core audio_core common) |
| 104 | target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) | 102 | target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) |
| 105 | target_link_libraries(citra-qt ${PLATFORM_LIBRARIES} Threads::Threads) | 103 | target_link_libraries(citra-qt ${PLATFORM_LIBRARIES} Threads::Threads) |
| 106 | 104 | ||
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp index 8021667d0..f776e16b2 100644 --- a/src/citra_qt/config.cpp +++ b/src/citra_qt/config.cpp | |||
| @@ -63,6 +63,8 @@ void Config::ReadValues() { | |||
| 63 | Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString(); | 63 | Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString(); |
| 64 | Settings::values.enable_audio_stretching = | 64 | Settings::values.enable_audio_stretching = |
| 65 | qt_config->value("enable_audio_stretching", true).toBool(); | 65 | qt_config->value("enable_audio_stretching", true).toBool(); |
| 66 | Settings::values.audio_device_id = | ||
| 67 | qt_config->value("output_device", "auto").toString().toStdString(); | ||
| 66 | qt_config->endGroup(); | 68 | qt_config->endGroup(); |
| 67 | 69 | ||
| 68 | qt_config->beginGroup("Data Storage"); | 70 | qt_config->beginGroup("Data Storage"); |
| @@ -169,6 +171,7 @@ void Config::SaveValues() { | |||
| 169 | qt_config->beginGroup("Audio"); | 171 | qt_config->beginGroup("Audio"); |
| 170 | qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id)); | 172 | qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id)); |
| 171 | qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching); | 173 | qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching); |
| 174 | qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id)); | ||
| 172 | qt_config->endGroup(); | 175 | qt_config->endGroup(); |
| 173 | 176 | ||
| 174 | qt_config->beginGroup("Data Storage"); | 177 | qt_config->beginGroup("Data Storage"); |
diff --git a/src/citra_qt/configure_audio.cpp b/src/citra_qt/configure_audio.cpp index 3cdd4c780..3ddcf9232 100644 --- a/src/citra_qt/configure_audio.cpp +++ b/src/citra_qt/configure_audio.cpp | |||
| @@ -2,6 +2,9 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <memory> | ||
| 6 | #include "audio_core/audio_core.h" | ||
| 7 | #include "audio_core/sink.h" | ||
| 5 | #include "audio_core/sink_details.h" | 8 | #include "audio_core/sink_details.h" |
| 6 | #include "citra_qt/configure_audio.h" | 9 | #include "citra_qt/configure_audio.h" |
| 7 | #include "core/settings.h" | 10 | #include "core/settings.h" |
| @@ -18,6 +21,8 @@ ConfigureAudio::ConfigureAudio(QWidget* parent) | |||
| 18 | } | 21 | } |
| 19 | 22 | ||
| 20 | this->setConfiguration(); | 23 | this->setConfiguration(); |
| 24 | connect(ui->output_sink_combo_box, SIGNAL(currentIndexChanged(int)), this, | ||
| 25 | SLOT(updateAudioDevices(int))); | ||
| 21 | } | 26 | } |
| 22 | 27 | ||
| 23 | ConfigureAudio::~ConfigureAudio() {} | 28 | ConfigureAudio::~ConfigureAudio() {} |
| @@ -33,6 +38,19 @@ void ConfigureAudio::setConfiguration() { | |||
| 33 | ui->output_sink_combo_box->setCurrentIndex(new_sink_index); | 38 | ui->output_sink_combo_box->setCurrentIndex(new_sink_index); |
| 34 | 39 | ||
| 35 | ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching); | 40 | ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching); |
| 41 | |||
| 42 | // The device list cannot be pre-populated (nor listed) until the output sink is known. | ||
| 43 | updateAudioDevices(new_sink_index); | ||
| 44 | |||
| 45 | int new_device_index = -1; | ||
| 46 | for (int index = 0; index < ui->audio_device_combo_box->count(); index++) { | ||
| 47 | if (ui->audio_device_combo_box->itemText(index).toStdString() == | ||
| 48 | Settings::values.audio_device_id) { | ||
| 49 | new_device_index = index; | ||
| 50 | break; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | ui->audio_device_combo_box->setCurrentIndex(new_device_index); | ||
| 36 | } | 54 | } |
| 37 | 55 | ||
| 38 | void ConfigureAudio::applyConfiguration() { | 56 | void ConfigureAudio::applyConfiguration() { |
| @@ -40,5 +58,20 @@ void ConfigureAudio::applyConfiguration() { | |||
| 40 | ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex()) | 58 | ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex()) |
| 41 | .toStdString(); | 59 | .toStdString(); |
| 42 | Settings::values.enable_audio_stretching = ui->toggle_audio_stretching->isChecked(); | 60 | Settings::values.enable_audio_stretching = ui->toggle_audio_stretching->isChecked(); |
| 61 | Settings::values.audio_device_id = | ||
| 62 | ui->audio_device_combo_box->itemText(ui->audio_device_combo_box->currentIndex()) | ||
| 63 | .toStdString(); | ||
| 43 | Settings::Apply(); | 64 | Settings::Apply(); |
| 44 | } | 65 | } |
| 66 | |||
| 67 | void ConfigureAudio::updateAudioDevices(int sink_index) { | ||
| 68 | ui->audio_device_combo_box->clear(); | ||
| 69 | ui->audio_device_combo_box->addItem("auto"); | ||
| 70 | |||
| 71 | std::string sink_id = ui->output_sink_combo_box->itemText(sink_index).toStdString(); | ||
| 72 | std::vector<std::string> device_list = | ||
| 73 | AudioCore::GetSinkDetails(sink_id).factory()->GetDeviceList(); | ||
| 74 | for (const auto& device : device_list) { | ||
| 75 | ui->audio_device_combo_box->addItem(device.c_str()); | ||
| 76 | } | ||
| 77 | } | ||
diff --git a/src/citra_qt/configure_audio.h b/src/citra_qt/configure_audio.h index 51df2e27b..8190e694f 100644 --- a/src/citra_qt/configure_audio.h +++ b/src/citra_qt/configure_audio.h | |||
| @@ -20,6 +20,9 @@ public: | |||
| 20 | 20 | ||
| 21 | void applyConfiguration(); | 21 | void applyConfiguration(); |
| 22 | 22 | ||
| 23 | public slots: | ||
| 24 | void updateAudioDevices(int sink_index); | ||
| 25 | |||
| 23 | private: | 26 | private: |
| 24 | void setConfiguration(); | 27 | void setConfiguration(); |
| 25 | 28 | ||
diff --git a/src/citra_qt/configure_audio.ui b/src/citra_qt/configure_audio.ui index 3e2b4635f..dd870eb61 100644 --- a/src/citra_qt/configure_audio.ui +++ b/src/citra_qt/configure_audio.ui | |||
| @@ -35,6 +35,21 @@ | |||
| 35 | </property> | 35 | </property> |
| 36 | </widget> | 36 | </widget> |
| 37 | </item> | 37 | </item> |
| 38 | <item> | ||
| 39 | <layout class="QHBoxLayout"> | ||
| 40 | <item> | ||
| 41 | <widget class="QLabel"> | ||
| 42 | <property name="text"> | ||
| 43 | <string>Audio Device:</string> | ||
| 44 | </property> | ||
| 45 | </widget> | ||
| 46 | </item> | ||
| 47 | <item> | ||
| 48 | <widget class="QComboBox" name="audio_device_combo_box"> | ||
| 49 | </widget> | ||
| 50 | </item> | ||
| 51 | </layout> | ||
| 52 | </item> | ||
| 38 | </layout> | 53 | </layout> |
| 39 | </widget> | 54 | </widget> |
| 40 | </item> | 55 | </item> |
diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index ff2e7e363..f37524190 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp | |||
| @@ -18,7 +18,9 @@ | |||
| 18 | #include "citra_qt/util/util.h" | 18 | #include "citra_qt/util/util.h" |
| 19 | #include "video_core/pica.h" | 19 | #include "video_core/pica.h" |
| 20 | #include "video_core/pica_state.h" | 20 | #include "video_core/pica_state.h" |
| 21 | #include "video_core/shader/debug_data.h" | ||
| 21 | #include "video_core/shader/shader.h" | 22 | #include "video_core/shader/shader.h" |
| 23 | #include "video_core/shader/shader_interpreter.h" | ||
| 22 | 24 | ||
| 23 | using nihstro::OpCode; | 25 | using nihstro::OpCode; |
| 24 | using nihstro::Instruction; | 26 | using nihstro::Instruction; |
| @@ -518,8 +520,9 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d | |||
| 518 | info.labels.insert({entry_point, "main"}); | 520 | info.labels.insert({entry_point, "main"}); |
| 519 | 521 | ||
| 520 | // Generate debug information | 522 | // Generate debug information |
| 521 | debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, | 523 | Pica::Shader::InterpreterEngine shader_engine; |
| 522 | shader_setup); | 524 | shader_engine.SetupBatch(shader_setup, entry_point); |
| 525 | debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, num_attributes); | ||
| 523 | 526 | ||
| 524 | // Reload widget state | 527 | // Reload widget state |
| 525 | for (int attr = 0; attr < num_attributes; ++attr) { | 528 | for (int attr = 0; attr < num_attributes; ++attr) { |
diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h index bedea0bed..3292573f3 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <QTreeView> | 8 | #include <QTreeView> |
| 9 | #include "citra_qt/debugger/graphics/graphics_breakpoint_observer.h" | 9 | #include "citra_qt/debugger/graphics/graphics_breakpoint_observer.h" |
| 10 | #include "nihstro/parser_shbin.h" | 10 | #include "nihstro/parser_shbin.h" |
| 11 | #include "video_core/shader/debug_data.h" | ||
| 11 | #include "video_core/shader/shader.h" | 12 | #include "video_core/shader/shader.h" |
| 12 | 13 | ||
| 13 | class QLabel; | 14 | class QLabel; |
diff --git a/src/citra_qt/debugger/ramview.cpp b/src/citra_qt/debugger/ramview.cpp deleted file mode 100644 index 10a09dda8..000000000 --- a/src/citra_qt/debugger/ramview.cpp +++ /dev/null | |||
| @@ -1,12 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "citra_qt/debugger/ramview.h" | ||
| 6 | |||
| 7 | GRamView::GRamView(QWidget* parent) : QHexEdit(parent) {} | ||
| 8 | |||
| 9 | void GRamView::OnCPUStepped() { | ||
| 10 | // TODO: QHexEdit doesn't show vertical scroll bars for > 10MB data streams... | ||
| 11 | // setData(QByteArray((const char*)Mem_RAM,sizeof(Mem_RAM)/8)); | ||
| 12 | } | ||
diff --git a/src/citra_qt/debugger/ramview.h b/src/citra_qt/debugger/ramview.h deleted file mode 100644 index d01cea93b..000000000 --- a/src/citra_qt/debugger/ramview.h +++ /dev/null | |||
| @@ -1,17 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "qhexedit.h" | ||
| 8 | |||
| 9 | class GRamView : public QHexEdit { | ||
| 10 | Q_OBJECT | ||
| 11 | |||
| 12 | public: | ||
| 13 | explicit GRamView(QWidget* parent = nullptr); | ||
| 14 | |||
| 15 | public slots: | ||
| 16 | void OnCPUStepped(); | ||
| 17 | }; | ||
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index 6d59cf640..f765c0147 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <QFileDialog> | 12 | #include <QFileDialog> |
| 13 | #include <QMessageBox> | 13 | #include <QMessageBox> |
| 14 | #include <QtGui> | 14 | #include <QtGui> |
| 15 | #include <QtWidgets> | ||
| 15 | #include "citra_qt/bootmanager.h" | 16 | #include "citra_qt/bootmanager.h" |
| 16 | #include "citra_qt/config.h" | 17 | #include "citra_qt/config.h" |
| 17 | #include "citra_qt/configure_dialog.h" | 18 | #include "citra_qt/configure_dialog.h" |
| @@ -24,7 +25,6 @@ | |||
| 24 | #include "citra_qt/debugger/graphics/graphics_tracing.h" | 25 | #include "citra_qt/debugger/graphics/graphics_tracing.h" |
| 25 | #include "citra_qt/debugger/graphics/graphics_vertex_shader.h" | 26 | #include "citra_qt/debugger/graphics/graphics_vertex_shader.h" |
| 26 | #include "citra_qt/debugger/profiler.h" | 27 | #include "citra_qt/debugger/profiler.h" |
| 27 | #include "citra_qt/debugger/ramview.h" | ||
| 28 | #include "citra_qt/debugger/registers.h" | 28 | #include "citra_qt/debugger/registers.h" |
| 29 | #include "citra_qt/debugger/wait_tree.h" | 29 | #include "citra_qt/debugger/wait_tree.h" |
| 30 | #include "citra_qt/game_list.h" | 30 | #include "citra_qt/game_list.h" |
| @@ -46,7 +46,6 @@ | |||
| 46 | #include "core/gdbstub/gdbstub.h" | 46 | #include "core/gdbstub/gdbstub.h" |
| 47 | #include "core/loader/loader.h" | 47 | #include "core/loader/loader.h" |
| 48 | #include "core/settings.h" | 48 | #include "core/settings.h" |
| 49 | #include "qhexedit.h" | ||
| 50 | #include "video_core/video_core.h" | 49 | #include "video_core/video_core.h" |
| 51 | 50 | ||
| 52 | #ifdef QT_STATICPLUGIN | 51 | #ifdef QT_STATICPLUGIN |
diff --git a/src/common/hash.cpp b/src/common/hash.cpp index 2309320bb..f3d390dc5 100644 --- a/src/common/hash.cpp +++ b/src/common/hash.cpp | |||
| @@ -16,7 +16,7 @@ namespace Common { | |||
| 16 | 16 | ||
| 17 | // Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do | 17 | // Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do |
| 18 | // the conversion here | 18 | // the conversion here |
| 19 | static FORCE_INLINE u64 getblock64(const u64* p, int i) { | 19 | static FORCE_INLINE u64 getblock64(const u64* p, size_t i) { |
| 20 | return p[i]; | 20 | return p[i]; |
| 21 | } | 21 | } |
| 22 | 22 | ||
| @@ -34,9 +34,9 @@ static FORCE_INLINE u64 fmix64(u64 k) { | |||
| 34 | // This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit | 34 | // This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit |
| 35 | // platforms (MurmurHash3_x64_128). It was taken from: | 35 | // platforms (MurmurHash3_x64_128). It was taken from: |
| 36 | // https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp | 36 | // https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp |
| 37 | void MurmurHash3_128(const void* key, int len, u32 seed, void* out) { | 37 | void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) { |
| 38 | const u8* data = (const u8*)key; | 38 | const u8* data = (const u8*)key; |
| 39 | const int nblocks = len / 16; | 39 | const size_t nblocks = len / 16; |
| 40 | 40 | ||
| 41 | u64 h1 = seed; | 41 | u64 h1 = seed; |
| 42 | u64 h2 = seed; | 42 | u64 h2 = seed; |
| @@ -48,7 +48,7 @@ void MurmurHash3_128(const void* key, int len, u32 seed, void* out) { | |||
| 48 | 48 | ||
| 49 | const u64* blocks = (const u64*)(data); | 49 | const u64* blocks = (const u64*)(data); |
| 50 | 50 | ||
| 51 | for (int i = 0; i < nblocks; i++) { | 51 | for (size_t i = 0; i < nblocks; i++) { |
| 52 | u64 k1 = getblock64(blocks, i * 2 + 0); | 52 | u64 k1 = getblock64(blocks, i * 2 + 0); |
| 53 | u64 k2 = getblock64(blocks, i * 2 + 1); | 53 | u64 k2 = getblock64(blocks, i * 2 + 1); |
| 54 | 54 | ||
diff --git a/src/common/hash.h b/src/common/hash.h index a3850be68..ee2560dad 100644 --- a/src/common/hash.h +++ b/src/common/hash.h | |||
| @@ -4,11 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | ||
| 7 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 8 | 9 | ||
| 9 | namespace Common { | 10 | namespace Common { |
| 10 | 11 | ||
| 11 | void MurmurHash3_128(const void* key, int len, u32 seed, void* out); | 12 | void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out); |
| 12 | 13 | ||
| 13 | /** | 14 | /** |
| 14 | * Computes a 64-bit hash over the specified block of data | 15 | * Computes a 64-bit hash over the specified block of data |
| @@ -16,7 +17,7 @@ void MurmurHash3_128(const void* key, int len, u32 seed, void* out); | |||
| 16 | * @param len Length of data (in bytes) to compute hash over | 17 | * @param len Length of data (in bytes) to compute hash over |
| 17 | * @returns 64-bit hash value that was computed over the data block | 18 | * @returns 64-bit hash value that was computed over the data block |
| 18 | */ | 19 | */ |
| 19 | static inline u64 ComputeHash64(const void* data, int len) { | 20 | static inline u64 ComputeHash64(const void* data, size_t len) { |
| 20 | u64 res[2]; | 21 | u64 res[2]; |
| 21 | MurmurHash3_128(data, len, 0, res); | 22 | MurmurHash3_128(data, len, 0, res); |
| 22 | return res[0]; | 23 | return res[0]; |
diff --git a/src/core/core.h b/src/core/core.h index 1015e8847..17572a74f 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -115,7 +115,7 @@ private: | |||
| 115 | static System s_instance; | 115 | static System s_instance; |
| 116 | }; | 116 | }; |
| 117 | 117 | ||
| 118 | static ARM_Interface& CPU() { | 118 | inline ARM_Interface& CPU() { |
| 119 | return System::GetInstance().CPU(); | 119 | return System::GetInstance().CPU(); |
| 120 | } | 120 | } |
| 121 | 121 | ||
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index a437d0823..276ecfdf6 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | #include "core/core.h" | 13 | #include "core/core.h" |
| 14 | #include "core/core_timing.h" | 14 | #include "core/core_timing.h" |
| 15 | 15 | ||
| 16 | int g_clock_rate_arm11 = 268123480; | 16 | int g_clock_rate_arm11 = BASE_CLOCK_RATE_ARM11; |
| 17 | 17 | ||
| 18 | // is this really necessary? | 18 | // is this really necessary? |
| 19 | #define INITIAL_SLICE_LENGTH 20000 | 19 | #define INITIAL_SLICE_LENGTH 20000 |
diff --git a/src/core/core_timing.h b/src/core/core_timing.h index b72a1b500..d2f85cd4d 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | // inside callback: | 21 | // inside callback: |
| 22 | // ScheduleEvent(periodInCycles - cycles_late, callback, "whatever") | 22 | // ScheduleEvent(periodInCycles - cycles_late, callback, "whatever") |
| 23 | 23 | ||
| 24 | constexpr int BASE_CLOCK_RATE_ARM11 = 268123480; | ||
| 24 | extern int g_clock_rate_arm11; | 25 | extern int g_clock_rate_arm11; |
| 25 | 26 | ||
| 26 | inline s64 msToCycles(int ms) { | 27 | inline s64 msToCycles(int ms) { |
diff --git a/src/core/file_sys/archive_extsavedata.cpp b/src/core/file_sys/archive_extsavedata.cpp index 51ce78435..dd2fb167f 100644 --- a/src/core/file_sys/archive_extsavedata.cpp +++ b/src/core/file_sys/archive_extsavedata.cpp | |||
| @@ -107,6 +107,8 @@ public: | |||
| 107 | case PathParser::NotFound: | 107 | case PathParser::NotFound: |
| 108 | LOG_ERROR(Service_FS, "%s not found", full_path.c_str()); | 108 | LOG_ERROR(Service_FS, "%s not found", full_path.c_str()); |
| 109 | return ERROR_FILE_NOT_FOUND; | 109 | return ERROR_FILE_NOT_FOUND; |
| 110 | case PathParser::FileFound: | ||
| 111 | break; // Expected 'success' case | ||
| 110 | } | 112 | } |
| 111 | 113 | ||
| 112 | FileUtil::IOFile file(full_path, "r+b"); | 114 | FileUtil::IOFile file(full_path, "r+b"); |
diff --git a/src/core/file_sys/archive_sdmc.cpp b/src/core/file_sys/archive_sdmc.cpp index 333dfb92e..72ff05c65 100644 --- a/src/core/file_sys/archive_sdmc.cpp +++ b/src/core/file_sys/archive_sdmc.cpp | |||
| @@ -72,6 +72,8 @@ ResultVal<std::unique_ptr<FileBackend>> SDMCArchive::OpenFileBase(const Path& pa | |||
| 72 | FileUtil::CreateEmptyFile(full_path); | 72 | FileUtil::CreateEmptyFile(full_path); |
| 73 | } | 73 | } |
| 74 | break; | 74 | break; |
| 75 | case PathParser::FileFound: | ||
| 76 | break; // Expected 'success' case | ||
| 75 | } | 77 | } |
| 76 | 78 | ||
| 77 | FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb"); | 79 | FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb"); |
| @@ -106,6 +108,8 @@ ResultCode SDMCArchive::DeleteFile(const Path& path) const { | |||
| 106 | case PathParser::DirectoryFound: | 108 | case PathParser::DirectoryFound: |
| 107 | LOG_ERROR(Service_FS, "%s is not a file", full_path.c_str()); | 109 | LOG_ERROR(Service_FS, "%s is not a file", full_path.c_str()); |
| 108 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC; | 110 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC; |
| 111 | case PathParser::FileFound: | ||
| 112 | break; // Expected 'success' case | ||
| 109 | } | 113 | } |
| 110 | 114 | ||
| 111 | if (FileUtil::Delete(full_path)) { | 115 | if (FileUtil::Delete(full_path)) { |
| @@ -154,6 +158,8 @@ static ResultCode DeleteDirectoryHelper(const Path& path, const std::string& mou | |||
| 154 | case PathParser::FileFound: | 158 | case PathParser::FileFound: |
| 155 | LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str()); | 159 | LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str()); |
| 156 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC; | 160 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC; |
| 161 | case PathParser::DirectoryFound: | ||
| 162 | break; // Expected 'success' case | ||
| 157 | } | 163 | } |
| 158 | 164 | ||
| 159 | if (deleter(full_path)) { | 165 | if (deleter(full_path)) { |
| @@ -197,6 +203,8 @@ ResultCode SDMCArchive::CreateFile(const FileSys::Path& path, u64 size) const { | |||
| 197 | case PathParser::FileFound: | 203 | case PathParser::FileFound: |
| 198 | LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); | 204 | LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); |
| 199 | return ERROR_ALREADY_EXISTS; | 205 | return ERROR_ALREADY_EXISTS; |
| 206 | case PathParser::NotFound: | ||
| 207 | break; // Expected 'success' case | ||
| 200 | } | 208 | } |
| 201 | 209 | ||
| 202 | if (size == 0) { | 210 | if (size == 0) { |
| @@ -238,6 +246,8 @@ ResultCode SDMCArchive::CreateDirectory(const Path& path) const { | |||
| 238 | case PathParser::FileFound: | 246 | case PathParser::FileFound: |
| 239 | LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); | 247 | LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); |
| 240 | return ERROR_ALREADY_EXISTS; | 248 | return ERROR_ALREADY_EXISTS; |
| 249 | case PathParser::NotFound: | ||
| 250 | break; // Expected 'success' case | ||
| 241 | } | 251 | } |
| 242 | 252 | ||
| 243 | if (FileUtil::CreateDir(mount_point + path.AsString())) { | 253 | if (FileUtil::CreateDir(mount_point + path.AsString())) { |
| @@ -281,6 +291,8 @@ ResultVal<std::unique_ptr<DirectoryBackend>> SDMCArchive::OpenDirectory(const Pa | |||
| 281 | case PathParser::FileInPath: | 291 | case PathParser::FileInPath: |
| 282 | LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str()); | 292 | LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str()); |
| 283 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC; | 293 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC; |
| 294 | case PathParser::DirectoryFound: | ||
| 295 | break; // Expected 'success' case | ||
| 284 | } | 296 | } |
| 285 | 297 | ||
| 286 | auto directory = std::make_unique<DiskDirectory>(full_path); | 298 | auto directory = std::make_unique<DiskDirectory>(full_path); |
diff --git a/src/core/file_sys/savedata_archive.cpp b/src/core/file_sys/savedata_archive.cpp index f2e6a06bc..f540c4a93 100644 --- a/src/core/file_sys/savedata_archive.cpp +++ b/src/core/file_sys/savedata_archive.cpp | |||
| @@ -57,6 +57,8 @@ ResultVal<std::unique_ptr<FileBackend>> SaveDataArchive::OpenFile(const Path& pa | |||
| 57 | FileUtil::CreateEmptyFile(full_path); | 57 | FileUtil::CreateEmptyFile(full_path); |
| 58 | } | 58 | } |
| 59 | break; | 59 | break; |
| 60 | case PathParser::FileFound: | ||
| 61 | break; // Expected 'success' case | ||
| 60 | } | 62 | } |
| 61 | 63 | ||
| 62 | FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb"); | 64 | FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb"); |
| @@ -91,6 +93,8 @@ ResultCode SaveDataArchive::DeleteFile(const Path& path) const { | |||
| 91 | case PathParser::NotFound: | 93 | case PathParser::NotFound: |
| 92 | LOG_ERROR(Service_FS, "File not found %s", full_path.c_str()); | 94 | LOG_ERROR(Service_FS, "File not found %s", full_path.c_str()); |
| 93 | return ERROR_FILE_NOT_FOUND; | 95 | return ERROR_FILE_NOT_FOUND; |
| 96 | case PathParser::FileFound: | ||
| 97 | break; // Expected 'success' case | ||
| 94 | } | 98 | } |
| 95 | 99 | ||
| 96 | if (FileUtil::Delete(full_path)) { | 100 | if (FileUtil::Delete(full_path)) { |
| @@ -139,6 +143,8 @@ static ResultCode DeleteDirectoryHelper(const Path& path, const std::string& mou | |||
| 139 | case PathParser::FileFound: | 143 | case PathParser::FileFound: |
| 140 | LOG_ERROR(Service_FS, "Unexpected file or directory %s", full_path.c_str()); | 144 | LOG_ERROR(Service_FS, "Unexpected file or directory %s", full_path.c_str()); |
| 141 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY; | 145 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY; |
| 146 | case PathParser::DirectoryFound: | ||
| 147 | break; // Expected 'success' case | ||
| 142 | } | 148 | } |
| 143 | 149 | ||
| 144 | if (deleter(full_path)) { | 150 | if (deleter(full_path)) { |
| @@ -182,6 +188,8 @@ ResultCode SaveDataArchive::CreateFile(const FileSys::Path& path, u64 size) cons | |||
| 182 | case PathParser::FileFound: | 188 | case PathParser::FileFound: |
| 183 | LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); | 189 | LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); |
| 184 | return ERROR_FILE_ALREADY_EXISTS; | 190 | return ERROR_FILE_ALREADY_EXISTS; |
| 191 | case PathParser::NotFound: | ||
| 192 | break; // Expected 'success' case | ||
| 185 | } | 193 | } |
| 186 | 194 | ||
| 187 | if (size == 0) { | 195 | if (size == 0) { |
| @@ -225,6 +233,8 @@ ResultCode SaveDataArchive::CreateDirectory(const Path& path) const { | |||
| 225 | case PathParser::FileFound: | 233 | case PathParser::FileFound: |
| 226 | LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); | 234 | LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); |
| 227 | return ERROR_DIRECTORY_ALREADY_EXISTS; | 235 | return ERROR_DIRECTORY_ALREADY_EXISTS; |
| 236 | case PathParser::NotFound: | ||
| 237 | break; // Expected 'success' case | ||
| 228 | } | 238 | } |
| 229 | 239 | ||
| 230 | if (FileUtil::CreateDir(mount_point + path.AsString())) { | 240 | if (FileUtil::CreateDir(mount_point + path.AsString())) { |
| @@ -269,6 +279,8 @@ ResultVal<std::unique_ptr<DirectoryBackend>> SaveDataArchive::OpenDirectory( | |||
| 269 | case PathParser::FileFound: | 279 | case PathParser::FileFound: |
| 270 | LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str()); | 280 | LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str()); |
| 271 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY; | 281 | return ERROR_UNEXPECTED_FILE_OR_DIRECTORY; |
| 282 | case PathParser::DirectoryFound: | ||
| 283 | break; // Expected 'success' case | ||
| 272 | } | 284 | } |
| 273 | 285 | ||
| 274 | auto directory = std::make_unique<DiskDirectory>(full_path); | 286 | auto directory = std::make_unique<DiskDirectory>(full_path); |
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp index 1541cc39d..4f0f786ce 100644 --- a/src/core/frontend/emu_window.cpp +++ b/src/core/frontend/emu_window.cpp | |||
| @@ -98,9 +98,9 @@ void EmuWindow::AccelerometerChanged(float x, float y, float z) { | |||
| 98 | // TODO(wwylele): do a time stretch as it in GyroscopeChanged | 98 | // TODO(wwylele): do a time stretch as it in GyroscopeChanged |
| 99 | // The time stretch formula should be like | 99 | // The time stretch formula should be like |
| 100 | // stretched_vector = (raw_vector - gravity) * stretch_ratio + gravity | 100 | // stretched_vector = (raw_vector - gravity) * stretch_ratio + gravity |
| 101 | accel_x = x * coef; | 101 | accel_x = static_cast<s16>(x * coef); |
| 102 | accel_y = y * coef; | 102 | accel_y = static_cast<s16>(y * coef); |
| 103 | accel_z = z * coef; | 103 | accel_z = static_cast<s16>(z * coef); |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | void EmuWindow::GyroscopeChanged(float x, float y, float z) { | 106 | void EmuWindow::GyroscopeChanged(float x, float y, float z) { |
| @@ -109,9 +109,9 @@ void EmuWindow::GyroscopeChanged(float x, float y, float z) { | |||
| 109 | float stretch = | 109 | float stretch = |
| 110 | FULL_FPS / Common::Profiling::GetTimingResultsAggregator()->GetAggregatedResults().fps; | 110 | FULL_FPS / Common::Profiling::GetTimingResultsAggregator()->GetAggregatedResults().fps; |
| 111 | std::lock_guard<std::mutex> lock(gyro_mutex); | 111 | std::lock_guard<std::mutex> lock(gyro_mutex); |
| 112 | gyro_x = x * coef * stretch; | 112 | gyro_x = static_cast<s16>(x * coef * stretch); |
| 113 | gyro_y = y * coef * stretch; | 113 | gyro_y = static_cast<s16>(y * coef * stretch); |
| 114 | gyro_z = z * coef * stretch; | 114 | gyro_z = static_cast<s16>(z * coef * stretch); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | void EmuWindow::UpdateCurrentFramebufferLayout(unsigned width, unsigned height) { | 117 | void EmuWindow::UpdateCurrentFramebufferLayout(unsigned width, unsigned height) { |
diff --git a/src/core/hle/service/err_f.cpp b/src/core/hle/service/err_f.cpp index cd0a1a598..9da55f328 100644 --- a/src/core/hle/service/err_f.cpp +++ b/src/core/hle/service/err_f.cpp | |||
| @@ -227,6 +227,8 @@ static void ThrowFatalError(Interface* self) { | |||
| 227 | LOG_CRITICAL(Service_ERR, "FINST2: 0x%08X", | 227 | LOG_CRITICAL(Service_ERR, "FINST2: 0x%08X", |
| 228 | errtype.exception_data.exception_info.fpinst2); | 228 | errtype.exception_data.exception_info.fpinst2); |
| 229 | break; | 229 | break; |
| 230 | case ExceptionType::Undefined: | ||
| 231 | break; // Not logging exception_info for this case | ||
| 230 | } | 232 | } |
| 231 | LOG_CRITICAL(Service_ERR, "Datetime: %s", GetCurrentSystemTime().c_str()); | 233 | LOG_CRITICAL(Service_ERR, "Datetime: %s", GetCurrentSystemTime().c_str()); |
| 232 | break; | 234 | break; |
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 947958703..a8c1331ed 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp | |||
| @@ -149,7 +149,7 @@ static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, VAddr | |||
| 149 | u32 mask = Memory::Read32(masks_vaddr); | 149 | u32 mask = Memory::Read32(masks_vaddr); |
| 150 | 150 | ||
| 151 | // Update the current value of the register only for set mask bits | 151 | // Update the current value of the register only for set mask bits |
| 152 | reg_value = (reg_value & ~mask) | (data | mask); | 152 | reg_value = (reg_value & ~mask) | (data & mask); |
| 153 | 153 | ||
| 154 | WriteSingleHWReg(base_address, reg_value); | 154 | WriteSingleHWReg(base_address, reg_value); |
| 155 | 155 | ||
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 676154bd4..f14ab3811 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp | |||
| @@ -35,6 +35,15 @@ static u32 next_gyroscope_index; | |||
| 35 | static int enable_accelerometer_count = 0; // positive means enabled | 35 | static int enable_accelerometer_count = 0; // positive means enabled |
| 36 | static int enable_gyroscope_count = 0; // positive means enabled | 36 | static int enable_gyroscope_count = 0; // positive means enabled |
| 37 | 37 | ||
| 38 | static int pad_update_event; | ||
| 39 | static int accelerometer_update_event; | ||
| 40 | static int gyroscope_update_event; | ||
| 41 | |||
| 42 | // Updating period for each HID device. These empirical values are measured from a 11.2 3DS. | ||
| 43 | constexpr u64 pad_update_ticks = BASE_CLOCK_RATE_ARM11 / 234; | ||
| 44 | constexpr u64 accelerometer_update_ticks = BASE_CLOCK_RATE_ARM11 / 104; | ||
| 45 | constexpr u64 gyroscope_update_ticks = BASE_CLOCK_RATE_ARM11 / 101; | ||
| 46 | |||
| 38 | static PadState GetCirclePadDirectionState(s16 circle_pad_x, s16 circle_pad_y) { | 47 | static PadState GetCirclePadDirectionState(s16 circle_pad_x, s16 circle_pad_y) { |
| 39 | // 30 degree and 60 degree are angular thresholds for directions | 48 | // 30 degree and 60 degree are angular thresholds for directions |
| 40 | constexpr float TAN30 = 0.577350269f; | 49 | constexpr float TAN30 = 0.577350269f; |
| @@ -65,14 +74,9 @@ static PadState GetCirclePadDirectionState(s16 circle_pad_x, s16 circle_pad_y) { | |||
| 65 | return state; | 74 | return state; |
| 66 | } | 75 | } |
| 67 | 76 | ||
| 68 | void Update() { | 77 | static void UpdatePadCallback(u64 userdata, int cycles_late) { |
| 69 | SharedMem* mem = reinterpret_cast<SharedMem*>(shared_mem->GetPointer()); | 78 | SharedMem* mem = reinterpret_cast<SharedMem*>(shared_mem->GetPointer()); |
| 70 | 79 | ||
| 71 | if (mem == nullptr) { | ||
| 72 | LOG_DEBUG(Service_HID, "Cannot update HID prior to mapping shared memory!"); | ||
| 73 | return; | ||
| 74 | } | ||
| 75 | |||
| 76 | PadState state = VideoCore::g_emu_window->GetPadState(); | 80 | PadState state = VideoCore::g_emu_window->GetPadState(); |
| 77 | 81 | ||
| 78 | // Get current circle pad position and update circle pad direction | 82 | // Get current circle pad position and update circle pad direction |
| @@ -131,59 +135,68 @@ void Update() { | |||
| 131 | event_pad_or_touch_1->Signal(); | 135 | event_pad_or_touch_1->Signal(); |
| 132 | event_pad_or_touch_2->Signal(); | 136 | event_pad_or_touch_2->Signal(); |
| 133 | 137 | ||
| 134 | // Update accelerometer | 138 | // Reschedule recurrent event |
| 135 | if (enable_accelerometer_count > 0) { | 139 | CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); |
| 136 | mem->accelerometer.index = next_accelerometer_index; | 140 | } |
| 137 | next_accelerometer_index = | 141 | |
| 138 | (next_accelerometer_index + 1) % mem->accelerometer.entries.size(); | 142 | static void UpdateAccelerometerCallback(u64 userdata, int cycles_late) { |
| 139 | 143 | SharedMem* mem = reinterpret_cast<SharedMem*>(shared_mem->GetPointer()); | |
| 140 | AccelerometerDataEntry& accelerometer_entry = | 144 | |
| 141 | mem->accelerometer.entries[mem->accelerometer.index]; | 145 | mem->accelerometer.index = next_accelerometer_index; |
| 142 | std::tie(accelerometer_entry.x, accelerometer_entry.y, accelerometer_entry.z) = | 146 | next_accelerometer_index = (next_accelerometer_index + 1) % mem->accelerometer.entries.size(); |
| 143 | VideoCore::g_emu_window->GetAccelerometerState(); | ||
| 144 | |||
| 145 | // Make up "raw" entry | ||
| 146 | // TODO(wwylele): | ||
| 147 | // From hardware testing, the raw_entry values are approximately, | ||
| 148 | // but not exactly, as twice as corresponding entries (or with a minus sign). | ||
| 149 | // It may caused by system calibration to the accelerometer. | ||
| 150 | // Figure out how it works, or, if no game reads raw_entry, | ||
| 151 | // the following three lines can be removed and leave raw_entry unimplemented. | ||
| 152 | mem->accelerometer.raw_entry.x = -2 * accelerometer_entry.x; | ||
| 153 | mem->accelerometer.raw_entry.z = 2 * accelerometer_entry.y; | ||
| 154 | mem->accelerometer.raw_entry.y = -2 * accelerometer_entry.z; | ||
| 155 | |||
| 156 | // If we just updated index 0, provide a new timestamp | ||
| 157 | if (mem->accelerometer.index == 0) { | ||
| 158 | mem->accelerometer.index_reset_ticks_previous = mem->accelerometer.index_reset_ticks; | ||
| 159 | mem->accelerometer.index_reset_ticks = (s64)CoreTiming::GetTicks(); | ||
| 160 | } | ||
| 161 | 147 | ||
| 162 | event_accelerometer->Signal(); | 148 | AccelerometerDataEntry& accelerometer_entry = |
| 149 | mem->accelerometer.entries[mem->accelerometer.index]; | ||
| 150 | std::tie(accelerometer_entry.x, accelerometer_entry.y, accelerometer_entry.z) = | ||
| 151 | VideoCore::g_emu_window->GetAccelerometerState(); | ||
| 152 | |||
| 153 | // Make up "raw" entry | ||
| 154 | // TODO(wwylele): | ||
| 155 | // From hardware testing, the raw_entry values are approximately, but not exactly, as twice as | ||
| 156 | // corresponding entries (or with a minus sign). It may caused by system calibration to the | ||
| 157 | // accelerometer. Figure out how it works, or, if no game reads raw_entry, the following three | ||
| 158 | // lines can be removed and leave raw_entry unimplemented. | ||
| 159 | mem->accelerometer.raw_entry.x = -2 * accelerometer_entry.x; | ||
| 160 | mem->accelerometer.raw_entry.z = 2 * accelerometer_entry.y; | ||
| 161 | mem->accelerometer.raw_entry.y = -2 * accelerometer_entry.z; | ||
| 162 | |||
| 163 | // If we just updated index 0, provide a new timestamp | ||
| 164 | if (mem->accelerometer.index == 0) { | ||
| 165 | mem->accelerometer.index_reset_ticks_previous = mem->accelerometer.index_reset_ticks; | ||
| 166 | mem->accelerometer.index_reset_ticks = (s64)CoreTiming::GetTicks(); | ||
| 163 | } | 167 | } |
| 164 | 168 | ||
| 165 | // Update gyroscope | 169 | event_accelerometer->Signal(); |
| 166 | if (enable_gyroscope_count > 0) { | ||
| 167 | mem->gyroscope.index = next_gyroscope_index; | ||
| 168 | next_gyroscope_index = (next_gyroscope_index + 1) % mem->gyroscope.entries.size(); | ||
| 169 | 170 | ||
| 170 | GyroscopeDataEntry& gyroscope_entry = mem->gyroscope.entries[mem->gyroscope.index]; | 171 | // Reschedule recurrent event |
| 171 | std::tie(gyroscope_entry.x, gyroscope_entry.y, gyroscope_entry.z) = | 172 | CoreTiming::ScheduleEvent(accelerometer_update_ticks - cycles_late, accelerometer_update_event); |
| 172 | VideoCore::g_emu_window->GetGyroscopeState(); | 173 | } |
| 173 | 174 | ||
| 174 | // Make up "raw" entry | 175 | static void UpdateGyroscopeCallback(u64 userdata, int cycles_late) { |
| 175 | mem->gyroscope.raw_entry.x = gyroscope_entry.x; | 176 | SharedMem* mem = reinterpret_cast<SharedMem*>(shared_mem->GetPointer()); |
| 176 | mem->gyroscope.raw_entry.z = -gyroscope_entry.y; | ||
| 177 | mem->gyroscope.raw_entry.y = gyroscope_entry.z; | ||
| 178 | 177 | ||
| 179 | // If we just updated index 0, provide a new timestamp | 178 | mem->gyroscope.index = next_gyroscope_index; |
| 180 | if (mem->gyroscope.index == 0) { | 179 | next_gyroscope_index = (next_gyroscope_index + 1) % mem->gyroscope.entries.size(); |
| 181 | mem->gyroscope.index_reset_ticks_previous = mem->gyroscope.index_reset_ticks; | 180 | |
| 182 | mem->gyroscope.index_reset_ticks = (s64)CoreTiming::GetTicks(); | 181 | GyroscopeDataEntry& gyroscope_entry = mem->gyroscope.entries[mem->gyroscope.index]; |
| 183 | } | 182 | std::tie(gyroscope_entry.x, gyroscope_entry.y, gyroscope_entry.z) = |
| 183 | VideoCore::g_emu_window->GetGyroscopeState(); | ||
| 184 | |||
| 185 | // Make up "raw" entry | ||
| 186 | mem->gyroscope.raw_entry.x = gyroscope_entry.x; | ||
| 187 | mem->gyroscope.raw_entry.z = -gyroscope_entry.y; | ||
| 188 | mem->gyroscope.raw_entry.y = gyroscope_entry.z; | ||
| 184 | 189 | ||
| 185 | event_gyroscope->Signal(); | 190 | // If we just updated index 0, provide a new timestamp |
| 191 | if (mem->gyroscope.index == 0) { | ||
| 192 | mem->gyroscope.index_reset_ticks_previous = mem->gyroscope.index_reset_ticks; | ||
| 193 | mem->gyroscope.index_reset_ticks = (s64)CoreTiming::GetTicks(); | ||
| 186 | } | 194 | } |
| 195 | |||
| 196 | event_gyroscope->Signal(); | ||
| 197 | |||
| 198 | // Reschedule recurrent event | ||
| 199 | CoreTiming::ScheduleEvent(gyroscope_update_ticks - cycles_late, gyroscope_update_event); | ||
| 187 | } | 200 | } |
| 188 | 201 | ||
| 189 | void GetIPCHandles(Service::Interface* self) { | 202 | void GetIPCHandles(Service::Interface* self) { |
| @@ -204,7 +217,11 @@ void EnableAccelerometer(Service::Interface* self) { | |||
| 204 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 217 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 205 | 218 | ||
| 206 | ++enable_accelerometer_count; | 219 | ++enable_accelerometer_count; |
| 207 | event_accelerometer->Signal(); | 220 | |
| 221 | // Schedules the accelerometer update event if the accelerometer was just enabled | ||
| 222 | if (enable_accelerometer_count == 1) { | ||
| 223 | CoreTiming::ScheduleEvent(accelerometer_update_ticks, accelerometer_update_event); | ||
| 224 | } | ||
| 208 | 225 | ||
| 209 | cmd_buff[1] = RESULT_SUCCESS.raw; | 226 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 210 | 227 | ||
| @@ -215,7 +232,11 @@ void DisableAccelerometer(Service::Interface* self) { | |||
| 215 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 232 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 216 | 233 | ||
| 217 | --enable_accelerometer_count; | 234 | --enable_accelerometer_count; |
| 218 | event_accelerometer->Signal(); | 235 | |
| 236 | // Unschedules the accelerometer update event if the accelerometer was just disabled | ||
| 237 | if (enable_accelerometer_count == 0) { | ||
| 238 | CoreTiming::UnscheduleEvent(accelerometer_update_event, 0); | ||
| 239 | } | ||
| 219 | 240 | ||
| 220 | cmd_buff[1] = RESULT_SUCCESS.raw; | 241 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 221 | 242 | ||
| @@ -226,7 +247,11 @@ void EnableGyroscopeLow(Service::Interface* self) { | |||
| 226 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 247 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 227 | 248 | ||
| 228 | ++enable_gyroscope_count; | 249 | ++enable_gyroscope_count; |
| 229 | event_gyroscope->Signal(); | 250 | |
| 251 | // Schedules the gyroscope update event if the gyroscope was just enabled | ||
| 252 | if (enable_gyroscope_count == 1) { | ||
| 253 | CoreTiming::ScheduleEvent(gyroscope_update_ticks, gyroscope_update_event); | ||
| 254 | } | ||
| 230 | 255 | ||
| 231 | cmd_buff[1] = RESULT_SUCCESS.raw; | 256 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 232 | 257 | ||
| @@ -237,7 +262,11 @@ void DisableGyroscopeLow(Service::Interface* self) { | |||
| 237 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 262 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 238 | 263 | ||
| 239 | --enable_gyroscope_count; | 264 | --enable_gyroscope_count; |
| 240 | event_gyroscope->Signal(); | 265 | |
| 266 | // Unschedules the gyroscope update event if the gyroscope was just disabled | ||
| 267 | if (enable_gyroscope_count == 0) { | ||
| 268 | CoreTiming::UnscheduleEvent(gyroscope_update_event, 0); | ||
| 269 | } | ||
| 241 | 270 | ||
| 242 | cmd_buff[1] = RESULT_SUCCESS.raw; | 271 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 243 | 272 | ||
| @@ -291,6 +320,8 @@ void Init() { | |||
| 291 | 320 | ||
| 292 | next_pad_index = 0; | 321 | next_pad_index = 0; |
| 293 | next_touch_index = 0; | 322 | next_touch_index = 0; |
| 323 | next_accelerometer_index = 0; | ||
| 324 | next_gyroscope_index = 0; | ||
| 294 | 325 | ||
| 295 | // Create event handles | 326 | // Create event handles |
| 296 | event_pad_or_touch_1 = Event::Create(ResetType::OneShot, "HID:EventPadOrTouch1"); | 327 | event_pad_or_touch_1 = Event::Create(ResetType::OneShot, "HID:EventPadOrTouch1"); |
| @@ -298,6 +329,15 @@ void Init() { | |||
| 298 | event_accelerometer = Event::Create(ResetType::OneShot, "HID:EventAccelerometer"); | 329 | event_accelerometer = Event::Create(ResetType::OneShot, "HID:EventAccelerometer"); |
| 299 | event_gyroscope = Event::Create(ResetType::OneShot, "HID:EventGyroscope"); | 330 | event_gyroscope = Event::Create(ResetType::OneShot, "HID:EventGyroscope"); |
| 300 | event_debug_pad = Event::Create(ResetType::OneShot, "HID:EventDebugPad"); | 331 | event_debug_pad = Event::Create(ResetType::OneShot, "HID:EventDebugPad"); |
| 332 | |||
| 333 | // Register update callbacks | ||
| 334 | pad_update_event = CoreTiming::RegisterEvent("HID::UpdatePadCallback", UpdatePadCallback); | ||
| 335 | accelerometer_update_event = | ||
| 336 | CoreTiming::RegisterEvent("HID::UpdateAccelerometerCallback", UpdateAccelerometerCallback); | ||
| 337 | gyroscope_update_event = | ||
| 338 | CoreTiming::RegisterEvent("HID::UpdateGyroscopeCallback", UpdateGyroscopeCallback); | ||
| 339 | |||
| 340 | CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event); | ||
| 301 | } | 341 | } |
| 302 | 342 | ||
| 303 | void Shutdown() { | 343 | void Shutdown() { |
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index 7904e7355..21e66dfe0 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h | |||
| @@ -296,9 +296,6 @@ void GetGyroscopeLowRawToDpsCoefficient(Service::Interface* self); | |||
| 296 | */ | 296 | */ |
| 297 | void GetGyroscopeLowCalibrateParam(Service::Interface* self); | 297 | void GetGyroscopeLowCalibrateParam(Service::Interface* self); |
| 298 | 298 | ||
| 299 | /// Checks for user input updates | ||
| 300 | void Update(); | ||
| 301 | |||
| 302 | /// Initialize HID service | 299 | /// Initialize HID service |
| 303 | void Init(); | 300 | void Init(); |
| 304 | 301 | ||
diff --git a/src/core/hle/service/mic_u.cpp b/src/core/hle/service/mic_u.cpp index c62f8afc6..e98388560 100644 --- a/src/core/hle/service/mic_u.cpp +++ b/src/core/hle/service/mic_u.cpp | |||
| @@ -93,7 +93,7 @@ static void StartSampling(Interface* self) { | |||
| 93 | sample_rate = static_cast<SampleRate>(cmd_buff[2] & 0xFF); | 93 | sample_rate = static_cast<SampleRate>(cmd_buff[2] & 0xFF); |
| 94 | audio_buffer_offset = cmd_buff[3]; | 94 | audio_buffer_offset = cmd_buff[3]; |
| 95 | audio_buffer_size = cmd_buff[4]; | 95 | audio_buffer_size = cmd_buff[4]; |
| 96 | audio_buffer_loop = static_cast<bool>(cmd_buff[5] & 0xFF); | 96 | audio_buffer_loop = (cmd_buff[5] & 0xFF) != 0; |
| 97 | 97 | ||
| 98 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | 98 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error |
| 99 | is_sampling = true; | 99 | is_sampling = true; |
| @@ -202,7 +202,7 @@ static void GetGain(Interface* self) { | |||
| 202 | */ | 202 | */ |
| 203 | static void SetPower(Interface* self) { | 203 | static void SetPower(Interface* self) { |
| 204 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 204 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 205 | mic_power = static_cast<bool>(cmd_buff[1] & 0xFF); | 205 | mic_power = (cmd_buff[1] & 0xFF) != 0; |
| 206 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | 206 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error |
| 207 | LOG_WARNING(Service_MIC, "(STUBBED) called, mic_power=%u", mic_power); | 207 | LOG_WARNING(Service_MIC, "(STUBBED) called, mic_power=%u", mic_power); |
| 208 | } | 208 | } |
| @@ -252,7 +252,7 @@ static void SetIirFilterMic(Interface* self) { | |||
| 252 | */ | 252 | */ |
| 253 | static void SetClamp(Interface* self) { | 253 | static void SetClamp(Interface* self) { |
| 254 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 254 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 255 | clamp = static_cast<bool>(cmd_buff[1] & 0xFF); | 255 | clamp = (cmd_buff[1] & 0xFF) != 0; |
| 256 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | 256 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error |
| 257 | LOG_WARNING(Service_MIC, "(STUBBED) called, clamp=%u", clamp); | 257 | LOG_WARNING(Service_MIC, "(STUBBED) called, clamp=%u", clamp); |
| 258 | } | 258 | } |
| @@ -282,7 +282,7 @@ static void GetClamp(Interface* self) { | |||
| 282 | */ | 282 | */ |
| 283 | static void SetAllowShellClosed(Interface* self) { | 283 | static void SetAllowShellClosed(Interface* self) { |
| 284 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 284 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 285 | allow_shell_closed = static_cast<bool>(cmd_buff[1] & 0xFF); | 285 | allow_shell_closed = (cmd_buff[1] & 0xFF) != 0; |
| 286 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | 286 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error |
| 287 | LOG_WARNING(Service_MIC, "(STUBBED) called, allow_shell_closed=%u", allow_shell_closed); | 287 | LOG_WARNING(Service_MIC, "(STUBBED) called, allow_shell_closed=%u", allow_shell_closed); |
| 288 | } | 288 | } |
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp index e248285f9..fd3c7d9c2 100644 --- a/src/core/hle/service/nfc/nfc.cpp +++ b/src/core/hle/service/nfc/nfc.cpp | |||
| @@ -11,6 +11,81 @@ namespace Service { | |||
| 11 | namespace NFC { | 11 | namespace NFC { |
| 12 | 12 | ||
| 13 | static Kernel::SharedPtr<Kernel::Event> tag_in_range_event; | 13 | static Kernel::SharedPtr<Kernel::Event> tag_in_range_event; |
| 14 | static Kernel::SharedPtr<Kernel::Event> tag_out_of_range_event; | ||
| 15 | static TagState nfc_tag_state = TagState::NotInitialized; | ||
| 16 | static CommunicationStatus nfc_status = CommunicationStatus::NfcInitialized; | ||
| 17 | |||
| 18 | void Initialize(Interface* self) { | ||
| 19 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 20 | |||
| 21 | u8 param = static_cast<u8>(cmd_buff[1] & 0xFF); | ||
| 22 | |||
| 23 | nfc_tag_state = TagState::NotScanning; | ||
| 24 | |||
| 25 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 26 | LOG_WARNING(Service_NFC, "(STUBBED) called, param=%u", param); | ||
| 27 | } | ||
| 28 | |||
| 29 | void Shutdown(Interface* self) { | ||
| 30 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 31 | |||
| 32 | u8 param = static_cast<u8>(cmd_buff[1] & 0xFF); | ||
| 33 | nfc_tag_state = TagState::NotInitialized; | ||
| 34 | |||
| 35 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 36 | LOG_WARNING(Service_NFC, "(STUBBED) called, param=%u", param); | ||
| 37 | } | ||
| 38 | |||
| 39 | void StartCommunication(Interface* self) { | ||
| 40 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 41 | |||
| 42 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 43 | LOG_WARNING(Service_NFC, "(STUBBED) called"); | ||
| 44 | } | ||
| 45 | |||
| 46 | void StopCommunication(Interface* self) { | ||
| 47 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 48 | |||
| 49 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 50 | LOG_WARNING(Service_NFC, "(STUBBED) called"); | ||
| 51 | } | ||
| 52 | |||
| 53 | void StartTagScanning(Interface* self) { | ||
| 54 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 55 | |||
| 56 | nfc_tag_state = TagState::TagInRange; | ||
| 57 | tag_in_range_event->Signal(); | ||
| 58 | |||
| 59 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 60 | LOG_WARNING(Service_NFC, "(STUBBED) called"); | ||
| 61 | } | ||
| 62 | |||
| 63 | void StopTagScanning(Interface* self) { | ||
| 64 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 65 | |||
| 66 | nfc_tag_state = TagState::NotScanning; | ||
| 67 | |||
| 68 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 69 | LOG_WARNING(Service_NFC, "(STUBBED) called"); | ||
| 70 | } | ||
| 71 | |||
| 72 | void LoadAmiiboData(Interface* self) { | ||
| 73 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 74 | |||
| 75 | nfc_tag_state = TagState::TagDataLoaded; | ||
| 76 | |||
| 77 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 78 | LOG_WARNING(Service_NFC, "(STUBBED) called"); | ||
| 79 | } | ||
| 80 | |||
| 81 | void ResetTagScanState(Interface* self) { | ||
| 82 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 83 | |||
| 84 | nfc_tag_state = TagState::NotScanning; | ||
| 85 | |||
| 86 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 87 | LOG_WARNING(Service_NFC, "(STUBBED) called"); | ||
| 88 | } | ||
| 14 | 89 | ||
| 15 | void GetTagInRangeEvent(Interface* self) { | 90 | void GetTagInRangeEvent(Interface* self) { |
| 16 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 91 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| @@ -22,16 +97,46 @@ void GetTagInRangeEvent(Interface* self) { | |||
| 22 | LOG_WARNING(Service_NFC, "(STUBBED) called"); | 97 | LOG_WARNING(Service_NFC, "(STUBBED) called"); |
| 23 | } | 98 | } |
| 24 | 99 | ||
| 100 | void GetTagOutOfRangeEvent(Interface* self) { | ||
| 101 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 102 | |||
| 103 | cmd_buff[0] = IPC::MakeHeader(0xC, 1, 2); | ||
| 104 | cmd_buff[1] = RESULT_SUCCESS.raw; | ||
| 105 | cmd_buff[2] = IPC::CopyHandleDesc(); | ||
| 106 | cmd_buff[3] = Kernel::g_handle_table.Create(tag_out_of_range_event).MoveFrom(); | ||
| 107 | LOG_WARNING(Service_NFC, "(STUBBED) called"); | ||
| 108 | } | ||
| 109 | |||
| 110 | void GetTagState(Interface* self) { | ||
| 111 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 112 | |||
| 113 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 114 | cmd_buff[2] = static_cast<u8>(nfc_tag_state); | ||
| 115 | LOG_DEBUG(Service_NFC, "(STUBBED) called"); | ||
| 116 | } | ||
| 117 | |||
| 118 | void CommunicationGetStatus(Interface* self) { | ||
| 119 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 120 | |||
| 121 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | ||
| 122 | cmd_buff[2] = static_cast<u8>(nfc_status); | ||
| 123 | LOG_DEBUG(Service_NFC, "(STUBBED) called"); | ||
| 124 | } | ||
| 125 | |||
| 25 | void Init() { | 126 | void Init() { |
| 26 | AddService(new NFC_M()); | 127 | AddService(new NFC_M()); |
| 27 | AddService(new NFC_U()); | 128 | AddService(new NFC_U()); |
| 28 | 129 | ||
| 29 | tag_in_range_event = | 130 | tag_in_range_event = |
| 30 | Kernel::Event::Create(Kernel::ResetType::OneShot, "NFC::tag_in_range_event"); | 131 | Kernel::Event::Create(Kernel::ResetType::OneShot, "NFC::tag_in_range_event"); |
| 132 | tag_out_of_range_event = | ||
| 133 | Kernel::Event::Create(Kernel::ResetType::OneShot, "NFC::tag_out_range_event"); | ||
| 134 | nfc_tag_state = TagState::NotInitialized; | ||
| 31 | } | 135 | } |
| 32 | 136 | ||
| 33 | void Shutdown() { | 137 | void Shutdown() { |
| 34 | tag_in_range_event = nullptr; | 138 | tag_in_range_event = nullptr; |
| 139 | tag_out_of_range_event = nullptr; | ||
| 35 | } | 140 | } |
| 36 | 141 | ||
| 37 | } // namespace NFC | 142 | } // namespace NFC |
diff --git a/src/core/hle/service/nfc/nfc.h b/src/core/hle/service/nfc/nfc.h index b02354201..a013bdae7 100644 --- a/src/core/hle/service/nfc/nfc.h +++ b/src/core/hle/service/nfc/nfc.h | |||
| @@ -4,12 +4,103 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 7 | namespace Service { | 9 | namespace Service { |
| 8 | 10 | ||
| 9 | class Interface; | 11 | class Interface; |
| 10 | 12 | ||
| 11 | namespace NFC { | 13 | namespace NFC { |
| 12 | 14 | ||
| 15 | enum class TagState : u8 { | ||
| 16 | NotInitialized = 0, | ||
| 17 | NotScanning = 1, | ||
| 18 | Scanning = 2, | ||
| 19 | TagInRange = 3, | ||
| 20 | TagOutOfRange = 4, | ||
| 21 | TagDataLoaded = 5, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class CommunicationStatus : u8 { | ||
| 25 | AttemptInitialize = 1, | ||
| 26 | NfcInitialized = 2, | ||
| 27 | }; | ||
| 28 | |||
| 29 | /** | ||
| 30 | * NFC::Initialize service function | ||
| 31 | * Inputs: | ||
| 32 | * 0 : Header code [0x00010040] | ||
| 33 | * 1 : (u8) unknown parameter. Can be either value 0x1 or 0x2 | ||
| 34 | * Outputs: | ||
| 35 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 36 | */ | ||
| 37 | void Initialize(Interface* self); | ||
| 38 | |||
| 39 | /** | ||
| 40 | * NFC::Shutdown service function | ||
| 41 | * Inputs: | ||
| 42 | * 0 : Header code [0x00020040] | ||
| 43 | * 1 : (u8) unknown parameter | ||
| 44 | * Outputs: | ||
| 45 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 46 | */ | ||
| 47 | void Shutdown(Interface* self); | ||
| 48 | |||
| 49 | /** | ||
| 50 | * NFC::StartCommunication service function | ||
| 51 | * Inputs: | ||
| 52 | * 0 : Header code [0x00030000] | ||
| 53 | * Outputs: | ||
| 54 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 55 | */ | ||
| 56 | void StartCommunication(Interface* self); | ||
| 57 | |||
| 58 | /** | ||
| 59 | * NFC::StopCommunication service function | ||
| 60 | * Inputs: | ||
| 61 | * 0 : Header code [0x00040000] | ||
| 62 | * Outputs: | ||
| 63 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 64 | */ | ||
| 65 | void StopCommunication(Interface* self); | ||
| 66 | |||
| 67 | /** | ||
| 68 | * NFC::StartTagScanning service function | ||
| 69 | * Inputs: | ||
| 70 | * 0 : Header code [0x00050040] | ||
| 71 | * 1 : (u16) unknown. This is normally 0x0 | ||
| 72 | * Outputs: | ||
| 73 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 74 | */ | ||
| 75 | void StartTagScanning(Interface* self); | ||
| 76 | |||
| 77 | /** | ||
| 78 | * NFC::StopTagScanning service function | ||
| 79 | * Inputs: | ||
| 80 | * 0 : Header code [0x00060000] | ||
| 81 | * Outputs: | ||
| 82 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 83 | */ | ||
| 84 | void StopTagScanning(Interface* self); | ||
| 85 | |||
| 86 | /** | ||
| 87 | * NFC::LoadAmiiboData service function | ||
| 88 | * Inputs: | ||
| 89 | * 0 : Header code [0x00070000] | ||
| 90 | * Outputs: | ||
| 91 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 92 | */ | ||
| 93 | void LoadAmiiboData(Interface* self); | ||
| 94 | |||
| 95 | /** | ||
| 96 | * NFC::ResetTagScanState service function | ||
| 97 | * Inputs: | ||
| 98 | * 0 : Header code [0x00080000] | ||
| 99 | * Outputs: | ||
| 100 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 101 | */ | ||
| 102 | void ResetTagScanState(Interface* self); | ||
| 103 | |||
| 13 | /** | 104 | /** |
| 14 | * NFC::GetTagInRangeEvent service function | 105 | * NFC::GetTagInRangeEvent service function |
| 15 | * Inputs: | 106 | * Inputs: |
| @@ -21,6 +112,37 @@ namespace NFC { | |||
| 21 | */ | 112 | */ |
| 22 | void GetTagInRangeEvent(Interface* self); | 113 | void GetTagInRangeEvent(Interface* self); |
| 23 | 114 | ||
| 115 | /** | ||
| 116 | * NFC::GetTagOutOfRangeEvent service function | ||
| 117 | * Inputs: | ||
| 118 | * 0 : Header code [0x000C0000] | ||
| 119 | * Outputs: | ||
| 120 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 121 | * 2 : Copy handle descriptor | ||
| 122 | * 3 : Event Handle | ||
| 123 | */ | ||
| 124 | void GetTagOutOfRangeEvent(Interface* self); | ||
| 125 | |||
| 126 | /** | ||
| 127 | * NFC::GetTagState service function | ||
| 128 | * Inputs: | ||
| 129 | * 0 : Header code [0x000D0000] | ||
| 130 | * Outputs: | ||
| 131 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 132 | * 2 : (u8) Tag state | ||
| 133 | */ | ||
| 134 | void GetTagState(Interface* self); | ||
| 135 | |||
| 136 | /** | ||
| 137 | * NFC::CommunicationGetStatus service function | ||
| 138 | * Inputs: | ||
| 139 | * 0 : Header code [0x000F0000] | ||
| 140 | * Outputs: | ||
| 141 | * 1 : Result of function, 0 on success, otherwise error code | ||
| 142 | * 2 : (u8) Communication state | ||
| 143 | */ | ||
| 144 | void CommunicationGetStatus(Interface* self); | ||
| 145 | |||
| 24 | /// Initialize all NFC services. | 146 | /// Initialize all NFC services. |
| 25 | void Init(); | 147 | void Init(); |
| 26 | 148 | ||
diff --git a/src/core/hle/service/nfc/nfc_m.cpp b/src/core/hle/service/nfc/nfc_m.cpp index f43b4029a..ebe637650 100644 --- a/src/core/hle/service/nfc/nfc_m.cpp +++ b/src/core/hle/service/nfc/nfc_m.cpp | |||
| @@ -11,18 +11,19 @@ namespace NFC { | |||
| 11 | const Interface::FunctionInfo FunctionTable[] = { | 11 | const Interface::FunctionInfo FunctionTable[] = { |
| 12 | // clang-format off | 12 | // clang-format off |
| 13 | // nfc:u shared commands | 13 | // nfc:u shared commands |
| 14 | {0x00010040, nullptr, "Initialize"}, | 14 | {0x00010040, Initialize, "Initialize"}, |
| 15 | {0x00020040, nullptr, "Shutdown"}, | 15 | {0x00020040, Shutdown, "Shutdown"}, |
| 16 | {0x00030000, nullptr, "StartCommunication"}, | 16 | {0x00030000, StartCommunication, "StartCommunication"}, |
| 17 | {0x00040000, nullptr, "StopCommunication"}, | 17 | {0x00040000, StopCommunication, "StopCommunication"}, |
| 18 | {0x00050040, nullptr, "StartTagScanning"}, | 18 | {0x00050040, StartTagScanning, "StartTagScanning"}, |
| 19 | {0x00060000, nullptr, "StopTagScanning"}, | 19 | {0x00060000, StopTagScanning, "StopTagScanning"}, |
| 20 | {0x00070000, nullptr, "LoadAmiiboData"}, | 20 | {0x00070000, LoadAmiiboData, "LoadAmiiboData"}, |
| 21 | {0x00080000, nullptr, "ResetTagScanState"}, | 21 | {0x00080000, ResetTagScanState, "ResetTagScanState"}, |
| 22 | {0x00090002, nullptr, "UpdateStoredAmiiboData"}, | 22 | {0x00090002, nullptr, "UpdateStoredAmiiboData"}, |
| 23 | {0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"}, | 23 | {0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"}, |
| 24 | {0x000D0000, nullptr, "GetTagState"}, | 24 | {0x000C0000, GetTagOutOfRangeEvent, "GetTagOutOfRangeEvent"}, |
| 25 | {0x000F0000, nullptr, "CommunicationGetStatus"}, | 25 | {0x000D0000, GetTagState, "GetTagState"}, |
| 26 | {0x000F0000, CommunicationGetStatus, "CommunicationGetStatus"}, | ||
| 26 | {0x00100000, nullptr, "GetTagInfo2"}, | 27 | {0x00100000, nullptr, "GetTagInfo2"}, |
| 27 | {0x00110000, nullptr, "GetTagInfo"}, | 28 | {0x00110000, nullptr, "GetTagInfo"}, |
| 28 | {0x00120000, nullptr, "CommunicationGetResult"}, | 29 | {0x00120000, nullptr, "CommunicationGetResult"}, |
diff --git a/src/core/hle/service/nfc/nfc_u.cpp b/src/core/hle/service/nfc/nfc_u.cpp index 4b5200ae8..5a40c7874 100644 --- a/src/core/hle/service/nfc/nfc_u.cpp +++ b/src/core/hle/service/nfc/nfc_u.cpp | |||
| @@ -10,18 +10,19 @@ namespace NFC { | |||
| 10 | 10 | ||
| 11 | const Interface::FunctionInfo FunctionTable[] = { | 11 | const Interface::FunctionInfo FunctionTable[] = { |
| 12 | // clang-format off | 12 | // clang-format off |
| 13 | {0x00010040, nullptr, "Initialize"}, | 13 | {0x00010040, Initialize, "Initialize"}, |
| 14 | {0x00020040, nullptr, "Shutdown"}, | 14 | {0x00020040, Shutdown, "Shutdown"}, |
| 15 | {0x00030000, nullptr, "StartCommunication"}, | 15 | {0x00030000, StartCommunication, "StartCommunication"}, |
| 16 | {0x00040000, nullptr, "StopCommunication"}, | 16 | {0x00040000, StopCommunication, "StopCommunication"}, |
| 17 | {0x00050040, nullptr, "StartTagScanning"}, | 17 | {0x00050040, StartTagScanning, "StartTagScanning"}, |
| 18 | {0x00060000, nullptr, "StopTagScanning"}, | 18 | {0x00060000, StopTagScanning, "StopTagScanning"}, |
| 19 | {0x00070000, nullptr, "LoadAmiiboData"}, | 19 | {0x00070000, LoadAmiiboData, "LoadAmiiboData"}, |
| 20 | {0x00080000, nullptr, "ResetTagScanState"}, | 20 | {0x00080000, ResetTagScanState, "ResetTagScanState"}, |
| 21 | {0x00090002, nullptr, "UpdateStoredAmiiboData"}, | 21 | {0x00090002, nullptr, "UpdateStoredAmiiboData"}, |
| 22 | {0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"}, | 22 | {0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"}, |
| 23 | {0x000D0000, nullptr, "GetTagState"}, | 23 | {0x000C0000, GetTagOutOfRangeEvent, "GetTagOutOfRangeEvent"}, |
| 24 | {0x000F0000, nullptr, "CommunicationGetStatus"}, | 24 | {0x000D0000, GetTagState, "GetTagState"}, |
| 25 | {0x000F0000, CommunicationGetStatus, "CommunicationGetStatus"}, | ||
| 25 | {0x00100000, nullptr, "GetTagInfo2"}, | 26 | {0x00100000, nullptr, "GetTagInfo2"}, |
| 26 | {0x00110000, nullptr, "GetTagInfo"}, | 27 | {0x00110000, nullptr, "GetTagInfo"}, |
| 27 | {0x00120000, nullptr, "CommunicationGetResult"}, | 28 | {0x00120000, nullptr, "CommunicationGetResult"}, |
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 1a1ee90b2..fa8c13d36 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | #include "common/vector_math.h" | 15 | #include "common/vector_math.h" |
| 16 | #include "core/core_timing.h" | 16 | #include "core/core_timing.h" |
| 17 | #include "core/hle/service/gsp_gpu.h" | 17 | #include "core/hle/service/gsp_gpu.h" |
| 18 | #include "core/hle/service/hid/hid.h" | ||
| 19 | #include "core/hw/gpu.h" | 18 | #include "core/hw/gpu.h" |
| 20 | #include "core/hw/hw.h" | 19 | #include "core/hw/hw.h" |
| 21 | #include "core/memory.h" | 20 | #include "core/memory.h" |
| @@ -33,7 +32,7 @@ namespace GPU { | |||
| 33 | Regs g_regs; | 32 | Regs g_regs; |
| 34 | 33 | ||
| 35 | /// 268MHz CPU clocks / 60Hz frames per second | 34 | /// 268MHz CPU clocks / 60Hz frames per second |
| 36 | const u64 frame_ticks = 268123480ull / 60; | 35 | const u64 frame_ticks = BASE_CLOCK_RATE_ARM11 / 60; |
| 37 | /// Event id for CoreTiming | 36 | /// Event id for CoreTiming |
| 38 | static int vblank_event; | 37 | static int vblank_event; |
| 39 | /// Total number of frames drawn | 38 | /// Total number of frames drawn |
| @@ -551,9 +550,6 @@ static void VBlankCallback(u64 userdata, int cycles_late) { | |||
| 551 | Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0); | 550 | Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0); |
| 552 | Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1); | 551 | Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1); |
| 553 | 552 | ||
| 554 | // Check for user input updates | ||
| 555 | Service::HID::Update(); | ||
| 556 | |||
| 557 | if (!Settings::values.use_vsync && Settings::values.toggle_framelimit) { | 553 | if (!Settings::values.use_vsync && Settings::values.toggle_framelimit) { |
| 558 | FrameLimiter(); | 554 | FrameLimiter(); |
| 559 | } | 555 | } |
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp index 1c10740a0..09266e8b0 100644 --- a/src/core/loader/3dsx.cpp +++ b/src/core/loader/3dsx.cpp | |||
| @@ -177,18 +177,34 @@ static THREEDSX_Error Load3DSXFile(FileUtil::IOFile& file, u32 base_addr, | |||
| 177 | pos += table.skip; | 177 | pos += table.skip; |
| 178 | s32 num_patches = table.patch; | 178 | s32 num_patches = table.patch; |
| 179 | while (0 < num_patches && pos < end_pos) { | 179 | while (0 < num_patches && pos < end_pos) { |
| 180 | u32 in_addr = | 180 | u32 in_addr = base_addr + static_cast<u32>(reinterpret_cast<u8*>(pos) - |
| 181 | static_cast<u32>(reinterpret_cast<u8*>(pos) - program_image.data()); | 181 | program_image.data()); |
| 182 | u32 addr = TranslateAddr(*pos, &loadinfo, offsets); | 182 | u32 orig_data = *pos; |
| 183 | LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)", | 183 | u32 sub_type = orig_data >> (32 - 4); |
| 184 | base_addr + in_addr, addr, current_segment_reloc_table, *pos); | 184 | u32 addr = TranslateAddr(orig_data & ~0xF0000000, &loadinfo, offsets); |
| 185 | LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)", in_addr, addr, | ||
| 186 | current_segment_reloc_table, *pos); | ||
| 185 | switch (current_segment_reloc_table) { | 187 | switch (current_segment_reloc_table) { |
| 186 | case 0: | 188 | case 0: { |
| 187 | *pos = (addr); | 189 | if (sub_type != 0) |
| 190 | return ERROR_READ; | ||
| 191 | *pos = addr; | ||
| 188 | break; | 192 | break; |
| 189 | case 1: | 193 | } |
| 190 | *pos = static_cast<u32>(addr - in_addr); | 194 | case 1: { |
| 195 | u32 data = addr - in_addr; | ||
| 196 | switch (sub_type) { | ||
| 197 | case 0: // 32-bit signed offset | ||
| 198 | *pos = data; | ||
| 199 | break; | ||
| 200 | case 1: // 31-bit signed offset | ||
| 201 | *pos = data & ~(1U << 31); | ||
| 202 | break; | ||
| 203 | default: | ||
| 204 | return ERROR_READ; | ||
| 205 | } | ||
| 191 | break; | 206 | break; |
| 207 | } | ||
| 192 | default: | 208 | default: |
| 193 | break; // this should never happen | 209 | break; // this should never happen |
| 194 | } | 210 | } |
diff --git a/src/core/settings.h b/src/core/settings.h index 8dbda653a..e22ce0f16 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -104,6 +104,7 @@ struct Values { | |||
| 104 | // Audio | 104 | // Audio |
| 105 | std::string sink_id; | 105 | std::string sink_id; |
| 106 | bool enable_audio_stretching; | 106 | bool enable_audio_stretching; |
| 107 | std::string audio_device_id; | ||
| 107 | 108 | ||
| 108 | // Debugging | 109 | // Debugging |
| 109 | bool use_gdbstub; | 110 | bool use_gdbstub; |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6ca319b59..d55b84ce0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -50,10 +50,12 @@ set(HEADERS | |||
| 50 | 50 | ||
| 51 | if(ARCHITECTURE_x86_64) | 51 | if(ARCHITECTURE_x86_64) |
| 52 | set(SRCS ${SRCS} | 52 | set(SRCS ${SRCS} |
| 53 | shader/shader_jit_x64.cpp) | 53 | shader/shader_jit_x64.cpp |
| 54 | shader/shader_jit_x64_compiler.cpp) | ||
| 54 | 55 | ||
| 55 | set(HEADERS ${HEADERS} | 56 | set(HEADERS ${HEADERS} |
| 56 | shader/shader_jit_x64.h) | 57 | shader/shader_jit_x64.h |
| 58 | shader/shader_jit_x64_compiler.h) | ||
| 57 | endif() | 59 | endif() |
| 58 | 60 | ||
| 59 | create_directory_groups(${SRCS} ${HEADERS}) | 61 | create_directory_groups(${SRCS} ${HEADERS}) |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index ea58e9f54..eb79974a8 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -142,16 +142,18 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 142 | MICROPROFILE_SCOPE(GPU_Drawing); | 142 | MICROPROFILE_SCOPE(GPU_Drawing); |
| 143 | immediate_attribute_id = 0; | 143 | immediate_attribute_id = 0; |
| 144 | 144 | ||
| 145 | Shader::UnitState shader_unit; | 145 | auto* shader_engine = Shader::GetEngine(); |
| 146 | g_state.vs.Setup(); | 146 | shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); |
| 147 | 147 | ||
| 148 | // Send to vertex shader | 148 | // Send to vertex shader |
| 149 | if (g_debug_context) | 149 | if (g_debug_context) |
| 150 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | 150 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, |
| 151 | static_cast<void*>(&immediate_input)); | 151 | static_cast<void*>(&immediate_input)); |
| 152 | g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes + 1); | 152 | Shader::UnitState shader_unit; |
| 153 | Shader::OutputVertex output_vertex = | 153 | shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); |
| 154 | shader_unit.output_registers.ToVertex(regs.vs); | 154 | shader_engine->Run(g_state.vs, shader_unit); |
| 155 | auto output_vertex = Shader::OutputVertex::FromRegisters( | ||
| 156 | shader_unit.registers.output, regs, regs.vs.output_mask); | ||
| 155 | 157 | ||
| 156 | // Send to renderer | 158 | // Send to renderer |
| 157 | using Pica::Shader::OutputVertex; | 159 | using Pica::Shader::OutputVertex; |
| @@ -243,8 +245,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 243 | unsigned int vertex_cache_pos = 0; | 245 | unsigned int vertex_cache_pos = 0; |
| 244 | vertex_cache_ids.fill(-1); | 246 | vertex_cache_ids.fill(-1); |
| 245 | 247 | ||
| 248 | auto* shader_engine = Shader::GetEngine(); | ||
| 246 | Shader::UnitState shader_unit; | 249 | Shader::UnitState shader_unit; |
| 247 | g_state.vs.Setup(); | 250 | |
| 251 | shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); | ||
| 248 | 252 | ||
| 249 | for (unsigned int index = 0; index < regs.num_vertices; ++index) { | 253 | for (unsigned int index = 0; index < regs.num_vertices; ++index) { |
| 250 | // Indexed rendering doesn't use the start offset | 254 | // Indexed rendering doesn't use the start offset |
| @@ -283,10 +287,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 283 | if (g_debug_context) | 287 | if (g_debug_context) |
| 284 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, | 288 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, |
| 285 | (void*)&input); | 289 | (void*)&input); |
| 286 | g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); | 290 | shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); |
| 291 | shader_engine->Run(g_state.vs, shader_unit); | ||
| 287 | 292 | ||
| 288 | // Retrieve vertex from register data | 293 | // Retrieve vertex from register data |
| 289 | output_vertex = shader_unit.output_registers.ToVertex(regs.vs); | 294 | output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, |
| 295 | regs, regs.vs.output_mask); | ||
| 290 | 296 | ||
| 291 | if (is_indexed) { | 297 | if (is_indexed) { |
| 292 | vertex_cache[vertex_cache_pos] = output_vertex; | 298 | vertex_cache[vertex_cache_pos] = output_vertex; |
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index ce2bd455e..b4a77c632 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp | |||
| @@ -499,7 +499,7 @@ void Init() { | |||
| 499 | } | 499 | } |
| 500 | 500 | ||
| 501 | void Shutdown() { | 501 | void Shutdown() { |
| 502 | Shader::ClearCache(); | 502 | Shader::Shutdown(); |
| 503 | } | 503 | } |
| 504 | 504 | ||
| 505 | template <typename T> | 505 | template <typename T> |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 5a306a5c8..f3674e965 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -716,8 +716,6 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { | |||
| 716 | 716 | ||
| 717 | bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { | 717 | bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { |
| 718 | MICROPROFILE_SCOPE(OpenGL_Blits); | 718 | MICROPROFILE_SCOPE(OpenGL_Blits); |
| 719 | using PixelFormat = CachedSurface::PixelFormat; | ||
| 720 | using SurfaceType = CachedSurface::SurfaceType; | ||
| 721 | 719 | ||
| 722 | CachedSurface src_params; | 720 | CachedSurface src_params; |
| 723 | src_params.addr = config.GetPhysicalInputAddress(); | 721 | src_params.addr = config.GetPhysicalInputAddress(); |
| @@ -748,7 +746,8 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe | |||
| 748 | 746 | ||
| 749 | // Adjust the source rectangle to take into account parts of the input lines being cropped | 747 | // Adjust the source rectangle to take into account parts of the input lines being cropped |
| 750 | if (config.input_width > config.output_width) { | 748 | if (config.input_width > config.output_width) { |
| 751 | src_rect.right -= (config.input_width - config.output_width) * src_surface->res_scale_width; | 749 | src_rect.right -= static_cast<int>((config.input_width - config.output_width) * |
| 750 | src_surface->res_scale_width); | ||
| 752 | } | 751 | } |
| 753 | 752 | ||
| 754 | // Require destination surface to have same resolution scale as source to preserve scaling | 753 | // Require destination surface to have same resolution scale as source to preserve scaling |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e1a9cb361..cc3e4bed5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -76,7 +76,7 @@ union PicaShaderConfig { | |||
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | state.fog_mode = regs.fog_mode; | 78 | state.fog_mode = regs.fog_mode; |
| 79 | state.fog_flip = regs.fog_flip; | 79 | state.fog_flip = regs.fog_flip != 0; |
| 80 | 80 | ||
| 81 | state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() | | 81 | state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() | |
| 82 | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; | 82 | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index ef3b06a7b..1e7eedecb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -172,7 +172,6 @@ bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, | |||
| 172 | const MathUtil::Rectangle<int>& src_rect, | 172 | const MathUtil::Rectangle<int>& src_rect, |
| 173 | CachedSurface* dst_surface, | 173 | CachedSurface* dst_surface, |
| 174 | const MathUtil::Rectangle<int>& dst_rect) { | 174 | const MathUtil::Rectangle<int>& dst_rect) { |
| 175 | using SurfaceType = CachedSurface::SurfaceType; | ||
| 176 | 175 | ||
| 177 | if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, | 176 | if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, |
| 178 | dst_surface->pixel_format)) { | 177 | dst_surface->pixel_format)) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index b50e8292b..f57fdb3cc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -8,7 +8,14 @@ | |||
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <set> | 9 | #include <set> |
| 10 | #include <tuple> | 10 | #include <tuple> |
| 11 | #ifdef __GNUC__ | ||
| 12 | #pragma GCC diagnostic push | ||
| 13 | #pragma GCC diagnostic ignored "-Wunused-local-typedef" | ||
| 14 | #endif | ||
| 11 | #include <boost/icl/interval_map.hpp> | 15 | #include <boost/icl/interval_map.hpp> |
| 16 | #ifdef __GNUC__ | ||
| 17 | #pragma GCC diagnostic pop | ||
| 18 | #endif | ||
| 12 | #include <glad/glad.h> | 19 | #include <glad/glad.h> |
| 13 | #include "common/assert.h" | 20 | #include "common/assert.h" |
| 14 | #include "common/common_funcs.h" | 21 | #include "common/common_funcs.h" |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index a4aa3c9e0..2da50bd62 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -2,14 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <atomic> | ||
| 6 | #include <cmath> | 5 | #include <cmath> |
| 7 | #include <cstring> | 6 | #include <cstring> |
| 8 | #include <unordered_map> | ||
| 9 | #include <utility> | ||
| 10 | #include <boost/range/algorithm/fill.hpp> | ||
| 11 | #include "common/bit_field.h" | ||
| 12 | #include "common/hash.h" | ||
| 13 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 14 | #include "common/microprofile.h" | 8 | #include "common/microprofile.h" |
| 15 | #include "video_core/pica.h" | 9 | #include "video_core/pica.h" |
| @@ -25,7 +19,8 @@ namespace Pica { | |||
| 25 | 19 | ||
| 26 | namespace Shader { | 20 | namespace Shader { |
| 27 | 21 | ||
| 28 | OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | 22 | OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, |
| 23 | u32 output_mask) { | ||
| 29 | // Setup output data | 24 | // Setup output data |
| 30 | OutputVertex ret; | 25 | OutputVertex ret; |
| 31 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | 26 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to |
| @@ -33,13 +28,13 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | |||
| 33 | unsigned index = 0; | 28 | unsigned index = 0; |
| 34 | for (unsigned i = 0; i < 7; ++i) { | 29 | for (unsigned i = 0; i < 7; ++i) { |
| 35 | 30 | ||
| 36 | if (index >= g_state.regs.vs_output_total) | 31 | if (index >= regs.vs_output_total) |
| 37 | break; | 32 | break; |
| 38 | 33 | ||
| 39 | if ((config.output_mask & (1 << i)) == 0) | 34 | if ((output_mask & (1 << i)) == 0) |
| 40 | continue; | 35 | continue; |
| 41 | 36 | ||
| 42 | const auto& output_register_map = g_state.regs.vs_output_attributes[index]; | 37 | const auto& output_register_map = regs.vs_output_attributes[index]; |
| 43 | 38 | ||
| 44 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, | 39 | u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, |
| 45 | output_register_map.map_z, output_register_map.map_w}; | 40 | output_register_map.map_z, output_register_map.map_w}; |
| @@ -47,7 +42,7 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | |||
| 47 | for (unsigned comp = 0; comp < 4; ++comp) { | 42 | for (unsigned comp = 0; comp < 4; ++comp) { |
| 48 | float24* out = ((float24*)&ret) + semantics[comp]; | 43 | float24* out = ((float24*)&ret) + semantics[comp]; |
| 49 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | 44 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { |
| 50 | *out = value[i][comp]; | 45 | *out = output_regs[i][comp]; |
| 51 | } else { | 46 | } else { |
| 52 | // Zero output so that attributes which aren't output won't have denormals in them, | 47 | // Zero output so that attributes which aren't output won't have denormals in them, |
| 53 | // which would slow us down later. | 48 | // which would slow us down later. |
| @@ -76,86 +71,41 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { | |||
| 76 | return ret; | 71 | return ret; |
| 77 | } | 72 | } |
| 78 | 73 | ||
| 79 | #ifdef ARCHITECTURE_x86_64 | 74 | void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { |
| 80 | static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; | 75 | // Setup input register table |
| 81 | static const JitShader* jit_shader; | 76 | const auto& attribute_register_map = g_state.regs.vs.input_register_map; |
| 82 | #endif // ARCHITECTURE_x86_64 | 77 | |
| 78 | for (int i = 0; i < num_attributes; i++) | ||
| 79 | registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | ||
| 80 | } | ||
| 81 | |||
| 82 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | ||
| 83 | 83 | ||
| 84 | void ClearCache() { | ||
| 85 | #ifdef ARCHITECTURE_x86_64 | 84 | #ifdef ARCHITECTURE_x86_64 |
| 86 | shader_map.clear(); | 85 | static std::unique_ptr<JitX64Engine> jit_engine; |
| 87 | #endif // ARCHITECTURE_x86_64 | 86 | #endif // ARCHITECTURE_x86_64 |
| 88 | } | 87 | static InterpreterEngine interpreter_engine; |
| 89 | 88 | ||
| 90 | void ShaderSetup::Setup() { | 89 | ShaderEngine* GetEngine() { |
| 91 | #ifdef ARCHITECTURE_x86_64 | 90 | #ifdef ARCHITECTURE_x86_64 |
| 91 | // TODO(yuriks): Re-initialize on each change rather than being persistent | ||
| 92 | if (VideoCore::g_shader_jit_enabled) { | 92 | if (VideoCore::g_shader_jit_enabled) { |
| 93 | u64 cache_key = | 93 | if (jit_engine == nullptr) { |
| 94 | Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 94 | jit_engine = std::make_unique<JitX64Engine>(); |
| 95 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)); | ||
| 96 | |||
| 97 | auto iter = shader_map.find(cache_key); | ||
| 98 | if (iter != shader_map.end()) { | ||
| 99 | jit_shader = iter->second.get(); | ||
| 100 | } else { | ||
| 101 | auto shader = std::make_unique<JitShader>(); | ||
| 102 | shader->Compile(); | ||
| 103 | jit_shader = shader.get(); | ||
| 104 | shader_map[cache_key] = std::move(shader); | ||
| 105 | } | 95 | } |
| 96 | return jit_engine.get(); | ||
| 106 | } | 97 | } |
| 107 | #endif // ARCHITECTURE_x86_64 | 98 | #endif // ARCHITECTURE_x86_64 |
| 108 | } | ||
| 109 | |||
| 110 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | ||
| 111 | |||
| 112 | void ShaderSetup::Run(UnitState& state, const InputVertex& input, int num_attributes) { | ||
| 113 | auto& config = g_state.regs.vs; | ||
| 114 | auto& setup = g_state.vs; | ||
| 115 | |||
| 116 | MICROPROFILE_SCOPE(GPU_Shader); | ||
| 117 | 99 | ||
| 118 | // Setup input register table | 100 | return &interpreter_engine; |
| 119 | const auto& attribute_register_map = config.input_register_map; | 101 | } |
| 120 | |||
| 121 | for (unsigned i = 0; i < num_attributes; i++) | ||
| 122 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | ||
| 123 | |||
| 124 | state.conditional_code[0] = false; | ||
| 125 | state.conditional_code[1] = false; | ||
| 126 | 102 | ||
| 103 | void Shutdown() { | ||
| 127 | #ifdef ARCHITECTURE_x86_64 | 104 | #ifdef ARCHITECTURE_x86_64 |
| 128 | if (VideoCore::g_shader_jit_enabled) { | 105 | jit_engine = nullptr; |
| 129 | jit_shader->Run(setup, state, config.main_offset); | ||
| 130 | } else { | ||
| 131 | DebugData<false> dummy_debug_data; | ||
| 132 | RunInterpreter(setup, state, dummy_debug_data, config.main_offset); | ||
| 133 | } | ||
| 134 | #else | ||
| 135 | DebugData<false> dummy_debug_data; | ||
| 136 | RunInterpreter(setup, state, dummy_debug_data, config.main_offset); | ||
| 137 | #endif // ARCHITECTURE_x86_64 | 106 | #endif // ARCHITECTURE_x86_64 |
| 138 | } | 107 | } |
| 139 | 108 | ||
| 140 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, | ||
| 141 | const Regs::ShaderConfig& config, | ||
| 142 | const ShaderSetup& setup) { | ||
| 143 | UnitState state; | ||
| 144 | DebugData<true> debug_data; | ||
| 145 | |||
| 146 | // Setup input register table | ||
| 147 | boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); | ||
| 148 | const auto& attribute_register_map = config.input_register_map; | ||
| 149 | for (unsigned i = 0; i < num_attributes; i++) | ||
| 150 | state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | ||
| 151 | |||
| 152 | state.conditional_code[0] = false; | ||
| 153 | state.conditional_code[1] = false; | ||
| 154 | |||
| 155 | RunInterpreter(setup, state, debug_data, config.main_offset); | ||
| 156 | return debug_data; | ||
| 157 | } | ||
| 158 | |||
| 159 | } // namespace Shader | 109 | } // namespace Shader |
| 160 | 110 | ||
| 161 | } // namespace Pica | 111 | } // namespace Pica |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 2b07759b9..44d9f76c3 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <memory> | ||
| 10 | #include <type_traits> | 9 | #include <type_traits> |
| 11 | #include <nihstro/shader_bytecode.h> | 10 | #include <nihstro/shader_bytecode.h> |
| 12 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| @@ -15,7 +14,6 @@ | |||
| 15 | #include "common/vector_math.h" | 14 | #include "common/vector_math.h" |
| 16 | #include "video_core/pica.h" | 15 | #include "video_core/pica.h" |
| 17 | #include "video_core/pica_types.h" | 16 | #include "video_core/pica_types.h" |
| 18 | #include "video_core/shader/debug_data.h" | ||
| 19 | 17 | ||
| 20 | using nihstro::RegisterType; | 18 | using nihstro::RegisterType; |
| 21 | using nihstro::SourceRegister; | 19 | using nihstro::SourceRegister; |
| @@ -75,19 +73,13 @@ struct OutputVertex { | |||
| 75 | ret.Lerp(factor, v1); | 73 | ret.Lerp(factor, v1); |
| 76 | return ret; | 74 | return ret; |
| 77 | } | 75 | } |
| 76 | |||
| 77 | static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | ||
| 78 | u32 output_mask); | ||
| 78 | }; | 79 | }; |
| 79 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 80 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 80 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 81 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| 81 | 82 | ||
| 82 | struct OutputRegisters { | ||
| 83 | OutputRegisters() = default; | ||
| 84 | |||
| 85 | alignas(16) Math::Vec4<float24> value[16]; | ||
| 86 | |||
| 87 | OutputVertex ToVertex(const Regs::ShaderConfig& config) const; | ||
| 88 | }; | ||
| 89 | static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); | ||
| 90 | |||
| 91 | /** | 83 | /** |
| 92 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 84 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 93 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 85 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| @@ -100,11 +92,10 @@ struct UnitState { | |||
| 100 | // required to be 16-byte aligned. | 92 | // required to be 16-byte aligned. |
| 101 | alignas(16) Math::Vec4<float24> input[16]; | 93 | alignas(16) Math::Vec4<float24> input[16]; |
| 102 | alignas(16) Math::Vec4<float24> temporary[16]; | 94 | alignas(16) Math::Vec4<float24> temporary[16]; |
| 95 | alignas(16) Math::Vec4<float24> output[16]; | ||
| 103 | } registers; | 96 | } registers; |
| 104 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); | 97 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); |
| 105 | 98 | ||
| 106 | OutputRegisters output_registers; | ||
| 107 | |||
| 108 | bool conditional_code[2]; | 99 | bool conditional_code[2]; |
| 109 | 100 | ||
| 110 | // Two Address registers and one loop counter | 101 | // Two Address registers and one loop counter |
| @@ -130,7 +121,7 @@ struct UnitState { | |||
| 130 | static size_t OutputOffset(const DestRegister& reg) { | 121 | static size_t OutputOffset(const DestRegister& reg) { |
| 131 | switch (reg.GetRegisterType()) { | 122 | switch (reg.GetRegisterType()) { |
| 132 | case RegisterType::Output: | 123 | case RegisterType::Output: |
| 133 | return offsetof(UnitState, output_registers.value) + | 124 | return offsetof(UnitState, registers.output) + |
| 134 | reg.GetIndex() * sizeof(Math::Vec4<float24>); | 125 | reg.GetIndex() * sizeof(Math::Vec4<float24>); |
| 135 | 126 | ||
| 136 | case RegisterType::Temporary: | 127 | case RegisterType::Temporary: |
| @@ -142,13 +133,17 @@ struct UnitState { | |||
| 142 | return 0; | 133 | return 0; |
| 143 | } | 134 | } |
| 144 | } | 135 | } |
| 145 | }; | ||
| 146 | 136 | ||
| 147 | /// Clears the shader cache | 137 | /** |
| 148 | void ClearCache(); | 138 | * Loads the unit state with an input vertex. |
| 139 | * | ||
| 140 | * @param input Input vertex into the shader | ||
| 141 | * @param num_attributes The number of vertex shader attributes to load | ||
| 142 | */ | ||
| 143 | void LoadInputVertex(const InputVertex& input, int num_attributes); | ||
| 144 | }; | ||
| 149 | 145 | ||
| 150 | struct ShaderSetup { | 146 | struct ShaderSetup { |
| 151 | |||
| 152 | struct { | 147 | struct { |
| 153 | // The float uniforms are accessed by the shader JIT using SSE instructions, and are | 148 | // The float uniforms are accessed by the shader JIT using SSE instructions, and are |
| 154 | // therefore required to be 16-byte aligned. | 149 | // therefore required to be 16-byte aligned. |
| @@ -173,32 +168,37 @@ struct ShaderSetup { | |||
| 173 | std::array<u32, 1024> program_code; | 168 | std::array<u32, 1024> program_code; |
| 174 | std::array<u32, 1024> swizzle_data; | 169 | std::array<u32, 1024> swizzle_data; |
| 175 | 170 | ||
| 171 | /// Data private to ShaderEngines | ||
| 172 | struct EngineData { | ||
| 173 | unsigned int entry_point; | ||
| 174 | /// Used by the JIT, points to a compiled shader object. | ||
| 175 | const void* cached_shader = nullptr; | ||
| 176 | } engine_data; | ||
| 177 | }; | ||
| 178 | |||
| 179 | class ShaderEngine { | ||
| 180 | public: | ||
| 181 | virtual ~ShaderEngine() = default; | ||
| 182 | |||
| 176 | /** | 183 | /** |
| 177 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once | 184 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once |
| 178 | * per vertex, which would happen within the `Run` function). | 185 | * per vertex, which would happen within the `Run` function). |
| 179 | */ | 186 | */ |
| 180 | void Setup(); | 187 | virtual void SetupBatch(ShaderSetup& setup, unsigned int entry_point) = 0; |
| 181 | |||
| 182 | /** | ||
| 183 | * Runs the currently setup shader | ||
| 184 | * @param state Shader unit state, must be setup per shader and per shader unit | ||
| 185 | * @param input Input vertex into the shader | ||
| 186 | * @param num_attributes The number of vertex shader attributes | ||
| 187 | */ | ||
| 188 | void Run(UnitState& state, const InputVertex& input, int num_attributes); | ||
| 189 | 188 | ||
| 190 | /** | 189 | /** |
| 191 | * Produce debug information based on the given shader and input vertex | 190 | * Runs the currently setup shader. |
| 192 | * @param input Input vertex into the shader | 191 | * |
| 193 | * @param num_attributes The number of vertex shader attributes | 192 | * @param setup Shader engine state, must be setup with SetupBatch on each shader change. |
| 194 | * @param config Configuration object for the shader pipeline | 193 | * @param state Shader unit state, must be setup with input data before each shader invocation. |
| 195 | * @param setup Setup object for the shader pipeline | ||
| 196 | * @return Debug information for this shader with regards to the given vertex | ||
| 197 | */ | 194 | */ |
| 198 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, | 195 | virtual void Run(const ShaderSetup& setup, UnitState& state) const = 0; |
| 199 | const Regs::ShaderConfig& config, const ShaderSetup& setup); | ||
| 200 | }; | 196 | }; |
| 201 | 197 | ||
| 198 | // TODO(yuriks): Remove and make it non-global state somewhere | ||
| 199 | ShaderEngine* GetEngine(); | ||
| 200 | void Shutdown(); | ||
| 201 | |||
| 202 | } // namespace Shader | 202 | } // namespace Shader |
| 203 | 203 | ||
| 204 | } // namespace Pica | 204 | } // namespace Pica |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 20fb9754b..c0c89b857 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -7,10 +7,12 @@ | |||
| 7 | #include <cmath> | 7 | #include <cmath> |
| 8 | #include <numeric> | 8 | #include <numeric> |
| 9 | #include <boost/container/static_vector.hpp> | 9 | #include <boost/container/static_vector.hpp> |
| 10 | #include <boost/range/algorithm/fill.hpp> | ||
| 10 | #include <nihstro/shader_bytecode.h> | 11 | #include <nihstro/shader_bytecode.h> |
| 11 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 13 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 15 | #include "common/microprofile.h" | ||
| 14 | #include "common/vector_math.h" | 16 | #include "common/vector_math.h" |
| 15 | #include "video_core/pica_state.h" | 17 | #include "video_core/pica_state.h" |
| 16 | #include "video_core/pica_types.h" | 18 | #include "video_core/pica_types.h" |
| @@ -37,12 +39,15 @@ struct CallStackElement { | |||
| 37 | }; | 39 | }; |
| 38 | 40 | ||
| 39 | template <bool Debug> | 41 | template <bool Debug> |
| 40 | void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, | 42 | static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, |
| 41 | unsigned offset) { | 43 | unsigned offset) { |
| 42 | // TODO: Is there a maximal size for this? | 44 | // TODO: Is there a maximal size for this? |
| 43 | boost::container::static_vector<CallStackElement, 16> call_stack; | 45 | boost::container::static_vector<CallStackElement, 16> call_stack; |
| 44 | u32 program_counter = offset; | 46 | u32 program_counter = offset; |
| 45 | 47 | ||
| 48 | state.conditional_code[0] = false; | ||
| 49 | state.conditional_code[1] = false; | ||
| 50 | |||
| 46 | auto call = [&program_counter, &call_stack](u32 offset, u32 num_instructions, u32 return_offset, | 51 | auto call = [&program_counter, &call_stack](u32 offset, u32 num_instructions, u32 return_offset, |
| 47 | u8 repeat_count, u8 loop_increment) { | 52 | u8 repeat_count, u8 loop_increment) { |
| 48 | // -1 to make sure when incrementing the PC we end up at the correct offset | 53 | // -1 to make sure when incrementing the PC we end up at the correct offset |
| @@ -73,9 +78,9 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug> | |||
| 73 | } | 78 | } |
| 74 | }; | 79 | }; |
| 75 | 80 | ||
| 76 | const auto& uniforms = g_state.vs.uniforms; | 81 | const auto& uniforms = setup.uniforms; |
| 77 | const auto& swizzle_data = g_state.vs.swizzle_data; | 82 | const auto& swizzle_data = setup.swizzle_data; |
| 78 | const auto& program_code = g_state.vs.program_code; | 83 | const auto& program_code = setup.program_code; |
| 79 | 84 | ||
| 80 | // Placeholder for invalid inputs | 85 | // Placeholder for invalid inputs |
| 81 | static float24 dummy_vec4_float24[4]; | 86 | static float24 dummy_vec4_float24[4]; |
| @@ -170,7 +175,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug> | |||
| 170 | 175 | ||
| 171 | float24* dest = | 176 | float24* dest = |
| 172 | (instr.common.dest.Value() < 0x10) | 177 | (instr.common.dest.Value() < 0x10) |
| 173 | ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] | 178 | ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] |
| 174 | : (instr.common.dest.Value() < 0x20) | 179 | : (instr.common.dest.Value() < 0x20) |
| 175 | ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | 180 | ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] |
| 176 | : dummy_vec4_float24; | 181 | : dummy_vec4_float24; |
| @@ -513,7 +518,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug> | |||
| 513 | 518 | ||
| 514 | float24* dest = | 519 | float24* dest = |
| 515 | (instr.mad.dest.Value() < 0x10) | 520 | (instr.mad.dest.Value() < 0x10) |
| 516 | ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] | 521 | ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] |
| 517 | : (instr.mad.dest.Value() < 0x20) | 522 | : (instr.mad.dest.Value() < 0x20) |
| 518 | ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | 523 | ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |
| 519 | : dummy_vec4_float24; | 524 | : dummy_vec4_float24; |
| @@ -647,9 +652,33 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug> | |||
| 647 | } | 652 | } |
| 648 | } | 653 | } |
| 649 | 654 | ||
| 650 | // Explicit instantiation | 655 | void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { |
| 651 | template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<false>&, unsigned offset); | 656 | ASSERT(entry_point < 1024); |
| 652 | template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<true>&, unsigned offset); | 657 | setup.engine_data.entry_point = entry_point; |
| 658 | } | ||
| 659 | |||
| 660 | MICROPROFILE_DECLARE(GPU_Shader); | ||
| 661 | |||
| 662 | void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const { | ||
| 663 | |||
| 664 | MICROPROFILE_SCOPE(GPU_Shader); | ||
| 665 | |||
| 666 | DebugData<false> dummy_debug_data; | ||
| 667 | RunInterpreter(setup, state, dummy_debug_data, setup.engine_data.entry_point); | ||
| 668 | } | ||
| 669 | |||
| 670 | DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, | ||
| 671 | const InputVertex& input, | ||
| 672 | int num_attributes) const { | ||
| 673 | UnitState state; | ||
| 674 | DebugData<true> debug_data; | ||
| 675 | |||
| 676 | // Setup input register table | ||
| 677 | boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); | ||
| 678 | state.LoadInputVertex(input, num_attributes); | ||
| 679 | RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); | ||
| 680 | return debug_data; | ||
| 681 | } | ||
| 653 | 682 | ||
| 654 | } // namespace | 683 | } // namespace |
| 655 | 684 | ||
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index d31dcd7a6..d6c0e2d8c 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -4,18 +4,28 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "video_core/shader/debug_data.h" | ||
| 8 | #include "video_core/shader/shader.h" | ||
| 9 | |||
| 7 | namespace Pica { | 10 | namespace Pica { |
| 8 | 11 | ||
| 9 | namespace Shader { | 12 | namespace Shader { |
| 10 | 13 | ||
| 11 | struct UnitState; | 14 | class InterpreterEngine final : public ShaderEngine { |
| 12 | 15 | public: | |
| 13 | template <bool Debug> | 16 | void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override; |
| 14 | struct DebugData; | 17 | void Run(const ShaderSetup& setup, UnitState& state) const override; |
| 15 | 18 | ||
| 16 | template <bool Debug> | 19 | /** |
| 17 | void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, | 20 | * Produce debug information based on the given shader and input vertex |
| 18 | unsigned offset); | 21 | * @param input Input vertex into the shader |
| 22 | * @param num_attributes The number of vertex shader attributes | ||
| 23 | * @param setup Setup object for the shader pipeline | ||
| 24 | * @return Debug information for this shader with regards to the given vertex | ||
| 25 | */ | ||
| 26 | DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, | ||
| 27 | int num_attributes) const; | ||
| 28 | }; | ||
| 19 | 29 | ||
| 20 | } // namespace | 30 | } // namespace |
| 21 | 31 | ||
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index c588b778b..0ee0dd9ef 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -1,888 +1,48 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | 1 | // Copyright 2016 Citra Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include "common/hash.h" |
| 6 | #include <cmath> | 6 | #include "common/microprofile.h" |
| 7 | #include <cstdint> | ||
| 8 | #include <nihstro/shader_bytecode.h> | ||
| 9 | #include <smmintrin.h> | ||
| 10 | #include <xmmintrin.h> | ||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "common/vector_math.h" | ||
| 14 | #include "common/x64/cpu_detect.h" | ||
| 15 | #include "common/x64/xbyak_abi.h" | ||
| 16 | #include "common/x64/xbyak_util.h" | ||
| 17 | #include "video_core/pica_state.h" | ||
| 18 | #include "video_core/pica_types.h" | ||
| 19 | #include "video_core/shader/shader.h" | 7 | #include "video_core/shader/shader.h" |
| 20 | #include "video_core/shader/shader_jit_x64.h" | 8 | #include "video_core/shader/shader_jit_x64.h" |
| 21 | 9 | #include "video_core/shader/shader_jit_x64_compiler.h" | |
| 22 | using namespace Common::X64; | ||
| 23 | using namespace Xbyak::util; | ||
| 24 | using Xbyak::Label; | ||
| 25 | using Xbyak::Reg32; | ||
| 26 | using Xbyak::Reg64; | ||
| 27 | using Xbyak::Xmm; | ||
| 28 | 10 | ||
| 29 | namespace Pica { | 11 | namespace Pica { |
| 30 | |||
| 31 | namespace Shader { | 12 | namespace Shader { |
| 32 | 13 | ||
| 33 | typedef void (JitShader::*JitFunction)(Instruction instr); | 14 | JitX64Engine::JitX64Engine() = default; |
| 34 | 15 | JitX64Engine::~JitX64Engine() = default; | |
| 35 | const JitFunction instr_table[64] = { | ||
| 36 | &JitShader::Compile_ADD, // add | ||
| 37 | &JitShader::Compile_DP3, // dp3 | ||
| 38 | &JitShader::Compile_DP4, // dp4 | ||
| 39 | &JitShader::Compile_DPH, // dph | ||
| 40 | nullptr, // unknown | ||
| 41 | &JitShader::Compile_EX2, // ex2 | ||
| 42 | &JitShader::Compile_LG2, // lg2 | ||
| 43 | nullptr, // unknown | ||
| 44 | &JitShader::Compile_MUL, // mul | ||
| 45 | &JitShader::Compile_SGE, // sge | ||
| 46 | &JitShader::Compile_SLT, // slt | ||
| 47 | &JitShader::Compile_FLR, // flr | ||
| 48 | &JitShader::Compile_MAX, // max | ||
| 49 | &JitShader::Compile_MIN, // min | ||
| 50 | &JitShader::Compile_RCP, // rcp | ||
| 51 | &JitShader::Compile_RSQ, // rsq | ||
| 52 | nullptr, // unknown | ||
| 53 | nullptr, // unknown | ||
| 54 | &JitShader::Compile_MOVA, // mova | ||
| 55 | &JitShader::Compile_MOV, // mov | ||
| 56 | nullptr, // unknown | ||
| 57 | nullptr, // unknown | ||
| 58 | nullptr, // unknown | ||
| 59 | nullptr, // unknown | ||
| 60 | &JitShader::Compile_DPH, // dphi | ||
| 61 | nullptr, // unknown | ||
| 62 | &JitShader::Compile_SGE, // sgei | ||
| 63 | &JitShader::Compile_SLT, // slti | ||
| 64 | nullptr, // unknown | ||
| 65 | nullptr, // unknown | ||
| 66 | nullptr, // unknown | ||
| 67 | nullptr, // unknown | ||
| 68 | nullptr, // unknown | ||
| 69 | &JitShader::Compile_NOP, // nop | ||
| 70 | &JitShader::Compile_END, // end | ||
| 71 | nullptr, // break | ||
| 72 | &JitShader::Compile_CALL, // call | ||
| 73 | &JitShader::Compile_CALLC, // callc | ||
| 74 | &JitShader::Compile_CALLU, // callu | ||
| 75 | &JitShader::Compile_IF, // ifu | ||
| 76 | &JitShader::Compile_IF, // ifc | ||
| 77 | &JitShader::Compile_LOOP, // loop | ||
| 78 | nullptr, // emit | ||
| 79 | nullptr, // sete | ||
| 80 | &JitShader::Compile_JMP, // jmpc | ||
| 81 | &JitShader::Compile_JMP, // jmpu | ||
| 82 | &JitShader::Compile_CMP, // cmp | ||
| 83 | &JitShader::Compile_CMP, // cmp | ||
| 84 | &JitShader::Compile_MAD, // madi | ||
| 85 | &JitShader::Compile_MAD, // madi | ||
| 86 | &JitShader::Compile_MAD, // madi | ||
| 87 | &JitShader::Compile_MAD, // madi | ||
| 88 | &JitShader::Compile_MAD, // madi | ||
| 89 | &JitShader::Compile_MAD, // madi | ||
| 90 | &JitShader::Compile_MAD, // madi | ||
| 91 | &JitShader::Compile_MAD, // madi | ||
| 92 | &JitShader::Compile_MAD, // mad | ||
| 93 | &JitShader::Compile_MAD, // mad | ||
| 94 | &JitShader::Compile_MAD, // mad | ||
| 95 | &JitShader::Compile_MAD, // mad | ||
| 96 | &JitShader::Compile_MAD, // mad | ||
| 97 | &JitShader::Compile_MAD, // mad | ||
| 98 | &JitShader::Compile_MAD, // mad | ||
| 99 | &JitShader::Compile_MAD, // mad | ||
| 100 | }; | ||
| 101 | |||
| 102 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can | ||
| 103 | // be used as scratch registers within a compiler function. The other registers have designated | ||
| 104 | // purposes, as documented below: | ||
| 105 | 16 | ||
| 106 | /// Pointer to the uniform memory | 17 | void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { |
| 107 | static const Reg64 SETUP = r9; | 18 | ASSERT(entry_point < 1024); |
| 108 | /// The two 32-bit VS address offset registers set by the MOVA instruction | 19 | setup.engine_data.entry_point = entry_point; |
| 109 | static const Reg64 ADDROFFS_REG_0 = r10; | ||
| 110 | static const Reg64 ADDROFFS_REG_1 = r11; | ||
| 111 | /// VS loop count register (Multiplied by 16) | ||
| 112 | static const Reg32 LOOPCOUNT_REG = r12d; | ||
| 113 | /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker) | | |
| 114 | static const Reg32 LOOPCOUNT = esi; | ||
| 115 | /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) | ||
| 116 | static const Reg32 LOOPINC = edi; | ||
| 117 | /// Result of the previous CMP instruction for the X-component comparison | ||
| 118 | static const Reg64 COND0 = r13; | ||
| 119 | /// Result of the previous CMP instruction for the Y-component comparison | ||
| 120 | static const Reg64 COND1 = r14; | ||
| 121 | /// Pointer to the UnitState instance for the current VS unit | ||
| 122 | static const Reg64 STATE = r15; | ||
| 123 | /// SIMD scratch register | ||
| 124 | static const Xmm SCRATCH = xmm0; | ||
| 125 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register | ||
| 126 | static const Xmm SRC1 = xmm1; | ||
| 127 | /// Loaded with the second swizzled source register, otherwise can be used as a scratch register | ||
| 128 | static const Xmm SRC2 = xmm2; | ||
| 129 | /// Loaded with the third swizzled source register, otherwise can be used as a scratch register | ||
| 130 | static const Xmm SRC3 = xmm3; | ||
| 131 | /// Additional scratch register | ||
| 132 | static const Xmm SCRATCH2 = xmm4; | ||
| 133 | /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one | ||
| 134 | static const Xmm ONE = xmm14; | ||
| 135 | /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR | ||
| 136 | static const Xmm NEGBIT = xmm15; | ||
| 137 | 20 | ||
| 138 | // State registers that must not be modified by external functions calls | 21 | u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); |
| 139 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed | 22 | u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data)); |
| 140 | static const BitSet32 persistent_regs = BuildRegSet({ | ||
| 141 | // Pointers to register blocks | ||
| 142 | SETUP, STATE, | ||
| 143 | // Cached registers | ||
| 144 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, | ||
| 145 | // Constants | ||
| 146 | ONE, NEGBIT, | ||
| 147 | }); | ||
| 148 | 23 | ||
| 149 | /// Raw constant for the source register selector that indicates no swizzling is performed | 24 | u64 cache_key = code_hash ^ swizzle_hash; |
| 150 | static const u8 NO_SRC_REG_SWIZZLE = 0x1b; | 25 | auto iter = cache.find(cache_key); |
| 151 | /// Raw constant for the destination register enable mask that indicates all components are enabled | 26 | if (iter != cache.end()) { |
| 152 | static const u8 NO_DEST_REG_MASK = 0xf; | 27 | setup.engine_data.cached_shader = iter->second.get(); |
| 153 | |||
| 154 | /** | ||
| 155 | * Get the vertex shader instruction for a given offset in the current shader program | ||
| 156 | * @param offset Offset in the current shader program of the instruction | ||
| 157 | * @return Instruction at the specified offset | ||
| 158 | */ | ||
| 159 | static Instruction GetVertexShaderInstruction(size_t offset) { | ||
| 160 | return {g_state.vs.program_code[offset]}; | ||
| 161 | } | ||
| 162 | |||
| 163 | static void LogCritical(const char* msg) { | ||
| 164 | LOG_CRITICAL(HW_GPU, "%s", msg); | ||
| 165 | } | ||
| 166 | |||
| 167 | void JitShader::Compile_Assert(bool condition, const char* msg) { | ||
| 168 | if (!condition) { | ||
| 169 | mov(ABI_PARAM1, reinterpret_cast<size_t>(msg)); | ||
| 170 | CallFarFunction(*this, LogCritical); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | /** | ||
| 175 | * Loads and swizzles a source register into the specified XMM register. | ||
| 176 | * @param instr VS instruction, used for determining how to load the source register | ||
| 177 | * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) | ||
| 178 | * @param src_reg SourceRegister object corresponding to the source register to load | ||
| 179 | * @param dest Destination XMM register to store the loaded, swizzled source register | ||
| 180 | */ | ||
| 181 | void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, | ||
| 182 | Xmm dest) { | ||
| 183 | Reg64 src_ptr; | ||
| 184 | size_t src_offset; | ||
| 185 | |||
| 186 | if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { | ||
| 187 | src_ptr = SETUP; | ||
| 188 | src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex()); | ||
| 189 | } else { | 28 | } else { |
| 190 | src_ptr = STATE; | 29 | auto shader = std::make_unique<JitShader>(); |
| 191 | src_offset = UnitState::InputOffset(src_reg); | 30 | shader->Compile(&setup.program_code, &setup.swizzle_data); |
| 192 | } | 31 | setup.engine_data.cached_shader = shader.get(); |
| 193 | 32 | cache.emplace_hint(iter, cache_key, std::move(shader)); | |
| 194 | int src_offset_disp = (int)src_offset; | ||
| 195 | ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type"); | ||
| 196 | |||
| 197 | unsigned operand_desc_id; | ||
| 198 | |||
| 199 | const bool is_inverted = | ||
| 200 | (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); | ||
| 201 | |||
| 202 | unsigned address_register_index; | ||
| 203 | unsigned offset_src; | ||
| 204 | |||
| 205 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || | ||
| 206 | instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { | ||
| 207 | operand_desc_id = instr.mad.operand_desc_id; | ||
| 208 | offset_src = is_inverted ? 3 : 2; | ||
| 209 | address_register_index = instr.mad.address_register_index; | ||
| 210 | } else { | ||
| 211 | operand_desc_id = instr.common.operand_desc_id; | ||
| 212 | offset_src = is_inverted ? 2 : 1; | ||
| 213 | address_register_index = instr.common.address_register_index; | ||
| 214 | } | ||
| 215 | |||
| 216 | if (src_num == offset_src && address_register_index != 0) { | ||
| 217 | switch (address_register_index) { | ||
| 218 | case 1: // address offset 1 | ||
| 219 | movaps(dest, xword[src_ptr + ADDROFFS_REG_0 + src_offset_disp]); | ||
| 220 | break; | ||
| 221 | case 2: // address offset 2 | ||
| 222 | movaps(dest, xword[src_ptr + ADDROFFS_REG_1 + src_offset_disp]); | ||
| 223 | break; | ||
| 224 | case 3: // address offset 3 | ||
| 225 | movaps(dest, xword[src_ptr + LOOPCOUNT_REG.cvt64() + src_offset_disp]); | ||
| 226 | break; | ||
| 227 | default: | ||
| 228 | UNREACHABLE(); | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | } else { | ||
| 232 | // Load the source | ||
| 233 | movaps(dest, xword[src_ptr + src_offset_disp]); | ||
| 234 | } | ||
| 235 | |||
| 236 | SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; | ||
| 237 | |||
| 238 | // Generate instructions for source register swizzling as needed | ||
| 239 | u8 sel = swiz.GetRawSelector(src_num); | ||
| 240 | if (sel != NO_SRC_REG_SWIZZLE) { | ||
| 241 | // Selector component order needs to be reversed for the SHUFPS instruction | ||
| 242 | sel = ((sel & 0xc0) >> 6) | ((sel & 3) << 6) | ((sel & 0xc) << 2) | ((sel & 0x30) >> 2); | ||
| 243 | |||
| 244 | // Shuffle inputs for swizzle | ||
| 245 | shufps(dest, dest, sel); | ||
| 246 | } | ||
| 247 | |||
| 248 | // If the source register should be negated, flip the negative bit using XOR | ||
| 249 | const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3}; | ||
| 250 | if (negate[src_num - 1]) { | ||
| 251 | xorps(dest, NEGBIT); | ||
| 252 | } | 33 | } |
| 253 | } | 34 | } |
| 254 | 35 | ||
| 255 | void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { | 36 | MICROPROFILE_DECLARE(GPU_Shader); |
| 256 | DestRegister dest; | ||
| 257 | unsigned operand_desc_id; | ||
| 258 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || | ||
| 259 | instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { | ||
| 260 | operand_desc_id = instr.mad.operand_desc_id; | ||
| 261 | dest = instr.mad.dest.Value(); | ||
| 262 | } else { | ||
| 263 | operand_desc_id = instr.common.operand_desc_id; | ||
| 264 | dest = instr.common.dest.Value(); | ||
| 265 | } | ||
| 266 | |||
| 267 | SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; | ||
| 268 | |||
| 269 | size_t dest_offset_disp = UnitState::OutputOffset(dest); | ||
| 270 | |||
| 271 | // If all components are enabled, write the result to the destination register | ||
| 272 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | ||
| 273 | // Store dest back to memory | ||
| 274 | movaps(xword[STATE + dest_offset_disp], src); | ||
| 275 | |||
| 276 | } else { | ||
| 277 | // Not all components are enabled, so mask the result when storing to the destination | ||
| 278 | // register... | ||
| 279 | movaps(SCRATCH, xword[STATE + dest_offset_disp]); | ||
| 280 | |||
| 281 | if (Common::GetCPUCaps().sse4_1) { | ||
| 282 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | | ||
| 283 | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||
| 284 | blendps(SCRATCH, src, mask); | ||
| 285 | } else { | ||
| 286 | movaps(SCRATCH2, src); | ||
| 287 | unpckhps(SCRATCH2, SCRATCH); // Unpack X/Y components of source and destination | ||
| 288 | unpcklps(SCRATCH, src); // Unpack Z/W components of source and destination | ||
| 289 | |||
| 290 | // Compute selector to selectively copy source components to destination for SHUFPS | ||
| 291 | // instruction | ||
| 292 | u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | | ||
| 293 | ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | | ||
| 294 | ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | | ||
| 295 | ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6); | ||
| 296 | shufps(SCRATCH, SCRATCH2, sel); | ||
| 297 | } | ||
| 298 | |||
| 299 | // Store dest back to memory | ||
| 300 | movaps(xword[STATE + dest_offset_disp], SCRATCH); | ||
| 301 | } | ||
| 302 | } | ||
| 303 | |||
| 304 | void JitShader::Compile_SanitizedMul(Xmm src1, Xmm src2, Xmm scratch) { | ||
| 305 | movaps(scratch, src1); | ||
| 306 | cmpordps(scratch, src2); | ||
| 307 | |||
| 308 | mulps(src1, src2); | ||
| 309 | 37 | ||
| 310 | movaps(src2, src1); | 38 | void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state) const { |
| 311 | cmpunordps(src2, src2); | 39 | ASSERT(setup.engine_data.cached_shader != nullptr); |
| 312 | 40 | ||
| 313 | xorps(scratch, src2); | 41 | MICROPROFILE_SCOPE(GPU_Shader); |
| 314 | andps(src1, scratch); | ||
| 315 | } | ||
| 316 | |||
| 317 | void JitShader::Compile_EvaluateCondition(Instruction instr) { | ||
| 318 | // Note: NXOR is used below to check for equality | ||
| 319 | switch (instr.flow_control.op) { | ||
| 320 | case Instruction::FlowControlType::Or: | ||
| 321 | mov(eax, COND0); | ||
| 322 | mov(ebx, COND1); | ||
| 323 | xor(eax, (instr.flow_control.refx.Value() ^ 1)); | ||
| 324 | xor(ebx, (instr.flow_control.refy.Value() ^ 1)); | ||
| 325 | or (eax, ebx); | ||
| 326 | break; | ||
| 327 | |||
| 328 | case Instruction::FlowControlType::And: | ||
| 329 | mov(eax, COND0); | ||
| 330 | mov(ebx, COND1); | ||
| 331 | xor(eax, (instr.flow_control.refx.Value() ^ 1)); | ||
| 332 | xor(ebx, (instr.flow_control.refy.Value() ^ 1)); | ||
| 333 | and(eax, ebx); | ||
| 334 | break; | ||
| 335 | |||
| 336 | case Instruction::FlowControlType::JustX: | ||
| 337 | mov(eax, COND0); | ||
| 338 | xor(eax, (instr.flow_control.refx.Value() ^ 1)); | ||
| 339 | break; | ||
| 340 | |||
| 341 | case Instruction::FlowControlType::JustY: | ||
| 342 | mov(eax, COND1); | ||
| 343 | xor(eax, (instr.flow_control.refy.Value() ^ 1)); | ||
| 344 | break; | ||
| 345 | } | ||
| 346 | } | ||
| 347 | 42 | ||
| 348 | void JitShader::Compile_UniformCondition(Instruction instr) { | 43 | const JitShader* shader = static_cast<const JitShader*>(setup.engine_data.cached_shader); |
| 349 | size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); | 44 | shader->Run(setup, state, setup.engine_data.entry_point); |
| 350 | cmp(byte[SETUP + offset], 0); | ||
| 351 | } | 45 | } |
| 352 | 46 | ||
| 353 | BitSet32 JitShader::PersistentCallerSavedRegs() { | ||
| 354 | return persistent_regs & ABI_ALL_CALLER_SAVED; | ||
| 355 | } | ||
| 356 | |||
| 357 | void JitShader::Compile_ADD(Instruction instr) { | ||
| 358 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 359 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 360 | addps(SRC1, SRC2); | ||
| 361 | Compile_DestEnable(instr, SRC1); | ||
| 362 | } | ||
| 363 | |||
| 364 | void JitShader::Compile_DP3(Instruction instr) { | ||
| 365 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 366 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 367 | |||
| 368 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 369 | |||
| 370 | movaps(SRC2, SRC1); | ||
| 371 | shufps(SRC2, SRC2, _MM_SHUFFLE(1, 1, 1, 1)); | ||
| 372 | |||
| 373 | movaps(SRC3, SRC1); | ||
| 374 | shufps(SRC3, SRC3, _MM_SHUFFLE(2, 2, 2, 2)); | ||
| 375 | |||
| 376 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); | ||
| 377 | addps(SRC1, SRC2); | ||
| 378 | addps(SRC1, SRC3); | ||
| 379 | |||
| 380 | Compile_DestEnable(instr, SRC1); | ||
| 381 | } | ||
| 382 | |||
| 383 | void JitShader::Compile_DP4(Instruction instr) { | ||
| 384 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 385 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 386 | |||
| 387 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 388 | |||
| 389 | movaps(SRC2, SRC1); | ||
| 390 | shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY | ||
| 391 | addps(SRC1, SRC2); | ||
| 392 | |||
| 393 | movaps(SRC2, SRC1); | ||
| 394 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX | ||
| 395 | addps(SRC1, SRC2); | ||
| 396 | |||
| 397 | Compile_DestEnable(instr, SRC1); | ||
| 398 | } | ||
| 399 | |||
| 400 | void JitShader::Compile_DPH(Instruction instr) { | ||
| 401 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { | ||
| 402 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 403 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 404 | } else { | ||
| 405 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 406 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 407 | } | ||
| 408 | |||
| 409 | if (Common::GetCPUCaps().sse4_1) { | ||
| 410 | // Set 4th component to 1.0 | ||
| 411 | blendps(SRC1, ONE, 0b1000); | ||
| 412 | } else { | ||
| 413 | // Set 4th component to 1.0 | ||
| 414 | movaps(SCRATCH, SRC1); | ||
| 415 | unpckhps(SCRATCH, ONE); // XYZW, 1111 -> Z1__ | ||
| 416 | unpcklpd(SRC1, SCRATCH); // XYZW, Z1__ -> XYZ1 | ||
| 417 | } | ||
| 418 | |||
| 419 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 420 | |||
| 421 | movaps(SRC2, SRC1); | ||
| 422 | shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY | ||
| 423 | addps(SRC1, SRC2); | ||
| 424 | |||
| 425 | movaps(SRC2, SRC1); | ||
| 426 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX | ||
| 427 | addps(SRC1, SRC2); | ||
| 428 | |||
| 429 | Compile_DestEnable(instr, SRC1); | ||
| 430 | } | ||
| 431 | |||
| 432 | void JitShader::Compile_EX2(Instruction instr) { | ||
| 433 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 434 | movss(xmm0, SRC1); // ABI_PARAM1 | ||
| 435 | |||
| 436 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 437 | CallFarFunction(*this, exp2f); | ||
| 438 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 439 | |||
| 440 | shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN | ||
| 441 | movaps(SRC1, xmm0); | ||
| 442 | Compile_DestEnable(instr, SRC1); | ||
| 443 | } | ||
| 444 | |||
| 445 | void JitShader::Compile_LG2(Instruction instr) { | ||
| 446 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 447 | movss(xmm0, SRC1); // ABI_PARAM1 | ||
| 448 | |||
| 449 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 450 | CallFarFunction(*this, log2f); | ||
| 451 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 452 | |||
| 453 | shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN | ||
| 454 | movaps(SRC1, xmm0); | ||
| 455 | Compile_DestEnable(instr, SRC1); | ||
| 456 | } | ||
| 457 | |||
| 458 | void JitShader::Compile_MUL(Instruction instr) { | ||
| 459 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 460 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 461 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 462 | Compile_DestEnable(instr, SRC1); | ||
| 463 | } | ||
| 464 | |||
| 465 | void JitShader::Compile_SGE(Instruction instr) { | ||
| 466 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { | ||
| 467 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 468 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 469 | } else { | ||
| 470 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 471 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 472 | } | ||
| 473 | |||
| 474 | cmpleps(SRC2, SRC1); | ||
| 475 | andps(SRC2, ONE); | ||
| 476 | |||
| 477 | Compile_DestEnable(instr, SRC2); | ||
| 478 | } | ||
| 479 | |||
| 480 | void JitShader::Compile_SLT(Instruction instr) { | ||
| 481 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { | ||
| 482 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 483 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 484 | } else { | ||
| 485 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 486 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 487 | } | ||
| 488 | |||
| 489 | cmpltps(SRC1, SRC2); | ||
| 490 | andps(SRC1, ONE); | ||
| 491 | |||
| 492 | Compile_DestEnable(instr, SRC1); | ||
| 493 | } | ||
| 494 | |||
| 495 | void JitShader::Compile_FLR(Instruction instr) { | ||
| 496 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 497 | |||
| 498 | if (Common::GetCPUCaps().sse4_1) { | ||
| 499 | roundps(SRC1, SRC1, _MM_FROUND_FLOOR); | ||
| 500 | } else { | ||
| 501 | cvttps2dq(SRC1, SRC1); | ||
| 502 | cvtdq2ps(SRC1, SRC1); | ||
| 503 | } | ||
| 504 | |||
| 505 | Compile_DestEnable(instr, SRC1); | ||
| 506 | } | ||
| 507 | |||
| 508 | void JitShader::Compile_MAX(Instruction instr) { | ||
| 509 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 510 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 511 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. | ||
| 512 | maxps(SRC1, SRC2); | ||
| 513 | Compile_DestEnable(instr, SRC1); | ||
| 514 | } | ||
| 515 | |||
| 516 | void JitShader::Compile_MIN(Instruction instr) { | ||
| 517 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 518 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 519 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. | ||
| 520 | minps(SRC1, SRC2); | ||
| 521 | Compile_DestEnable(instr, SRC1); | ||
| 522 | } | ||
| 523 | |||
| 524 | void JitShader::Compile_MOVA(Instruction instr) { | ||
| 525 | SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]}; | ||
| 526 | |||
| 527 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { | ||
| 528 | return; // NoOp | ||
| 529 | } | ||
| 530 | |||
| 531 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 532 | |||
| 533 | // Convert floats to integers using truncation (only care about X and Y components) | ||
| 534 | cvttps2dq(SRC1, SRC1); | ||
| 535 | |||
| 536 | // Get result | ||
| 537 | movq(rax, SRC1); | ||
| 538 | |||
| 539 | // Handle destination enable | ||
| 540 | if (swiz.DestComponentEnabled(0) && swiz.DestComponentEnabled(1)) { | ||
| 541 | // Move and sign-extend low 32 bits | ||
| 542 | movsxd(ADDROFFS_REG_0, eax); | ||
| 543 | |||
| 544 | // Move and sign-extend high 32 bits | ||
| 545 | shr(rax, 32); | ||
| 546 | movsxd(ADDROFFS_REG_1, eax); | ||
| 547 | |||
| 548 | // Multiply by 16 to be used as an offset later | ||
| 549 | shl(ADDROFFS_REG_0, 4); | ||
| 550 | shl(ADDROFFS_REG_1, 4); | ||
| 551 | } else { | ||
| 552 | if (swiz.DestComponentEnabled(0)) { | ||
| 553 | // Move and sign-extend low 32 bits | ||
| 554 | movsxd(ADDROFFS_REG_0, eax); | ||
| 555 | |||
| 556 | // Multiply by 16 to be used as an offset later | ||
| 557 | shl(ADDROFFS_REG_0, 4); | ||
| 558 | } else if (swiz.DestComponentEnabled(1)) { | ||
| 559 | // Move and sign-extend high 32 bits | ||
| 560 | shr(rax, 32); | ||
| 561 | movsxd(ADDROFFS_REG_1, eax); | ||
| 562 | |||
| 563 | // Multiply by 16 to be used as an offset later | ||
| 564 | shl(ADDROFFS_REG_1, 4); | ||
| 565 | } | ||
| 566 | } | ||
| 567 | } | ||
| 568 | |||
| 569 | void JitShader::Compile_MOV(Instruction instr) { | ||
| 570 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 571 | Compile_DestEnable(instr, SRC1); | ||
| 572 | } | ||
| 573 | |||
| 574 | void JitShader::Compile_RCP(Instruction instr) { | ||
| 575 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 576 | |||
| 577 | // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica | ||
| 578 | // performs this operation more accurately. This should be checked on hardware. | ||
| 579 | rcpss(SRC1, SRC1); | ||
| 580 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX | ||
| 581 | |||
| 582 | Compile_DestEnable(instr, SRC1); | ||
| 583 | } | ||
| 584 | |||
| 585 | void JitShader::Compile_RSQ(Instruction instr) { | ||
| 586 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 587 | |||
| 588 | // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica | ||
| 589 | // performs this operation more accurately. This should be checked on hardware. | ||
| 590 | rsqrtss(SRC1, SRC1); | ||
| 591 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX | ||
| 592 | |||
| 593 | Compile_DestEnable(instr, SRC1); | ||
| 594 | } | ||
| 595 | |||
| 596 | void JitShader::Compile_NOP(Instruction instr) {} | ||
| 597 | |||
| 598 | void JitShader::Compile_END(Instruction instr) { | ||
| 599 | ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8); | ||
| 600 | ret(); | ||
| 601 | } | ||
| 602 | |||
| 603 | void JitShader::Compile_CALL(Instruction instr) { | ||
| 604 | // Push offset of the return | ||
| 605 | push(qword, (instr.flow_control.dest_offset + instr.flow_control.num_instructions)); | ||
| 606 | |||
| 607 | // Call the subroutine | ||
| 608 | call(instruction_labels[instr.flow_control.dest_offset]); | ||
| 609 | |||
| 610 | // Skip over the return offset that's on the stack | ||
| 611 | add(rsp, 8); | ||
| 612 | } | ||
| 613 | |||
| 614 | void JitShader::Compile_CALLC(Instruction instr) { | ||
| 615 | Compile_EvaluateCondition(instr); | ||
| 616 | Label b; | ||
| 617 | jz(b); | ||
| 618 | Compile_CALL(instr); | ||
| 619 | L(b); | ||
| 620 | } | ||
| 621 | |||
| 622 | void JitShader::Compile_CALLU(Instruction instr) { | ||
| 623 | Compile_UniformCondition(instr); | ||
| 624 | Label b; | ||
| 625 | jz(b); | ||
| 626 | Compile_CALL(instr); | ||
| 627 | L(b); | ||
| 628 | } | ||
| 629 | |||
| 630 | void JitShader::Compile_CMP(Instruction instr) { | ||
| 631 | using Op = Instruction::Common::CompareOpType::Op; | ||
| 632 | Op op_x = instr.common.compare_op.x; | ||
| 633 | Op op_y = instr.common.compare_op.y; | ||
| 634 | |||
| 635 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 636 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 637 | |||
| 638 | // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to | ||
| 639 | // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here | ||
| 640 | // because they don't match when used with NaNs. | ||
| 641 | static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE}; | ||
| 642 | |||
| 643 | bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); | ||
| 644 | Xmm lhs_x = invert_op_x ? SRC2 : SRC1; | ||
| 645 | Xmm rhs_x = invert_op_x ? SRC1 : SRC2; | ||
| 646 | |||
| 647 | if (op_x == op_y) { | ||
| 648 | // Compare X-component and Y-component together | ||
| 649 | cmpps(lhs_x, rhs_x, cmp[op_x]); | ||
| 650 | movq(COND0, lhs_x); | ||
| 651 | |||
| 652 | mov(COND1, COND0); | ||
| 653 | } else { | ||
| 654 | bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual); | ||
| 655 | Xmm lhs_y = invert_op_y ? SRC2 : SRC1; | ||
| 656 | Xmm rhs_y = invert_op_y ? SRC1 : SRC2; | ||
| 657 | |||
| 658 | // Compare X-component | ||
| 659 | movaps(SCRATCH, lhs_x); | ||
| 660 | cmpss(SCRATCH, rhs_x, cmp[op_x]); | ||
| 661 | |||
| 662 | // Compare Y-component | ||
| 663 | cmpps(lhs_y, rhs_y, cmp[op_y]); | ||
| 664 | |||
| 665 | movq(COND0, SCRATCH); | ||
| 666 | movq(COND1, lhs_y); | ||
| 667 | } | ||
| 668 | |||
| 669 | shr(COND0.cvt32(), 31); // ignores upper 32 bits in source | ||
| 670 | shr(COND1, 63); | ||
| 671 | } | ||
| 672 | |||
| 673 | void JitShader::Compile_MAD(Instruction instr) { | ||
| 674 | Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); | ||
| 675 | |||
| 676 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { | ||
| 677 | Compile_SwizzleSrc(instr, 2, instr.mad.src2i, SRC2); | ||
| 678 | Compile_SwizzleSrc(instr, 3, instr.mad.src3i, SRC3); | ||
| 679 | } else { | ||
| 680 | Compile_SwizzleSrc(instr, 2, instr.mad.src2, SRC2); | ||
| 681 | Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3); | ||
| 682 | } | ||
| 683 | |||
| 684 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 685 | addps(SRC1, SRC3); | ||
| 686 | |||
| 687 | Compile_DestEnable(instr, SRC1); | ||
| 688 | } | ||
| 689 | |||
| 690 | void JitShader::Compile_IF(Instruction instr) { | ||
| 691 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, | ||
| 692 | "Backwards if-statements not supported"); | ||
| 693 | Label l_else, l_endif; | ||
| 694 | |||
| 695 | // Evaluate the "IF" condition | ||
| 696 | if (instr.opcode.Value() == OpCode::Id::IFU) { | ||
| 697 | Compile_UniformCondition(instr); | ||
| 698 | } else if (instr.opcode.Value() == OpCode::Id::IFC) { | ||
| 699 | Compile_EvaluateCondition(instr); | ||
| 700 | } | ||
| 701 | jz(l_else, T_NEAR); | ||
| 702 | |||
| 703 | // Compile the code that corresponds to the condition evaluating as true | ||
| 704 | Compile_Block(instr.flow_control.dest_offset); | ||
| 705 | |||
| 706 | // If there isn't an "ELSE" condition, we are done here | ||
| 707 | if (instr.flow_control.num_instructions == 0) { | ||
| 708 | L(l_else); | ||
| 709 | return; | ||
| 710 | } | ||
| 711 | |||
| 712 | jmp(l_endif, T_NEAR); | ||
| 713 | |||
| 714 | L(l_else); | ||
| 715 | // This code corresponds to the "ELSE" condition | ||
| 716 | // Comple the code that corresponds to the condition evaluating as false | ||
| 717 | Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions); | ||
| 718 | |||
| 719 | L(l_endif); | ||
| 720 | } | ||
| 721 | |||
| 722 | void JitShader::Compile_LOOP(Instruction instr) { | ||
| 723 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, | ||
| 724 | "Backwards loops not supported"); | ||
| 725 | Compile_Assert(!looping, "Nested loops not supported"); | ||
| 726 | |||
| 727 | looping = true; | ||
| 728 | |||
| 729 | // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. | ||
| 730 | // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by | ||
| 731 | // 4 bits) to be used as an offset into the 16-byte vector registers later | ||
| 732 | size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id); | ||
| 733 | mov(LOOPCOUNT, dword[SETUP + offset]); | ||
| 734 | mov(LOOPCOUNT_REG, LOOPCOUNT); | ||
| 735 | shr(LOOPCOUNT_REG, 4); | ||
| 736 | and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start | ||
| 737 | mov(LOOPINC, LOOPCOUNT); | ||
| 738 | shr(LOOPINC, 12); | ||
| 739 | and(LOOPINC, 0xFF0); // Z-component is the incrementer | ||
| 740 | movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count | ||
| 741 | add(LOOPCOUNT, 1); // Iteration count is X-component + 1 | ||
| 742 | |||
| 743 | Label l_loop_start; | ||
| 744 | L(l_loop_start); | ||
| 745 | |||
| 746 | Compile_Block(instr.flow_control.dest_offset + 1); | ||
| 747 | |||
| 748 | add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component | ||
| 749 | sub(LOOPCOUNT, 1); // Increment loop count by 1 | ||
| 750 | jnz(l_loop_start); // Loop if not equal | ||
| 751 | |||
| 752 | looping = false; | ||
| 753 | } | ||
| 754 | |||
| 755 | void JitShader::Compile_JMP(Instruction instr) { | ||
| 756 | if (instr.opcode.Value() == OpCode::Id::JMPC) | ||
| 757 | Compile_EvaluateCondition(instr); | ||
| 758 | else if (instr.opcode.Value() == OpCode::Id::JMPU) | ||
| 759 | Compile_UniformCondition(instr); | ||
| 760 | else | ||
| 761 | UNREACHABLE(); | ||
| 762 | |||
| 763 | bool inverted_condition = | ||
| 764 | (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1); | ||
| 765 | |||
| 766 | Label& b = instruction_labels[instr.flow_control.dest_offset]; | ||
| 767 | if (inverted_condition) { | ||
| 768 | jz(b, T_NEAR); | ||
| 769 | } else { | ||
| 770 | jnz(b, T_NEAR); | ||
| 771 | } | ||
| 772 | } | ||
| 773 | |||
| 774 | void JitShader::Compile_Block(unsigned end) { | ||
| 775 | while (program_counter < end) { | ||
| 776 | Compile_NextInstr(); | ||
| 777 | } | ||
| 778 | } | ||
| 779 | |||
| 780 | void JitShader::Compile_Return() { | ||
| 781 | // Peek return offset on the stack and check if we're at that offset | ||
| 782 | mov(rax, qword[rsp + 8]); | ||
| 783 | cmp(eax, (program_counter)); | ||
| 784 | |||
| 785 | // If so, jump back to before CALL | ||
| 786 | Label b; | ||
| 787 | jnz(b); | ||
| 788 | ret(); | ||
| 789 | L(b); | ||
| 790 | } | ||
| 791 | |||
| 792 | void JitShader::Compile_NextInstr() { | ||
| 793 | if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) { | ||
| 794 | Compile_Return(); | ||
| 795 | } | ||
| 796 | |||
| 797 | L(instruction_labels[program_counter]); | ||
| 798 | |||
| 799 | Instruction instr = GetVertexShaderInstruction(program_counter++); | ||
| 800 | |||
| 801 | OpCode::Id opcode = instr.opcode.Value(); | ||
| 802 | auto instr_func = instr_table[static_cast<unsigned>(opcode)]; | ||
| 803 | |||
| 804 | if (instr_func) { | ||
| 805 | // JIT the instruction! | ||
| 806 | ((*this).*instr_func)(instr); | ||
| 807 | } else { | ||
| 808 | // Unhandled instruction | ||
| 809 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", | ||
| 810 | instr.opcode.Value().EffectiveOpCode(), instr.hex); | ||
| 811 | } | ||
| 812 | } | ||
| 813 | |||
| 814 | void JitShader::FindReturnOffsets() { | ||
| 815 | return_offsets.clear(); | ||
| 816 | |||
| 817 | for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) { | ||
| 818 | Instruction instr = GetVertexShaderInstruction(offset); | ||
| 819 | |||
| 820 | switch (instr.opcode.Value()) { | ||
| 821 | case OpCode::Id::CALL: | ||
| 822 | case OpCode::Id::CALLC: | ||
| 823 | case OpCode::Id::CALLU: | ||
| 824 | return_offsets.push_back(instr.flow_control.dest_offset + | ||
| 825 | instr.flow_control.num_instructions); | ||
| 826 | break; | ||
| 827 | default: | ||
| 828 | break; | ||
| 829 | } | ||
| 830 | } | ||
| 831 | |||
| 832 | // Sort for efficient binary search later | ||
| 833 | std::sort(return_offsets.begin(), return_offsets.end()); | ||
| 834 | } | ||
| 835 | |||
| 836 | void JitShader::Compile() { | ||
| 837 | // Reset flow control state | ||
| 838 | program = (CompiledShader*)getCurr(); | ||
| 839 | program_counter = 0; | ||
| 840 | looping = false; | ||
| 841 | instruction_labels.fill(Xbyak::Label()); | ||
| 842 | |||
| 843 | // Find all `CALL` instructions and identify return locations | ||
| 844 | FindReturnOffsets(); | ||
| 845 | |||
| 846 | // The stack pointer is 8 modulo 16 at the entry of a procedure | ||
| 847 | ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8); | ||
| 848 | |||
| 849 | mov(SETUP, ABI_PARAM1); | ||
| 850 | mov(STATE, ABI_PARAM2); | ||
| 851 | |||
| 852 | // Zero address/loop registers | ||
| 853 | xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); | ||
| 854 | xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); | ||
| 855 | xor(LOOPCOUNT_REG, LOOPCOUNT_REG); | ||
| 856 | |||
| 857 | // Used to set a register to one | ||
| 858 | static const __m128 one = {1.f, 1.f, 1.f, 1.f}; | ||
| 859 | mov(rax, reinterpret_cast<size_t>(&one)); | ||
| 860 | movaps(ONE, xword[rax]); | ||
| 861 | |||
| 862 | // Used to negate registers | ||
| 863 | static const __m128 neg = {-0.f, -0.f, -0.f, -0.f}; | ||
| 864 | mov(rax, reinterpret_cast<size_t>(&neg)); | ||
| 865 | movaps(NEGBIT, xword[rax]); | ||
| 866 | |||
| 867 | // Jump to start of the shader program | ||
| 868 | jmp(ABI_PARAM3); | ||
| 869 | |||
| 870 | // Compile entire program | ||
| 871 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | ||
| 872 | |||
| 873 | // Free memory that's no longer needed | ||
| 874 | return_offsets.clear(); | ||
| 875 | return_offsets.shrink_to_fit(); | ||
| 876 | |||
| 877 | ready(); | ||
| 878 | |||
| 879 | uintptr_t size = reinterpret_cast<uintptr_t>(getCurr()) - reinterpret_cast<uintptr_t>(program); | ||
| 880 | ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); | ||
| 881 | LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); | ||
| 882 | } | ||
| 883 | |||
| 884 | JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {} | ||
| 885 | |||
| 886 | } // namespace Shader | 47 | } // namespace Shader |
| 887 | |||
| 888 | } // namespace Pica | 48 | } // namespace Pica |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index f37548306..078b2cba5 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -1,121 +1,30 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | 1 | // Copyright 2016 Citra Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <memory> |
| 8 | #include <cstddef> | 8 | #include <unordered_map> |
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | #include <nihstro/shader_bytecode.h> | ||
| 12 | #include <xbyak.h> | ||
| 13 | #include "common/bit_set.h" | ||
| 14 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 15 | #include "common/x64/emitter.h" | ||
| 16 | #include "video_core/shader/shader.h" | 10 | #include "video_core/shader/shader.h" |
| 17 | 11 | ||
| 18 | using nihstro::Instruction; | ||
| 19 | using nihstro::OpCode; | ||
| 20 | using nihstro::SwizzlePattern; | ||
| 21 | |||
| 22 | namespace Pica { | 12 | namespace Pica { |
| 23 | |||
| 24 | namespace Shader { | 13 | namespace Shader { |
| 25 | 14 | ||
| 26 | /// Memory allocated for each compiled shader (64Kb) | 15 | class JitShader; |
| 27 | constexpr size_t MAX_SHADER_SIZE = 1024 * 64; | ||
| 28 | 16 | ||
| 29 | /** | 17 | class JitX64Engine final : public ShaderEngine { |
| 30 | * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 | ||
| 31 | * code that can be executed on the host machine directly. | ||
| 32 | */ | ||
| 33 | class JitShader : public Xbyak::CodeGenerator { | ||
| 34 | public: | 18 | public: |
| 35 | JitShader(); | 19 | JitX64Engine(); |
| 36 | 20 | ~JitX64Engine() override; | |
| 37 | void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { | ||
| 38 | program(&setup, &state, instruction_labels[offset].getAddress()); | ||
| 39 | } | ||
| 40 | |||
| 41 | void Compile(); | ||
| 42 | 21 | ||
| 43 | void Compile_ADD(Instruction instr); | 22 | void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override; |
| 44 | void Compile_DP3(Instruction instr); | 23 | void Run(const ShaderSetup& setup, UnitState& state) const override; |
| 45 | void Compile_DP4(Instruction instr); | ||
| 46 | void Compile_DPH(Instruction instr); | ||
| 47 | void Compile_EX2(Instruction instr); | ||
| 48 | void Compile_LG2(Instruction instr); | ||
| 49 | void Compile_MUL(Instruction instr); | ||
| 50 | void Compile_SGE(Instruction instr); | ||
| 51 | void Compile_SLT(Instruction instr); | ||
| 52 | void Compile_FLR(Instruction instr); | ||
| 53 | void Compile_MAX(Instruction instr); | ||
| 54 | void Compile_MIN(Instruction instr); | ||
| 55 | void Compile_RCP(Instruction instr); | ||
| 56 | void Compile_RSQ(Instruction instr); | ||
| 57 | void Compile_MOVA(Instruction instr); | ||
| 58 | void Compile_MOV(Instruction instr); | ||
| 59 | void Compile_NOP(Instruction instr); | ||
| 60 | void Compile_END(Instruction instr); | ||
| 61 | void Compile_CALL(Instruction instr); | ||
| 62 | void Compile_CALLC(Instruction instr); | ||
| 63 | void Compile_CALLU(Instruction instr); | ||
| 64 | void Compile_IF(Instruction instr); | ||
| 65 | void Compile_LOOP(Instruction instr); | ||
| 66 | void Compile_JMP(Instruction instr); | ||
| 67 | void Compile_CMP(Instruction instr); | ||
| 68 | void Compile_MAD(Instruction instr); | ||
| 69 | 24 | ||
| 70 | private: | 25 | private: |
| 71 | void Compile_Block(unsigned end); | 26 | std::unordered_map<u64, std::unique_ptr<JitShader>> cache; |
| 72 | void Compile_NextInstr(); | ||
| 73 | |||
| 74 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, | ||
| 75 | Xbyak::Xmm dest); | ||
| 76 | void Compile_DestEnable(Instruction instr, Xbyak::Xmm dest); | ||
| 77 | |||
| 78 | /** | ||
| 79 | * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying | ||
| 80 | * zero by inf. Clobbers `src2` and `scratch`. | ||
| 81 | */ | ||
| 82 | void Compile_SanitizedMul(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch); | ||
| 83 | |||
| 84 | void Compile_EvaluateCondition(Instruction instr); | ||
| 85 | void Compile_UniformCondition(Instruction instr); | ||
| 86 | |||
| 87 | /** | ||
| 88 | * Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction. | ||
| 89 | */ | ||
| 90 | void Compile_Return(); | ||
| 91 | |||
| 92 | BitSet32 PersistentCallerSavedRegs(); | ||
| 93 | |||
| 94 | /** | ||
| 95 | * Assertion evaluated at compile-time, but only triggered if executed at runtime. | ||
| 96 | * @param msg Message to be logged if the assertion fails. | ||
| 97 | */ | ||
| 98 | void Compile_Assert(bool condition, const char* msg); | ||
| 99 | |||
| 100 | /** | ||
| 101 | * Analyzes the entire shader program for `CALL` instructions before emitting any code, | ||
| 102 | * identifying the locations where a return needs to be inserted. | ||
| 103 | */ | ||
| 104 | void FindReturnOffsets(); | ||
| 105 | |||
| 106 | /// Mapping of Pica VS instructions to pointers in the emitted code | ||
| 107 | std::array<Xbyak::Label, 1024> instruction_labels; | ||
| 108 | |||
| 109 | /// Offsets in code where a return needs to be inserted | ||
| 110 | std::vector<unsigned> return_offsets; | ||
| 111 | |||
| 112 | unsigned program_counter = 0; ///< Offset of the next instruction to decode | ||
| 113 | bool looping = false; ///< True if compiling a loop, used to check for nested loops | ||
| 114 | |||
| 115 | using CompiledShader = void(const void* setup, void* state, const u8* start_addr); | ||
| 116 | CompiledShader* program = nullptr; | ||
| 117 | }; | 27 | }; |
| 118 | 28 | ||
| 119 | } // Shader | 29 | } // namespace Shader |
| 120 | 30 | } // namespace Pica | |
| 121 | } // Pica | ||
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp new file mode 100644 index 000000000..49806e8c9 --- /dev/null +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp | |||
| @@ -0,0 +1,884 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstdint> | ||
| 8 | #include <nihstro/shader_bytecode.h> | ||
| 9 | #include <smmintrin.h> | ||
| 10 | #include <xmmintrin.h> | ||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "common/vector_math.h" | ||
| 14 | #include "common/x64/cpu_detect.h" | ||
| 15 | #include "common/x64/xbyak_abi.h" | ||
| 16 | #include "common/x64/xbyak_util.h" | ||
| 17 | #include "video_core/pica_state.h" | ||
| 18 | #include "video_core/pica_types.h" | ||
| 19 | #include "video_core/shader/shader.h" | ||
| 20 | #include "video_core/shader/shader_jit_x64_compiler.h" | ||
| 21 | |||
| 22 | using namespace Common::X64; | ||
| 23 | using namespace Xbyak::util; | ||
| 24 | using Xbyak::Label; | ||
| 25 | using Xbyak::Reg32; | ||
| 26 | using Xbyak::Reg64; | ||
| 27 | using Xbyak::Xmm; | ||
| 28 | |||
| 29 | namespace Pica { | ||
| 30 | |||
| 31 | namespace Shader { | ||
| 32 | |||
| 33 | typedef void (JitShader::*JitFunction)(Instruction instr); | ||
| 34 | |||
| 35 | const JitFunction instr_table[64] = { | ||
| 36 | &JitShader::Compile_ADD, // add | ||
| 37 | &JitShader::Compile_DP3, // dp3 | ||
| 38 | &JitShader::Compile_DP4, // dp4 | ||
| 39 | &JitShader::Compile_DPH, // dph | ||
| 40 | nullptr, // unknown | ||
| 41 | &JitShader::Compile_EX2, // ex2 | ||
| 42 | &JitShader::Compile_LG2, // lg2 | ||
| 43 | nullptr, // unknown | ||
| 44 | &JitShader::Compile_MUL, // mul | ||
| 45 | &JitShader::Compile_SGE, // sge | ||
| 46 | &JitShader::Compile_SLT, // slt | ||
| 47 | &JitShader::Compile_FLR, // flr | ||
| 48 | &JitShader::Compile_MAX, // max | ||
| 49 | &JitShader::Compile_MIN, // min | ||
| 50 | &JitShader::Compile_RCP, // rcp | ||
| 51 | &JitShader::Compile_RSQ, // rsq | ||
| 52 | nullptr, // unknown | ||
| 53 | nullptr, // unknown | ||
| 54 | &JitShader::Compile_MOVA, // mova | ||
| 55 | &JitShader::Compile_MOV, // mov | ||
| 56 | nullptr, // unknown | ||
| 57 | nullptr, // unknown | ||
| 58 | nullptr, // unknown | ||
| 59 | nullptr, // unknown | ||
| 60 | &JitShader::Compile_DPH, // dphi | ||
| 61 | nullptr, // unknown | ||
| 62 | &JitShader::Compile_SGE, // sgei | ||
| 63 | &JitShader::Compile_SLT, // slti | ||
| 64 | nullptr, // unknown | ||
| 65 | nullptr, // unknown | ||
| 66 | nullptr, // unknown | ||
| 67 | nullptr, // unknown | ||
| 68 | nullptr, // unknown | ||
| 69 | &JitShader::Compile_NOP, // nop | ||
| 70 | &JitShader::Compile_END, // end | ||
| 71 | nullptr, // break | ||
| 72 | &JitShader::Compile_CALL, // call | ||
| 73 | &JitShader::Compile_CALLC, // callc | ||
| 74 | &JitShader::Compile_CALLU, // callu | ||
| 75 | &JitShader::Compile_IF, // ifu | ||
| 76 | &JitShader::Compile_IF, // ifc | ||
| 77 | &JitShader::Compile_LOOP, // loop | ||
| 78 | nullptr, // emit | ||
| 79 | nullptr, // sete | ||
| 80 | &JitShader::Compile_JMP, // jmpc | ||
| 81 | &JitShader::Compile_JMP, // jmpu | ||
| 82 | &JitShader::Compile_CMP, // cmp | ||
| 83 | &JitShader::Compile_CMP, // cmp | ||
| 84 | &JitShader::Compile_MAD, // madi | ||
| 85 | &JitShader::Compile_MAD, // madi | ||
| 86 | &JitShader::Compile_MAD, // madi | ||
| 87 | &JitShader::Compile_MAD, // madi | ||
| 88 | &JitShader::Compile_MAD, // madi | ||
| 89 | &JitShader::Compile_MAD, // madi | ||
| 90 | &JitShader::Compile_MAD, // madi | ||
| 91 | &JitShader::Compile_MAD, // madi | ||
| 92 | &JitShader::Compile_MAD, // mad | ||
| 93 | &JitShader::Compile_MAD, // mad | ||
| 94 | &JitShader::Compile_MAD, // mad | ||
| 95 | &JitShader::Compile_MAD, // mad | ||
| 96 | &JitShader::Compile_MAD, // mad | ||
| 97 | &JitShader::Compile_MAD, // mad | ||
| 98 | &JitShader::Compile_MAD, // mad | ||
| 99 | &JitShader::Compile_MAD, // mad | ||
| 100 | }; | ||
| 101 | |||
| 102 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can | ||
| 103 | // be used as scratch registers within a compiler function. The other registers have designated | ||
| 104 | // purposes, as documented below: | ||
| 105 | |||
| 106 | /// Pointer to the uniform memory | ||
| 107 | static const Reg64 SETUP = r9; | ||
| 108 | /// The two 32-bit VS address offset registers set by the MOVA instruction | ||
| 109 | static const Reg64 ADDROFFS_REG_0 = r10; | ||
| 110 | static const Reg64 ADDROFFS_REG_1 = r11; | ||
| 111 | /// VS loop count register (Multiplied by 16) | ||
| 112 | static const Reg32 LOOPCOUNT_REG = r12d; | ||
| 113 | /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker) | ||
| 114 | static const Reg32 LOOPCOUNT = esi; | ||
| 115 | /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) | ||
| 116 | static const Reg32 LOOPINC = edi; | ||
| 117 | /// Result of the previous CMP instruction for the X-component comparison | ||
| 118 | static const Reg64 COND0 = r13; | ||
| 119 | /// Result of the previous CMP instruction for the Y-component comparison | ||
| 120 | static const Reg64 COND1 = r14; | ||
| 121 | /// Pointer to the UnitState instance for the current VS unit | ||
| 122 | static const Reg64 STATE = r15; | ||
| 123 | /// SIMD scratch register | ||
| 124 | static const Xmm SCRATCH = xmm0; | ||
| 125 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register | ||
| 126 | static const Xmm SRC1 = xmm1; | ||
| 127 | /// Loaded with the second swizzled source register, otherwise can be used as a scratch register | ||
| 128 | static const Xmm SRC2 = xmm2; | ||
| 129 | /// Loaded with the third swizzled source register, otherwise can be used as a scratch register | ||
| 130 | static const Xmm SRC3 = xmm3; | ||
| 131 | /// Additional scratch register | ||
| 132 | static const Xmm SCRATCH2 = xmm4; | ||
| 133 | /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one | ||
| 134 | static const Xmm ONE = xmm14; | ||
| 135 | /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR | ||
| 136 | static const Xmm NEGBIT = xmm15; | ||
| 137 | |||
| 138 | // State registers that must not be modified by external function calls | ||
| 139 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed | ||
| 140 | static const BitSet32 persistent_regs = BuildRegSet({ | ||
| 141 | // Pointers to register blocks | ||
| 142 | SETUP, STATE, | ||
| 143 | // Cached registers | ||
| 144 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, | ||
| 145 | // Constants | ||
| 146 | ONE, NEGBIT, | ||
| 147 | }); | ||
| 148 | |||
| 149 | /// Raw constant for the source register selector that indicates no swizzling is performed | ||
| 150 | static const u8 NO_SRC_REG_SWIZZLE = 0x1b; | ||
| 151 | /// Raw constant for the destination register enable mask that indicates all components are enabled | ||
| 152 | static const u8 NO_DEST_REG_MASK = 0xf; | ||
| 153 | |||
| 154 | static void LogCritical(const char* msg) { | ||
| 155 | LOG_CRITICAL(HW_GPU, "%s", msg); | ||
| 156 | } | ||
| 157 | |||
| 158 | void JitShader::Compile_Assert(bool condition, const char* msg) { | ||
| 159 | if (!condition) { | ||
| 160 | mov(ABI_PARAM1, reinterpret_cast<size_t>(msg)); | ||
| 161 | CallFarFunction(*this, LogCritical); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
| 165 | /** | ||
| 166 | * Loads and swizzles a source register into the specified XMM register. | ||
| 167 | * @param instr VS instruction, used for determining how to load the source register | ||
| 168 | * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) | ||
| 169 | * @param src_reg SourceRegister object corresponding to the source register to load | ||
| 170 | * @param dest Destination XMM register to store the loaded, swizzled source register | ||
| 171 | */ | ||
| 172 | void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, | ||
| 173 | Xmm dest) { | ||
| 174 | Reg64 src_ptr; | ||
| 175 | size_t src_offset; | ||
| 176 | |||
| 177 | if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { | ||
| 178 | src_ptr = SETUP; | ||
| 179 | src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex()); | ||
| 180 | } else { | ||
| 181 | src_ptr = STATE; | ||
| 182 | src_offset = UnitState::InputOffset(src_reg); | ||
| 183 | } | ||
| 184 | |||
| 185 | int src_offset_disp = (int)src_offset; | ||
| 186 | ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type"); | ||
| 187 | |||
| 188 | unsigned operand_desc_id; | ||
| 189 | |||
| 190 | const bool is_inverted = | ||
| 191 | (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); | ||
| 192 | |||
| 193 | unsigned address_register_index; | ||
| 194 | unsigned offset_src; | ||
| 195 | |||
| 196 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || | ||
| 197 | instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { | ||
| 198 | operand_desc_id = instr.mad.operand_desc_id; | ||
| 199 | offset_src = is_inverted ? 3 : 2; | ||
| 200 | address_register_index = instr.mad.address_register_index; | ||
| 201 | } else { | ||
| 202 | operand_desc_id = instr.common.operand_desc_id; | ||
| 203 | offset_src = is_inverted ? 2 : 1; | ||
| 204 | address_register_index = instr.common.address_register_index; | ||
| 205 | } | ||
| 206 | |||
| 207 | if (src_num == offset_src && address_register_index != 0) { | ||
| 208 | switch (address_register_index) { | ||
| 209 | case 1: // address offset 1 | ||
| 210 | movaps(dest, xword[src_ptr + ADDROFFS_REG_0 + src_offset_disp]); | ||
| 211 | break; | ||
| 212 | case 2: // address offset 2 | ||
| 213 | movaps(dest, xword[src_ptr + ADDROFFS_REG_1 + src_offset_disp]); | ||
| 214 | break; | ||
| 215 | case 3: // address offset 3 | ||
| 216 | movaps(dest, xword[src_ptr + LOOPCOUNT_REG.cvt64() + src_offset_disp]); | ||
| 217 | break; | ||
| 218 | default: | ||
| 219 | UNREACHABLE(); | ||
| 220 | break; | ||
| 221 | } | ||
| 222 | } else { | ||
| 223 | // Load the source | ||
| 224 | movaps(dest, xword[src_ptr + src_offset_disp]); | ||
| 225 | } | ||
| 226 | |||
| 227 | SwizzlePattern swiz = {(*swizzle_data)[operand_desc_id]}; | ||
| 228 | |||
| 229 | // Generate instructions for source register swizzling as needed | ||
| 230 | u8 sel = swiz.GetRawSelector(src_num); | ||
| 231 | if (sel != NO_SRC_REG_SWIZZLE) { | ||
| 232 | // Selector component order needs to be reversed for the SHUFPS instruction | ||
| 233 | sel = ((sel & 0xc0) >> 6) | ((sel & 3) << 6) | ((sel & 0xc) << 2) | ((sel & 0x30) >> 2); | ||
| 234 | |||
| 235 | // Shuffle inputs for swizzle | ||
| 236 | shufps(dest, dest, sel); | ||
| 237 | } | ||
| 238 | |||
| 239 | // If the source register should be negated, flip the negative bit using XOR | ||
| 240 | const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3}; | ||
| 241 | if (negate[src_num - 1]) { | ||
| 242 | xorps(dest, NEGBIT); | ||
| 243 | } | ||
| 244 | } | ||
| 245 | |||
| 246 | void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { | ||
| 247 | DestRegister dest; | ||
| 248 | unsigned operand_desc_id; | ||
| 249 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || | ||
| 250 | instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { | ||
| 251 | operand_desc_id = instr.mad.operand_desc_id; | ||
| 252 | dest = instr.mad.dest.Value(); | ||
| 253 | } else { | ||
| 254 | operand_desc_id = instr.common.operand_desc_id; | ||
| 255 | dest = instr.common.dest.Value(); | ||
| 256 | } | ||
| 257 | |||
| 258 | SwizzlePattern swiz = {(*swizzle_data)[operand_desc_id]}; | ||
| 259 | |||
| 260 | size_t dest_offset_disp = UnitState::OutputOffset(dest); | ||
| 261 | |||
| 262 | // If all components are enabled, write the result to the destination register | ||
| 263 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | ||
| 264 | // Store dest back to memory | ||
| 265 | movaps(xword[STATE + dest_offset_disp], src); | ||
| 266 | |||
| 267 | } else { | ||
| 268 | // Not all components are enabled, so mask the result when storing to the destination | ||
| 269 | // register... | ||
| 270 | movaps(SCRATCH, xword[STATE + dest_offset_disp]); | ||
| 271 | |||
| 272 | if (Common::GetCPUCaps().sse4_1) { | ||
| 273 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | | ||
| 274 | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||
| 275 | blendps(SCRATCH, src, mask); | ||
| 276 | } else { | ||
| 277 | movaps(SCRATCH2, src); | ||
| 278 | unpckhps(SCRATCH2, SCRATCH); // Unpack X/Y components of source and destination | ||
| 279 | unpcklps(SCRATCH, src); // Unpack Z/W components of source and destination | ||
| 280 | |||
| 281 | // Compute selector to selectively copy source components to destination for SHUFPS | ||
| 282 | // instruction | ||
| 283 | u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | | ||
| 284 | ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | | ||
| 285 | ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | | ||
| 286 | ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6); | ||
| 287 | shufps(SCRATCH, SCRATCH2, sel); | ||
| 288 | } | ||
| 289 | |||
| 290 | // Store dest back to memory | ||
| 291 | movaps(xword[STATE + dest_offset_disp], SCRATCH); | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | void JitShader::Compile_SanitizedMul(Xmm src1, Xmm src2, Xmm scratch) { | ||
| 296 | movaps(scratch, src1); | ||
| 297 | cmpordps(scratch, src2); | ||
| 298 | |||
| 299 | mulps(src1, src2); | ||
| 300 | |||
| 301 | movaps(src2, src1); | ||
| 302 | cmpunordps(src2, src2); | ||
| 303 | |||
| 304 | xorps(scratch, src2); | ||
| 305 | andps(src1, scratch); | ||
| 306 | } | ||
| 307 | |||
| 308 | void JitShader::Compile_EvaluateCondition(Instruction instr) { | ||
| 309 | // Note: NXOR is used below to check for equality | ||
| 310 | switch (instr.flow_control.op) { | ||
| 311 | case Instruction::FlowControlType::Or: | ||
| 312 | mov(eax, COND0); | ||
| 313 | mov(ebx, COND1); | ||
| 314 | xor(eax, (instr.flow_control.refx.Value() ^ 1)); | ||
| 315 | xor(ebx, (instr.flow_control.refy.Value() ^ 1)); | ||
| 316 | or (eax, ebx); | ||
| 317 | break; | ||
| 318 | |||
| 319 | case Instruction::FlowControlType::And: | ||
| 320 | mov(eax, COND0); | ||
| 321 | mov(ebx, COND1); | ||
| 322 | xor(eax, (instr.flow_control.refx.Value() ^ 1)); | ||
| 323 | xor(ebx, (instr.flow_control.refy.Value() ^ 1)); | ||
| 324 | and(eax, ebx); | ||
| 325 | break; | ||
| 326 | |||
| 327 | case Instruction::FlowControlType::JustX: | ||
| 328 | mov(eax, COND0); | ||
| 329 | xor(eax, (instr.flow_control.refx.Value() ^ 1)); | ||
| 330 | break; | ||
| 331 | |||
| 332 | case Instruction::FlowControlType::JustY: | ||
| 333 | mov(eax, COND1); | ||
| 334 | xor(eax, (instr.flow_control.refy.Value() ^ 1)); | ||
| 335 | break; | ||
| 336 | } | ||
| 337 | } | ||
| 338 | |||
| 339 | void JitShader::Compile_UniformCondition(Instruction instr) { | ||
| 340 | size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); | ||
| 341 | cmp(byte[SETUP + offset], 0); | ||
| 342 | } | ||
| 343 | |||
| 344 | BitSet32 JitShader::PersistentCallerSavedRegs() { | ||
| 345 | return persistent_regs & ABI_ALL_CALLER_SAVED; | ||
| 346 | } | ||
| 347 | |||
| 348 | void JitShader::Compile_ADD(Instruction instr) { | ||
| 349 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 350 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 351 | addps(SRC1, SRC2); | ||
| 352 | Compile_DestEnable(instr, SRC1); | ||
| 353 | } | ||
| 354 | |||
| 355 | void JitShader::Compile_DP3(Instruction instr) { | ||
| 356 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 357 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 358 | |||
| 359 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 360 | |||
| 361 | movaps(SRC2, SRC1); | ||
| 362 | shufps(SRC2, SRC2, _MM_SHUFFLE(1, 1, 1, 1)); | ||
| 363 | |||
| 364 | movaps(SRC3, SRC1); | ||
| 365 | shufps(SRC3, SRC3, _MM_SHUFFLE(2, 2, 2, 2)); | ||
| 366 | |||
| 367 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); | ||
| 368 | addps(SRC1, SRC2); | ||
| 369 | addps(SRC1, SRC3); | ||
| 370 | |||
| 371 | Compile_DestEnable(instr, SRC1); | ||
| 372 | } | ||
| 373 | |||
| 374 | void JitShader::Compile_DP4(Instruction instr) { | ||
| 375 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 376 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 377 | |||
| 378 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 379 | |||
| 380 | movaps(SRC2, SRC1); | ||
| 381 | shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> YXWZ | ||
| 382 | addps(SRC1, SRC2); | ||
| 383 | |||
| 384 | movaps(SRC2, SRC1); | ||
| 385 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX | ||
| 386 | addps(SRC1, SRC2); | ||
| 387 | |||
| 388 | Compile_DestEnable(instr, SRC1); | ||
| 389 | } | ||
| 390 | |||
| 391 | void JitShader::Compile_DPH(Instruction instr) { | ||
| 392 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { | ||
| 393 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 394 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 395 | } else { | ||
| 396 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 397 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 398 | } | ||
| 399 | |||
| 400 | if (Common::GetCPUCaps().sse4_1) { | ||
| 401 | // Set 4th component to 1.0 | ||
| 402 | blendps(SRC1, ONE, 0b1000); | ||
| 403 | } else { | ||
| 404 | // Set 4th component to 1.0 | ||
| 405 | movaps(SCRATCH, SRC1); | ||
| 406 | unpckhps(SCRATCH, ONE); // XYZW, 1111 -> Z1__ | ||
| 407 | unpcklpd(SRC1, SCRATCH); // XYZW, Z1__ -> XYZ1 | ||
| 408 | } | ||
| 409 | |||
| 410 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 411 | |||
| 412 | movaps(SRC2, SRC1); | ||
| 413 | shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> YXWZ | ||
| 414 | addps(SRC1, SRC2); | ||
| 415 | |||
| 416 | movaps(SRC2, SRC1); | ||
| 417 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX | ||
| 418 | addps(SRC1, SRC2); | ||
| 419 | |||
| 420 | Compile_DestEnable(instr, SRC1); | ||
| 421 | } | ||
| 422 | |||
| 423 | void JitShader::Compile_EX2(Instruction instr) { | ||
| 424 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 425 | movss(xmm0, SRC1); // ABI_PARAM1 | ||
| 426 | |||
| 427 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 428 | CallFarFunction(*this, exp2f); | ||
| 429 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 430 | |||
| 431 | shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN | ||
| 432 | movaps(SRC1, xmm0); | ||
| 433 | Compile_DestEnable(instr, SRC1); | ||
| 434 | } | ||
| 435 | |||
| 436 | void JitShader::Compile_LG2(Instruction instr) { | ||
| 437 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 438 | movss(xmm0, SRC1); // ABI_PARAM1 | ||
| 439 | |||
| 440 | ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 441 | CallFarFunction(*this, log2f); | ||
| 442 | ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 443 | |||
| 444 | shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN | ||
| 445 | movaps(SRC1, xmm0); | ||
| 446 | Compile_DestEnable(instr, SRC1); | ||
| 447 | } | ||
| 448 | |||
| 449 | void JitShader::Compile_MUL(Instruction instr) { | ||
| 450 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 451 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 452 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 453 | Compile_DestEnable(instr, SRC1); | ||
| 454 | } | ||
| 455 | |||
| 456 | void JitShader::Compile_SGE(Instruction instr) { | ||
| 457 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { | ||
| 458 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 459 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 460 | } else { | ||
| 461 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 462 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 463 | } | ||
| 464 | |||
| 465 | cmpleps(SRC2, SRC1); | ||
| 466 | andps(SRC2, ONE); | ||
| 467 | |||
| 468 | Compile_DestEnable(instr, SRC2); | ||
| 469 | } | ||
| 470 | |||
| 471 | void JitShader::Compile_SLT(Instruction instr) { | ||
| 472 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { | ||
| 473 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 474 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 475 | } else { | ||
| 476 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 477 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 478 | } | ||
| 479 | |||
| 480 | cmpltps(SRC1, SRC2); | ||
| 481 | andps(SRC1, ONE); | ||
| 482 | |||
| 483 | Compile_DestEnable(instr, SRC1); | ||
| 484 | } | ||
| 485 | |||
| 486 | void JitShader::Compile_FLR(Instruction instr) { | ||
| 487 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 488 | |||
| 489 | if (Common::GetCPUCaps().sse4_1) { | ||
| 490 | roundps(SRC1, SRC1, _MM_FROUND_FLOOR); | ||
| 491 | } else { | ||
| 492 | cvttps2dq(SRC1, SRC1); | ||
| 493 | cvtdq2ps(SRC1, SRC1); | ||
| 494 | } | ||
| 495 | |||
| 496 | Compile_DestEnable(instr, SRC1); | ||
| 497 | } | ||
| 498 | |||
| 499 | void JitShader::Compile_MAX(Instruction instr) { | ||
| 500 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 501 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 502 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. | ||
| 503 | maxps(SRC1, SRC2); | ||
| 504 | Compile_DestEnable(instr, SRC1); | ||
| 505 | } | ||
| 506 | |||
| 507 | void JitShader::Compile_MIN(Instruction instr) { | ||
| 508 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 509 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 510 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. | ||
| 511 | minps(SRC1, SRC2); | ||
| 512 | Compile_DestEnable(instr, SRC1); | ||
| 513 | } | ||
| 514 | |||
| 515 | void JitShader::Compile_MOVA(Instruction instr) { | ||
| 516 | SwizzlePattern swiz = {(*swizzle_data)[instr.common.operand_desc_id]}; | ||
| 517 | |||
| 518 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { | ||
| 519 | return; // NoOp | ||
| 520 | } | ||
| 521 | |||
| 522 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 523 | |||
| 524 | // Convert floats to integers using truncation (only care about X and Y components) | ||
| 525 | cvttps2dq(SRC1, SRC1); | ||
| 526 | |||
| 527 | // Get result | ||
| 528 | movq(rax, SRC1); | ||
| 529 | |||
| 530 | // Handle destination enable | ||
| 531 | if (swiz.DestComponentEnabled(0) && swiz.DestComponentEnabled(1)) { | ||
| 532 | // Move and sign-extend low 32 bits | ||
| 533 | movsxd(ADDROFFS_REG_0, eax); | ||
| 534 | |||
| 535 | // Move and sign-extend high 32 bits | ||
| 536 | shr(rax, 32); | ||
| 537 | movsxd(ADDROFFS_REG_1, eax); | ||
| 538 | |||
| 539 | // Multiply by 16 to be used as an offset later | ||
| 540 | shl(ADDROFFS_REG_0, 4); | ||
| 541 | shl(ADDROFFS_REG_1, 4); | ||
| 542 | } else { | ||
| 543 | if (swiz.DestComponentEnabled(0)) { | ||
| 544 | // Move and sign-extend low 32 bits | ||
| 545 | movsxd(ADDROFFS_REG_0, eax); | ||
| 546 | |||
| 547 | // Multiply by 16 to be used as an offset later | ||
| 548 | shl(ADDROFFS_REG_0, 4); | ||
| 549 | } else if (swiz.DestComponentEnabled(1)) { | ||
| 550 | // Move and sign-extend high 32 bits | ||
| 551 | shr(rax, 32); | ||
| 552 | movsxd(ADDROFFS_REG_1, eax); | ||
| 553 | |||
| 554 | // Multiply by 16 to be used as an offset later | ||
| 555 | shl(ADDROFFS_REG_1, 4); | ||
| 556 | } | ||
| 557 | } | ||
| 558 | } | ||
| 559 | |||
| 560 | void JitShader::Compile_MOV(Instruction instr) { | ||
| 561 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 562 | Compile_DestEnable(instr, SRC1); | ||
| 563 | } | ||
| 564 | |||
| 565 | void JitShader::Compile_RCP(Instruction instr) { | ||
| 566 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 567 | |||
| 568 | // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica | ||
| 569 | // performs this operation more accurately. This should be checked on hardware. | ||
| 570 | rcpss(SRC1, SRC1); | ||
| 571 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYZW -> XXXX | ||
| 572 | |||
| 573 | Compile_DestEnable(instr, SRC1); | ||
| 574 | } | ||
| 575 | |||
| 576 | void JitShader::Compile_RSQ(Instruction instr) { | ||
| 577 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 578 | |||
| 579 | // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica | ||
| 580 | // performs this operation more accurately. This should be checked on hardware. | ||
| 581 | rsqrtss(SRC1, SRC1); | ||
| 582 | shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYZW -> XXXX | ||
| 583 | |||
| 584 | Compile_DestEnable(instr, SRC1); | ||
| 585 | } | ||
| 586 | |||
| 587 | void JitShader::Compile_NOP(Instruction instr) {} | ||
| 588 | |||
| 589 | void JitShader::Compile_END(Instruction instr) { | ||
| 590 | ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8); | ||
| 591 | ret(); | ||
| 592 | } | ||
| 593 | |||
| 594 | void JitShader::Compile_CALL(Instruction instr) { | ||
| 595 | // Push offset of the return | ||
| 596 | push(qword, (instr.flow_control.dest_offset + instr.flow_control.num_instructions)); | ||
| 597 | |||
| 598 | // Call the subroutine | ||
| 599 | call(instruction_labels[instr.flow_control.dest_offset]); | ||
| 600 | |||
| 601 | // Skip over the return offset that's on the stack | ||
| 602 | add(rsp, 8); | ||
| 603 | } | ||
| 604 | |||
| 605 | void JitShader::Compile_CALLC(Instruction instr) { | ||
| 606 | Compile_EvaluateCondition(instr); | ||
| 607 | Label b; | ||
| 608 | jz(b); | ||
| 609 | Compile_CALL(instr); | ||
| 610 | L(b); | ||
| 611 | } | ||
| 612 | |||
| 613 | void JitShader::Compile_CALLU(Instruction instr) { | ||
| 614 | Compile_UniformCondition(instr); | ||
| 615 | Label b; | ||
| 616 | jz(b); | ||
| 617 | Compile_CALL(instr); | ||
| 618 | L(b); | ||
| 619 | } | ||
| 620 | |||
| 621 | void JitShader::Compile_CMP(Instruction instr) { | ||
| 622 | using Op = Instruction::Common::CompareOpType::Op; | ||
| 623 | Op op_x = instr.common.compare_op.x; | ||
| 624 | Op op_y = instr.common.compare_op.y; | ||
| 625 | |||
| 626 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 627 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 628 | |||
| 629 | // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to | ||
| 630 | // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here | ||
| 631 | // because they don't match when used with NaNs. | ||
| 632 | static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE}; | ||
| 633 | |||
| 634 | bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); | ||
| 635 | Xmm lhs_x = invert_op_x ? SRC2 : SRC1; | ||
| 636 | Xmm rhs_x = invert_op_x ? SRC1 : SRC2; | ||
| 637 | |||
| 638 | if (op_x == op_y) { | ||
| 639 | // Compare X-component and Y-component together | ||
| 640 | cmpps(lhs_x, rhs_x, cmp[op_x]); | ||
| 641 | movq(COND0, lhs_x); | ||
| 642 | |||
| 643 | mov(COND1, COND0); | ||
| 644 | } else { | ||
| 645 | bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual); | ||
| 646 | Xmm lhs_y = invert_op_y ? SRC2 : SRC1; | ||
| 647 | Xmm rhs_y = invert_op_y ? SRC1 : SRC2; | ||
| 648 | |||
| 649 | // Compare X-component | ||
| 650 | movaps(SCRATCH, lhs_x); | ||
| 651 | cmpss(SCRATCH, rhs_x, cmp[op_x]); | ||
| 652 | |||
| 653 | // Compare Y-component | ||
| 654 | cmpps(lhs_y, rhs_y, cmp[op_y]); | ||
| 655 | |||
| 656 | movq(COND0, SCRATCH); | ||
| 657 | movq(COND1, lhs_y); | ||
| 658 | } | ||
| 659 | |||
| 660 | shr(COND0.cvt32(), 31); // ignores upper 32 bits in source | ||
| 661 | shr(COND1, 63); | ||
| 662 | } | ||
| 663 | |||
// Emits the code for a MAD / MADI instruction (multiply, then add).
// The three sources are loaded into SRC1, SRC2 and SRC3, the product of SRC1 and SRC2 is added to
// SRC3, and SRC1 is then handed to Compile_DestEnable for the destination write.
| 664 | void JitShader::Compile_MAD(Instruction instr) { | ||
| 665 | Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); | ||
| 666 | |||
// MADI reads its second and third operands from the src2i / src3i fields instead of src2 / src3
| 667 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { | ||
| 668 | Compile_SwizzleSrc(instr, 2, instr.mad.src2i, SRC2); | ||
| 669 | Compile_SwizzleSrc(instr, 3, instr.mad.src3i, SRC3); | ||
| 670 | } else { | ||
| 671 | Compile_SwizzleSrc(instr, 2, instr.mad.src2, SRC2); | ||
| 672 | Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3); | ||
| 673 | } | ||
| 674 | |||
// Multiply with the PICA zero-by-inf handling of Compile_SanitizedMul (clobbers SRC2 and SCRATCH);
// the product is presumably left in SRC1, which is what the following add and store consume
| 675 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||
| 676 | addps(SRC1, SRC3); | ||
| 677 | |||
| 678 | Compile_DestEnable(instr, SRC1); | ||
| 679 | } | ||
| 680 | |||
// Emits the code for an IFU / IFC instruction (forward-only if / else).
// The "true" block covers the instructions up to flow_control.dest_offset. When
// flow_control.num_instructions is non-zero, an "else" block of that many instructions follows.
| 681 | void JitShader::Compile_IF(Instruction instr) { | ||
| 682 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, | ||
| 683 | "Backwards if-statements not supported"); | ||
| 684 | Label l_else, l_endif; | ||
| 685 | |||
| 686 | // Evaluate the "IF" condition | ||
| 687 | if (instr.opcode.Value() == OpCode::Id::IFU) { | ||
| 688 | Compile_UniformCondition(instr); | ||
| 689 | } else if (instr.opcode.Value() == OpCode::Id::IFC) { | ||
| 690 | Compile_EvaluateCondition(instr); | ||
| 691 | } | ||
// Skip the "true" block when the zero flag is set; presumably the condition helpers above leave
// ZF set when the condition is false - confirm against their implementation
| 692 | jz(l_else, T_NEAR); | ||
| 693 | |||
| 694 | // Compile the code that corresponds to the condition evaluating as true | ||
| 695 | Compile_Block(instr.flow_control.dest_offset); | ||
| 696 | |||
| 697 | // If there isn't an "ELSE" condition, we are done here | ||
| 698 | if (instr.flow_control.num_instructions == 0) { | ||
| 699 | L(l_else); | ||
| 700 | return; | ||
| 701 | } | ||
| 702 | |||
// The "true" block must jump over the "else" block that is emitted next
| 703 | jmp(l_endif, T_NEAR); | ||
| 704 | |||
| 705 | L(l_else); | ||
| 706 | // This code corresponds to the "ELSE" condition | ||
| 707 | // Compile the code that corresponds to the condition evaluating as false | ||
| 708 | Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions); | ||
| 709 | |||
| 710 | L(l_endif); | ||
| 711 | } | ||
| 712 | |||
// Emits the code for a LOOP instruction.
// Only forward, non-nested loops are accepted. The loop parameters are read at run time from the
// integer uniform selected by flow_control.int_uniform_id, and the body runs through
// flow_control.dest_offset inclusive.
| 713 | void JitShader::Compile_LOOP(Instruction instr) { | ||
| 714 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, | ||
| 715 | "Backwards loops not supported"); | ||
| 716 | Compile_Assert(!looping, "Nested loops not supported"); | ||
| 717 | |||
// Compile-time flag (not emitted code): marks that a loop body is currently being compiled
| 718 | looping = true; | ||
| 719 | |||
| 720 | // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. | ||
| 721 | // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by | ||
| 722 | // 4 bits) to be used as an offset into the 16-byte vector registers later | ||
| 723 | size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id); | ||
| 724 | mov(LOOPCOUNT, dword[SETUP + offset]); | ||
| 725 | mov(LOOPCOUNT_REG, LOOPCOUNT); | ||
| 726 | shr(LOOPCOUNT_REG, 4); | ||
| 727 | and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start | ||
| 728 | mov(LOOPINC, LOOPCOUNT); | ||
| 729 | shr(LOOPINC, 12); | ||
| 730 | and(LOOPINC, 0xFF0); // Z-component is the incrementer | ||
| 731 | movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count | ||
| 732 | add(LOOPCOUNT, 1); // Iteration count is X-component + 1 | ||
| 733 | |||
| 734 | Label l_loop_start; | ||
| 735 | L(l_loop_start); | ||
| 736 | |||
// Loop body: Compile_Block stops before its argument, so dest_offset itself is included
| 737 | Compile_Block(instr.flow_control.dest_offset + 1); | ||
| 738 | |||
| 739 | add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component | ||
| 740 | sub(LOOPCOUNT, 1); // Decrement the remaining iteration count by 1 | ||
| 741 | jnz(l_loop_start); // Loop again while the remaining count is non-zero | ||
| 742 | |||
| 743 | looping = false; | ||
| 744 | } | ||
| 745 | |||
// Emits the code for a JMPC / JMPU instruction: evaluates the condition and conditionally jumps
// to the label of the instruction at flow_control.dest_offset.
| 746 | void JitShader::Compile_JMP(Instruction instr) { | ||
| 747 | if (instr.opcode.Value() == OpCode::Id::JMPC) | ||
| 748 | Compile_EvaluateCondition(instr); | ||
| 749 | else if (instr.opcode.Value() == OpCode::Id::JMPU) | ||
| 750 | Compile_UniformCondition(instr); | ||
| 751 | else | ||
| 752 | UNREACHABLE(); | ||
| 753 | |||
// For JMPU only, bit 0 of num_instructions selects the inverted form of the jump
| 754 | bool inverted_condition = | ||
| 755 | (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1); | ||
| 756 | |||
// The target label may not be bound yet (forward jump); it is bound when Compile_NextInstr
// reaches that instruction
| 757 | Label& b = instruction_labels[instr.flow_control.dest_offset]; | ||
| 758 | if (inverted_condition) { | ||
| 759 | jz(b, T_NEAR); | ||
| 760 | } else { | ||
| 761 | jnz(b, T_NEAR); | ||
| 762 | } | ||
| 763 | } | ||
| 764 | |||
// Compiles instructions starting at the current program_counter until it reaches `end`
// (exclusive). program_counter is advanced by Compile_NextInstr, one instruction per call.
| 765 | void JitShader::Compile_Block(unsigned end) { | ||
| 766 | while (program_counter < end) { | ||
| 767 | Compile_NextInstr(); | ||
| 768 | } | ||
| 769 | } | ||
| 770 | |||
// Emits a conditional return: the value peeked at [rsp + 8] is compared with the current
// program_counter (a constant baked in at compile time) and `ret` is executed only when they match.
| 771 | void JitShader::Compile_Return() { | ||
| 772 | // Peek return offset on the stack and check if we're at that offset | ||
| 773 | mov(rax, qword[rsp + 8]); | ||
| 774 | cmp(eax, (program_counter)); | ||
| 775 | |||
| 776 | // If so, jump back to before CALL | ||
| 777 | Label b; | ||
// Offsets differ: hop over the ret and continue with the next instruction
| 778 | jnz(b); | ||
| 779 | ret(); | ||
| 780 | L(b); | ||
| 781 | } | ||
| 782 | |||
// Compiles the instruction at program_counter and advances program_counter by one.
// A conditional return is emitted first when this offset is a recorded subroutine return
// location, and the instruction's label is bound so jumps and Run() can target it.
| 783 | void JitShader::Compile_NextInstr() { | ||
// return_offsets is sorted by FindReturnOffsets, which is what binary_search requires
| 784 | if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) { | ||
| 785 | Compile_Return(); | ||
| 786 | } | ||
| 787 | |||
// Bind this instruction's label to the current position in the emitted code
| 788 | L(instruction_labels[program_counter]); | ||
| 789 | |||
// Fetch the instruction word; program_counter now refers to the following instruction
| 790 | Instruction instr = {(*program_code)[program_counter++]}; | ||
| 791 | |||
// Look up the emitter (a pointer to a JitShader member function) for this opcode
| 792 | OpCode::Id opcode = instr.opcode.Value(); | ||
| 793 | auto instr_func = instr_table[static_cast<unsigned>(opcode)]; | ||
| 794 | |||
| 795 | if (instr_func) { | ||
| 796 | // JIT the instruction! | ||
| 797 | ((*this).*instr_func)(instr); | ||
| 798 | } else { | ||
| 799 | // Unhandled instruction | ||
| 800 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", | ||
| 801 | instr.opcode.Value().EffectiveOpCode(), instr.hex); | ||
| 802 | } | ||
| 803 | } | ||
| 804 | |||
// Rebuilds return_offsets by scanning the whole program for CALL / CALLC / CALLU instructions.
// Each call contributes dest_offset + num_instructions; duplicates are kept. The result is sorted
// so that Compile_NextInstr can use a binary search.
| 805 | void JitShader::FindReturnOffsets() { | ||
| 806 | return_offsets.clear(); | ||
| 807 | |||
| 808 | for (size_t offset = 0; offset < program_code->size(); ++offset) { | ||
| 809 | Instruction instr = {(*program_code)[offset]}; | ||
| 810 | |||
| 811 | switch (instr.opcode.Value()) { | ||
| 812 | case OpCode::Id::CALL: | ||
| 813 | case OpCode::Id::CALLC: | ||
| 814 | case OpCode::Id::CALLU: | ||
// Record the offset right after the called range, where the return check is inserted
| 815 | return_offsets.push_back(instr.flow_control.dest_offset + | ||
| 816 | instr.flow_control.num_instructions); | ||
| 817 | break; | ||
| 818 | default: | ||
| 819 | break; | ||
| 820 | } | ||
| 821 | } | ||
| 822 | |||
| 823 | // Sort for efficient binary search later | ||
| 824 | std::sort(return_offsets.begin(), return_offsets.end()); | ||
| 825 | } | ||
| 826 | |||
// Compiles a whole Pica shader program into host code.
// Emits a prologue (save callee-saved registers, load SETUP / STATE, initialise constants) that
// ends by jumping to the host address passed as third argument, followed by the code for every
// instruction of the program. The input pointers are only kept for the duration of this call.
| 827 | void JitShader::Compile(const std::array<u32, 1024>* program_code_, | ||
| 828 | const std::array<u32, 1024>* swizzle_data_) { | ||
| 829 | program_code = program_code_; | ||
| 830 | swizzle_data = swizzle_data_; | ||
| 831 | |||
| 832 | // Reset flow control state | ||
// The entry point of the compiled shader is the current emit position
| 833 | program = (CompiledShader*)getCurr(); | ||
| 834 | program_counter = 0; | ||
| 835 | looping = false; | ||
| 836 | instruction_labels.fill(Xbyak::Label()); | ||
| 837 | |||
| 838 | // Find all `CALL` instructions and identify return locations | ||
| 839 | FindReturnOffsets(); | ||
| 840 | |||
| 841 | // The stack pointer is 8 modulo 16 at the entry of a procedure | ||
| 842 | ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8); | ||
| 843 | |||
// First two arguments of the compiled function: the shader setup and the unit state
| 844 | mov(SETUP, ABI_PARAM1); | ||
| 845 | mov(STATE, ABI_PARAM2); | ||
| 846 | |||
| 847 | // Zero address/loop registers | ||
| 848 | xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); | ||
| 849 | xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); | ||
| 850 | xor(LOOPCOUNT_REG, LOOPCOUNT_REG); | ||
| 851 | |||
| 852 | // Used to set a register to one | ||
| 853 | static const __m128 one = {1.f, 1.f, 1.f, 1.f}; | ||
| 854 | mov(rax, reinterpret_cast<size_t>(&one)); | ||
| 855 | movaps(ONE, xword[rax]); | ||
| 856 | |||
| 857 | // Used to negate registers | ||
| 858 | static const __m128 neg = {-0.f, -0.f, -0.f, -0.f}; | ||
| 859 | mov(rax, reinterpret_cast<size_t>(&neg)); | ||
| 860 | movaps(NEGBIT, xword[rax]); | ||
| 861 | |||
| 862 | // Jump to start of the shader program | ||
// (the third argument is the host address of the entry instruction's label)
| 863 | jmp(ABI_PARAM3); | ||
| 864 | |||
| 865 | // Compile entire program | ||
| 866 | Compile_Block(static_cast<unsigned>(program_code->size())); | ||
| 867 | |||
| 868 | // Free memory that's no longer needed | ||
| 869 | program_code = nullptr; | ||
| 870 | swizzle_data = nullptr; | ||
| 871 | return_offsets.clear(); | ||
| 872 | return_offsets.shrink_to_fit(); | ||
| 873 | |||
| 874 | ready(); | ||
| 875 | |||
| 876 | ASSERT_MSG(getSize() <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); | ||
// getSize() returns size_t, so it is formatted with %zu; %lu mismatches the argument on
// platforms where unsigned long is 32-bit (e.g. 64-bit Windows)
| 877 | LOG_DEBUG(HW_GPU, "Compiled shader size=%zu", getSize()); | ||
| 878 | } | ||
| 879 | |||
// Constructs the code generator with MAX_SHADER_SIZE passed to Xbyak::CodeGenerator as its size.
| 880 | JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {} | ||
| 881 | |||
| 882 | } // namespace Shader | ||
| 883 | |||
| 884 | } // namespace Pica | ||
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h new file mode 100644 index 000000000..29e9875ea --- /dev/null +++ b/src/video_core/shader/shader_jit_x64_compiler.h | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | #include <nihstro/shader_bytecode.h> | ||
| 12 | #include <xbyak.h> | ||
| 13 | #include "common/bit_set.h" | ||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "common/x64/emitter.h" | ||
| 16 | #include "video_core/shader/shader.h" | ||
| 17 | |||
| 18 | using nihstro::Instruction; | ||
| 19 | using nihstro::OpCode; | ||
| 20 | using nihstro::SwizzlePattern; | ||
| 21 | |||
| 22 | namespace Pica { | ||
| 23 | |||
| 24 | namespace Shader { | ||
| 25 | |||
| 26 | /// Size of the code buffer allocated for each compiled shader (64 KiB) | ||
| 27 | constexpr size_t MAX_SHADER_SIZE = 1024 * 64; | ||
| 28 | |||
| 29 | /** | ||
| 30 | * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 | ||
| 31 | * code that can be executed on the host machine directly. | ||
| 32 | */ | ||
| 33 | class JitShader : public Xbyak::CodeGenerator { | ||
| 34 | public: | ||
| 35 | JitShader(); | ||
| 36 | |||
/// Executes the compiled shader: calls `program` with the setup, the unit state and the host
/// address bound to the label of Pica instruction `offset`, which is where execution starts.
/// `offset` is used to index instruction_labels without a bounds check.
| 37 | void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { | ||
| 38 | program(&setup, &state, instruction_labels[offset].getAddress()); | ||
| 39 | } | ||
| 40 | |||
/// Compiles the given program into host code. Both pointers are only held while compiling.
| 41 | void Compile(const std::array<u32, 1024>* program_code, | ||
| 42 | const std::array<u32, 1024>* swizzle_data); | ||
| 43 | |||
/// Per-opcode emitters. They are invoked through the instruction table consulted by
/// Compile_NextInstr; presumably public so that table can take their addresses - confirm.
| 44 | void Compile_ADD(Instruction instr); | ||
| 45 | void Compile_DP3(Instruction instr); | ||
| 46 | void Compile_DP4(Instruction instr); | ||
| 47 | void Compile_DPH(Instruction instr); | ||
| 48 | void Compile_EX2(Instruction instr); | ||
| 49 | void Compile_LG2(Instruction instr); | ||
| 50 | void Compile_MUL(Instruction instr); | ||
| 51 | void Compile_SGE(Instruction instr); | ||
| 52 | void Compile_SLT(Instruction instr); | ||
| 53 | void Compile_FLR(Instruction instr); | ||
| 54 | void Compile_MAX(Instruction instr); | ||
| 55 | void Compile_MIN(Instruction instr); | ||
| 56 | void Compile_RCP(Instruction instr); | ||
| 57 | void Compile_RSQ(Instruction instr); | ||
| 58 | void Compile_MOVA(Instruction instr); | ||
| 59 | void Compile_MOV(Instruction instr); | ||
| 60 | void Compile_NOP(Instruction instr); | ||
| 61 | void Compile_END(Instruction instr); | ||
| 62 | void Compile_CALL(Instruction instr); | ||
| 63 | void Compile_CALLC(Instruction instr); | ||
| 64 | void Compile_CALLU(Instruction instr); | ||
| 65 | void Compile_IF(Instruction instr); | ||
| 66 | void Compile_LOOP(Instruction instr); | ||
| 67 | void Compile_JMP(Instruction instr); | ||
| 68 | void Compile_CMP(Instruction instr); | ||
| 69 | void Compile_MAD(Instruction instr); | ||
| 70 | |||
| 71 | private: | ||
/// Compiles instructions from the current program_counter up to (not including) `end`.
| 72 | void Compile_Block(unsigned end); | ||
/// Compiles the instruction at program_counter and advances program_counter by one.
| 73 | void Compile_NextInstr(); | ||
| 74 | |||
| 75 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, | ||
| 76 | Xbyak::Xmm dest); | ||
| 77 | void Compile_DestEnable(Instruction instr, Xbyak::Xmm dest); | ||
| 78 | |||
| 79 | /** | ||
| 80 | * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying | ||
| 81 | * zero by inf. Clobbers `src2` and `scratch`. | ||
| 82 | */ | ||
| 83 | void Compile_SanitizedMul(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch); | ||
| 84 | |||
| 85 | void Compile_EvaluateCondition(Instruction instr); | ||
| 86 | void Compile_UniformCondition(Instruction instr); | ||
| 87 | |||
| 88 | /** | ||
| 89 | * Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction. | ||
| 90 | */ | ||
| 91 | void Compile_Return(); | ||
| 92 | |||
| 93 | BitSet32 PersistentCallerSavedRegs(); | ||
| 94 | |||
| 95 | /** | ||
| 96 | * Assertion evaluated at compile-time, but only triggered if executed at runtime. | ||
| 97 | * @param msg Message to be logged if the assertion fails. | ||
| 98 | */ | ||
| 99 | void Compile_Assert(bool condition, const char* msg); | ||
| 100 | |||
| 101 | /** | ||
| 102 | * Analyzes the entire shader program for `CALL` instructions before emitting any code, | ||
| 103 | * identifying the locations where a return needs to be inserted. | ||
| 104 | */ | ||
| 105 | void FindReturnOffsets(); | ||
| 106 | |||
/// Program and swizzle data being compiled; only set while Compile() is running
| 107 | const std::array<u32, 1024>* program_code = nullptr; | ||
| 108 | const std::array<u32, 1024>* swizzle_data = nullptr; | ||
| 109 | |||
| 110 | /// Mapping of Pica VS instructions to pointers in the emitted code | ||
| 111 | std::array<Xbyak::Label, 1024> instruction_labels; | ||
| 112 | |||
| 113 | /// Offsets in code where a return needs to be inserted | ||
| 114 | std::vector<unsigned> return_offsets; | ||
| 115 | |||
| 116 | unsigned program_counter = 0; ///< Offset of the next instruction to decode | ||
| 117 | bool looping = false; ///< True if compiling a loop, used to check for nested loops | ||
| 118 | |||
/// Signature of the emitted code: shader setup, unit state and the host address to start at
| 119 | using CompiledShader = void(const void* setup, void* state, const u8* start_addr); | ||
/// Entry point of the emitted code, set by Compile(); nullptr until then
| 120 | CompiledShader* program = nullptr; | ||
| 121 | }; | ||
| 122 | |||
| 123 | } // namespace Shader | ||
| 124 | |||
| 125 | } // namespace Pica | ||