summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt17
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt172
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt62
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt118
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt2
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt59
-rw-r--r--src/android/app/src/main/jni/native.cpp21
-rw-r--r--src/android/app/src/main/res/values/strings.xml24
-rw-r--r--src/audio_core/device/audio_buffers.h8
-rw-r--r--src/audio_core/device/device_session.cpp12
-rw-r--r--src/audio_core/device/device_session.h7
-rw-r--r--src/audio_core/in/audio_in_system.cpp5
-rw-r--r--src/audio_core/out/audio_out_system.cpp4
-rw-r--r--src/audio_core/renderer/adsp/adsp.cpp1
-rw-r--r--src/audio_core/renderer/adsp/audio_renderer.cpp5
-rw-r--r--src/audio_core/renderer/adsp/command_list_processor.cpp1
-rw-r--r--src/audio_core/renderer/command/data_source/decode.cpp23
-rw-r--r--src/audio_core/renderer/command/effect/compressor.cpp8
-rw-r--r--src/audio_core/renderer/command/effect/delay.cpp14
-rw-r--r--src/audio_core/renderer/command/effect/i3dl2_reverb.cpp4
-rw-r--r--src/audio_core/renderer/command/effect/light_limiter.cpp12
-rw-r--r--src/audio_core/renderer/command/effect/reverb.cpp12
-rw-r--r--src/audio_core/renderer/command/performance/performance.cpp15
-rw-r--r--src/audio_core/renderer/command/sink/circular_buffer.cpp4
-rw-r--r--src/audio_core/renderer/command/sink/device.cpp5
-rw-r--r--src/audio_core/renderer/mix/mix_context.cpp6
-rw-r--r--src/audio_core/renderer/nodes/node_states.cpp4
-rw-r--r--src/audio_core/renderer/nodes/node_states.h2
-rw-r--r--src/audio_core/renderer/system.cpp1
-rw-r--r--src/audio_core/sink/null_sink.h2
-rw-r--r--src/audio_core/sink/sink_stream.cpp16
-rw-r--r--src/audio_core/sink/sink_stream.h5
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/common/fs/fs.cpp8
-rw-r--r--src/common/fs/fs_types.h2
-rw-r--r--src/common/ring_buffer.h3
-rw-r--r--src/common/scratch_buffer.h9
-rw-r--r--src/common/settings.h1
-rw-r--r--src/common/steady_clock.cpp5
-rw-r--r--src/common/wall_clock.cpp77
-rw-r--r--src/common/wall_clock.h89
-rw-r--r--src/common/x64/cpu_detect.cpp3
-rw-r--r--src/common/x64/cpu_wait.cpp20
-rw-r--r--src/common/x64/native_clock.cpp166
-rw-r--r--src/common/x64/native_clock.h59
-rw-r--r--src/common/x64/rdtsc.cpp39
-rw-r--r--src/common/x64/rdtsc.h37
-rw-r--r--src/core/CMakeLists.txt1
-rw-r--r--src/core/core_timing.cpp52
-rw-r--r--src/core/core_timing.h14
-rw-r--r--src/core/core_timing_util.h58
-rw-r--r--src/core/file_sys/patch_manager.cpp9
-rw-r--r--src/core/file_sys/system_archive/time_zone_binary.cpp2
-rw-r--r--src/core/file_sys/vfs_concat.cpp14
-rw-r--r--src/core/file_sys/vfs_real.cpp101
-rw-r--r--src/core/file_sys/vfs_real.h22
-rw-r--r--src/core/hle/kernel/k_scheduler.cpp5
-rw-r--r--src/core/hle/kernel/k_synchronization_object.cpp3
-rw-r--r--src/core/hle/kernel/k_thread.cpp23
-rw-r--r--src/core/hle/kernel/k_thread.h7
-rw-r--r--src/core/hle/kernel/svc/svc_info.cpp4
-rw-r--r--src/core/hle/kernel/svc/svc_ipc.cpp7
-rw-r--r--src/core/hle/kernel/svc/svc_synchronization.cpp10
-rw-r--r--src/core/hle/kernel/svc/svc_thread.cpp2
-rw-r--r--src/core/hle/kernel/svc/svc_tick.cpp10
-rw-r--r--src/core/hle/service/audio/audin_u.cpp16
-rw-r--r--src/core/hle/service/audio/audout_u.cpp15
-rw-r--r--src/core/hle/service/audio/audren_u.cpp22
-rw-r--r--src/core/hle/service/audio/audren_u.h1
-rw-r--r--src/core/hle/service/audio/hwopus.cpp9
-rw-r--r--src/core/hle/service/hid/hidbus.cpp1
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdevice.h6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp31
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h30
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp19
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.h21
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp32
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h38
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp59
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.h36
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.h8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp15
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h12
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.cpp6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.h8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.cpp20
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.h20
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp8
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.h8
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.cpp24
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.h3
-rw-r--r--src/core/hle/service/nvnflinger/nvnflinger.cpp3
-rw-r--r--src/core/hle/service/nvnflinger/parcel.h7
-rw-r--r--src/core/hle/service/time/clock_types.h13
-rw-r--r--src/core/hle/service/time/standard_steady_clock_core.cpp2
-rw-r--r--src/core/hle/service/time/tick_based_steady_clock_core.cpp2
-rw-r--r--src/core/hle/service/time/time.cpp4
-rw-r--r--src/core/hle/service/time/time_sharedmemory.cpp5
-rw-r--r--src/core/hle/service/time/time_zone_manager.cpp11
-rw-r--r--src/core/hle/service/time/time_zone_service.cpp10
-rw-r--r--src/shader_recompiler/backend/glsl/glsl_emit_context.cpp2
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.cpp2
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp2
-rw-r--r--src/shader_recompiler/runtime_info.h3
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h13
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h4
-rw-r--r--src/video_core/cdma_pusher.h1
-rw-r--r--src/video_core/dma_pusher.h8
-rw-r--r--src/video_core/engines/draw_manager.cpp10
-rw-r--r--src/video_core/engines/maxwell_dma.cpp35
-rw-r--r--src/video_core/gpu.cpp13
-rw-r--r--src/video_core/host1x/codecs/h264.cpp4
-rw-r--r--src/video_core/memory_manager.cpp17
-rw-r--r--src/video_core/memory_manager.h15
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp27
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp72
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h11
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp78
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h9
-rw-r--r--src/video_core/shader_cache.cpp4
-rw-r--r--src/video_core/texture_cache/image_base.h5
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp52
-rw-r--r--src/video_core/texture_cache/image_view_base.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h70
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h26
-rw-r--r--src/video_core/texture_cache/util.cpp48
-rw-r--r--src/video_core/texture_cache/util.h31
-rw-r--r--src/video_core/textures/texture.cpp7
-rw-r--r--src/video_core/transform_feedback.cpp8
-rw-r--r--src/video_core/transform_feedback.h2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp12
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h34
-rw-r--r--src/yuzu/CMakeLists.txt2
-rw-r--r--src/yuzu/configuration/config.cpp2
-rw-r--r--src/yuzu/configuration/configure_dialog.cpp5
-rw-r--r--src/yuzu/configuration/configure_dialog.h7
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp55
-rw-r--r--src/yuzu/configuration/configure_graphics.h3
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp10
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.h1
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui10
-rw-r--r--src/yuzu/configuration/configure_per_game.cpp5
-rw-r--r--src/yuzu/configuration/configure_per_game.h3
-rw-r--r--src/yuzu/main.cpp8
-rw-r--r--src/yuzu/main.h6
-rw-r--r--src/yuzu/vk_device_info.cpp61
-rw-r--r--src/yuzu/vk_device_info.h36
160 files changed, 1750 insertions, 1263 deletions
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt
index f0a6753a9..b1771b424 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt
@@ -27,13 +27,13 @@ import android.view.MotionEvent
27import android.view.Surface 27import android.view.Surface
28import android.view.View 28import android.view.View
29import android.view.inputmethod.InputMethodManager 29import android.view.inputmethod.InputMethodManager
30import android.widget.Toast
30import androidx.activity.viewModels 31import androidx.activity.viewModels
31import androidx.appcompat.app.AppCompatActivity 32import androidx.appcompat.app.AppCompatActivity
32import androidx.core.view.WindowCompat 33import androidx.core.view.WindowCompat
33import androidx.core.view.WindowInsetsCompat 34import androidx.core.view.WindowInsetsCompat
34import androidx.core.view.WindowInsetsControllerCompat 35import androidx.core.view.WindowInsetsControllerCompat
35import androidx.navigation.fragment.NavHostFragment 36import androidx.navigation.fragment.NavHostFragment
36import kotlin.math.roundToInt
37import org.yuzu.yuzu_emu.NativeLibrary 37import org.yuzu.yuzu_emu.NativeLibrary
38import org.yuzu.yuzu_emu.R 38import org.yuzu.yuzu_emu.R
39import org.yuzu.yuzu_emu.databinding.ActivityEmulationBinding 39import org.yuzu.yuzu_emu.databinding.ActivityEmulationBinding
@@ -44,8 +44,10 @@ import org.yuzu.yuzu_emu.model.Game
44import org.yuzu.yuzu_emu.utils.ControllerMappingHelper 44import org.yuzu.yuzu_emu.utils.ControllerMappingHelper
45import org.yuzu.yuzu_emu.utils.ForegroundService 45import org.yuzu.yuzu_emu.utils.ForegroundService
46import org.yuzu.yuzu_emu.utils.InputHandler 46import org.yuzu.yuzu_emu.utils.InputHandler
47import org.yuzu.yuzu_emu.utils.MemoryUtil
47import org.yuzu.yuzu_emu.utils.NfcReader 48import org.yuzu.yuzu_emu.utils.NfcReader
48import org.yuzu.yuzu_emu.utils.ThemeHelper 49import org.yuzu.yuzu_emu.utils.ThemeHelper
50import kotlin.math.roundToInt
49 51
50class EmulationActivity : AppCompatActivity(), SensorEventListener { 52class EmulationActivity : AppCompatActivity(), SensorEventListener {
51 private lateinit var binding: ActivityEmulationBinding 53 private lateinit var binding: ActivityEmulationBinding
@@ -102,6 +104,19 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener {
102 inputHandler = InputHandler() 104 inputHandler = InputHandler()
103 inputHandler.initialize() 105 inputHandler.initialize()
104 106
107 val memoryUtil = MemoryUtil(this)
108 if (memoryUtil.isLessThan(8, MemoryUtil.Gb)) {
109 Toast.makeText(
110 this,
111 getString(
112 R.string.device_memory_inadequate,
113 memoryUtil.getDeviceRAM(),
114 "8 ${getString(R.string.memory_gigabyte)}"
115 ),
116 Toast.LENGTH_LONG
117 ).show()
118 }
119
105 // Start a foreground service to prevent the app from getting killed in the background 120 // Start a foreground service to prevent the app from getting killed in the background
106 val startIntent = Intent(this, ForegroundService::class.java) 121 val startIntent = Intent(this, ForegroundService::class.java)
107 startForegroundService(startIntent) 122 startForegroundService(startIntent)
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt
index 6f8adbba5..5a36ffad4 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt
@@ -68,79 +68,109 @@ class HomeSettingsFragment : Fragment() {
68 override fun onViewCreated(view: View, savedInstanceState: Bundle?) { 68 override fun onViewCreated(view: View, savedInstanceState: Bundle?) {
69 mainActivity = requireActivity() as MainActivity 69 mainActivity = requireActivity() as MainActivity
70 70
71 val optionsList: MutableList<HomeSetting> = mutableListOf( 71 val optionsList: MutableList<HomeSetting> = mutableListOf<HomeSetting>().apply {
72 HomeSetting( 72 add(
73 R.string.advanced_settings, 73 HomeSetting(
74 R.string.settings_description, 74 R.string.advanced_settings,
75 R.drawable.ic_settings 75 R.string.settings_description,
76 ) { SettingsActivity.launch(requireContext(), SettingsFile.FILE_NAME_CONFIG, "") }, 76 R.drawable.ic_settings
77 HomeSetting( 77 ) { SettingsActivity.launch(requireContext(), SettingsFile.FILE_NAME_CONFIG, "") }
78 R.string.open_user_folder, 78 )
79 R.string.open_user_folder_description, 79 add(
80 R.drawable.ic_folder_open 80 HomeSetting(
81 ) { openFileManager() }, 81 R.string.open_user_folder,
82 HomeSetting( 82 R.string.open_user_folder_description,
83 R.string.preferences_theme, 83 R.drawable.ic_folder_open
84 R.string.theme_and_color_description, 84 ) { openFileManager() }
85 R.drawable.ic_palette 85 )
86 ) { SettingsActivity.launch(requireContext(), Settings.SECTION_THEME, "") }, 86 add(
87 HomeSetting( 87 HomeSetting(
88 R.string.install_gpu_driver, 88 R.string.preferences_theme,
89 R.string.install_gpu_driver_description, 89 R.string.theme_and_color_description,
90 R.drawable.ic_exit 90 R.drawable.ic_palette
91 ) { driverInstaller() }, 91 ) { SettingsActivity.launch(requireContext(), Settings.SECTION_THEME, "") }
92 HomeSetting( 92 )
93 R.string.install_amiibo_keys, 93
94 R.string.install_amiibo_keys_description, 94 if (GpuDriverHelper.supportsCustomDriverLoading()) {
95 R.drawable.ic_nfc 95 add(
96 ) { mainActivity.getAmiiboKey.launch(arrayOf("*/*")) }, 96 HomeSetting(
97 HomeSetting( 97 R.string.install_gpu_driver,
98 R.string.install_game_content, 98 R.string.install_gpu_driver_description,
99 R.string.install_game_content_description, 99 R.drawable.ic_exit
100 R.drawable.ic_system_update_alt 100 ) { driverInstaller() }
101 ) { mainActivity.installGameUpdate.launch(arrayOf("*/*")) },
102 HomeSetting(
103 R.string.select_games_folder,
104 R.string.select_games_folder_description,
105 R.drawable.ic_add
106 ) {
107 mainActivity.getGamesDirectory.launch(Intent(Intent.ACTION_OPEN_DOCUMENT_TREE).data)
108 },
109 HomeSetting(
110 R.string.manage_save_data,
111 R.string.import_export_saves_description,
112 R.drawable.ic_save
113 ) {
114 ImportExportSavesFragment().show(
115 parentFragmentManager,
116 ImportExportSavesFragment.TAG
117 ) 101 )
118 },
119 HomeSetting(
120 R.string.install_prod_keys,
121 R.string.install_prod_keys_description,
122 R.drawable.ic_unlock
123 ) { mainActivity.getProdKey.launch(arrayOf("*/*")) },
124 HomeSetting(
125 R.string.install_firmware,
126 R.string.install_firmware_description,
127 R.drawable.ic_firmware
128 ) { mainActivity.getFirmware.launch(arrayOf("application/zip")) },
129 HomeSetting(
130 R.string.share_log,
131 R.string.share_log_description,
132 R.drawable.ic_log
133 ) { shareLog() },
134 HomeSetting(
135 R.string.about,
136 R.string.about_description,
137 R.drawable.ic_info_outline
138 ) {
139 exitTransition = MaterialSharedAxis(MaterialSharedAxis.X, true)
140 parentFragmentManager.primaryNavigationFragment?.findNavController()
141 ?.navigate(R.id.action_homeSettingsFragment_to_aboutFragment)
142 } 102 }
143 ) 103
104 add(
105 HomeSetting(
106 R.string.install_amiibo_keys,
107 R.string.install_amiibo_keys_description,
108 R.drawable.ic_nfc
109 ) { mainActivity.getAmiiboKey.launch(arrayOf("*/*")) }
110 )
111 add(
112 HomeSetting(
113 R.string.install_game_content,
114 R.string.install_game_content_description,
115 R.drawable.ic_system_update_alt
116 ) { mainActivity.installGameUpdate.launch(arrayOf("*/*")) }
117 )
118 add(
119 HomeSetting(
120 R.string.select_games_folder,
121 R.string.select_games_folder_description,
122 R.drawable.ic_add
123 ) {
124 mainActivity.getGamesDirectory.launch(
125 Intent(Intent.ACTION_OPEN_DOCUMENT_TREE).data
126 )
127 }
128 )
129 add(
130 HomeSetting(
131 R.string.manage_save_data,
132 R.string.import_export_saves_description,
133 R.drawable.ic_save
134 ) {
135 ImportExportSavesFragment().show(
136 parentFragmentManager,
137 ImportExportSavesFragment.TAG
138 )
139 }
140 )
141 add(
142 HomeSetting(
143 R.string.install_prod_keys,
144 R.string.install_prod_keys_description,
145 R.drawable.ic_unlock
146 ) { mainActivity.getProdKey.launch(arrayOf("*/*")) }
147 )
148 add(
149 HomeSetting(
150 R.string.install_firmware,
151 R.string.install_firmware_description,
152 R.drawable.ic_firmware
153 ) { mainActivity.getFirmware.launch(arrayOf("application/zip")) }
154 )
155 add(
156 HomeSetting(
157 R.string.share_log,
158 R.string.share_log_description,
159 R.drawable.ic_log
160 ) { shareLog() }
161 )
162 add(
163 HomeSetting(
164 R.string.about,
165 R.string.about_description,
166 R.drawable.ic_info_outline
167 ) {
168 exitTransition = MaterialSharedAxis(MaterialSharedAxis.X, true)
169 parentFragmentManager.primaryNavigationFragment?.findNavController()
170 ?.navigate(R.id.action_homeSettingsFragment_to_aboutFragment)
171 }
172 )
173 }
144 174
145 if (!BuildConfig.PREMIUM) { 175 if (!BuildConfig.PREMIUM) {
146 optionsList.add( 176 optionsList.add(
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt
new file mode 100644
index 000000000..b29b627e9
--- /dev/null
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt
@@ -0,0 +1,62 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4package org.yuzu.yuzu_emu.fragments
5
6import android.app.Dialog
7import android.content.Intent
8import android.net.Uri
9import android.os.Bundle
10import androidx.fragment.app.DialogFragment
11import com.google.android.material.dialog.MaterialAlertDialogBuilder
12import org.yuzu.yuzu_emu.R
13
/**
 * Dialog fragment that displays a title, a free-form (potentially long) message and,
 * optionally, a "learn more" button that opens a help link.
 *
 * Create instances through [newInstance], which stores the inputs in the fragment arguments.
 */
class LongMessageDialogFragment : DialogFragment() {
    /**
     * Builds the dialog from the arguments stored by [newInstance].
     *
     * The dialog is returned un-shown via `create()`; showing it from here with `show()`
     * would display it before the DialogFragment has taken ownership of it.
     */
    override fun onCreateDialog(savedInstanceState: Bundle?): Dialog {
        val titleId = requireArguments().getInt(TITLE)
        val description = requireArguments().getString(DESCRIPTION)
        val helpLinkId = requireArguments().getInt(HELP_LINK)

        val dialog = MaterialAlertDialogBuilder(requireContext())
            .setPositiveButton(R.string.close, null)
            .setTitle(titleId)
            .setMessage(description)

        // A help link id of 0 means "no link": the neutral button is omitted entirely.
        if (helpLinkId != 0) {
            dialog.setNeutralButton(R.string.learn_more) { _, _ ->
                openLink(getString(helpLinkId))
            }
        }

        return dialog.create()
    }

    /** Opens [link] with an ACTION_VIEW intent. */
    private fun openLink(link: String) {
        val intent = Intent(Intent.ACTION_VIEW, Uri.parse(link))
        startActivity(intent)
    }

    companion object {
        const val TAG = "LongMessageDialogFragment"

        // Keys under which the dialog inputs are stored in the fragment arguments.
        private const val TITLE = "Title"
        private const val DESCRIPTION = "Description"
        private const val HELP_LINK = "Link"

        /**
         * Creates a dialog fragment carrying its inputs as arguments.
         *
         * @param titleId String resource id used as the dialog title.
         * @param description Message text shown in the dialog body.
         * @param helpLinkId String resource id of a URL opened by the "learn more" button,
         * or 0 (the default) to show no such button.
         * @return A new fragment with its arguments set.
         */
        fun newInstance(
            titleId: Int,
            description: String,
            helpLinkId: Int = 0
        ): LongMessageDialogFragment {
            val dialog = LongMessageDialogFragment()
            val bundle = Bundle()
            bundle.apply {
                putInt(TITLE, titleId)
                putString(DESCRIPTION, description)
                putInt(HELP_LINK, helpLinkId)
            }
            dialog.arguments = bundle
            return dialog
        }
    }
}
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt
index cc1d87f1b..3086cfad3 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt
@@ -4,6 +4,7 @@
4package org.yuzu.yuzu_emu.ui.main 4package org.yuzu.yuzu_emu.ui.main
5 5
6import android.content.Intent 6import android.content.Intent
7import android.net.Uri
7import android.os.Bundle 8import android.os.Bundle
8import android.view.View 9import android.view.View
9import android.view.ViewGroup.MarginLayoutParams 10import android.view.ViewGroup.MarginLayoutParams
@@ -42,6 +43,7 @@ import org.yuzu.yuzu_emu.features.settings.model.SettingsViewModel
42import org.yuzu.yuzu_emu.features.settings.ui.SettingsActivity 43import org.yuzu.yuzu_emu.features.settings.ui.SettingsActivity
43import org.yuzu.yuzu_emu.features.settings.utils.SettingsFile 44import org.yuzu.yuzu_emu.features.settings.utils.SettingsFile
44import org.yuzu.yuzu_emu.fragments.IndeterminateProgressDialogFragment 45import org.yuzu.yuzu_emu.fragments.IndeterminateProgressDialogFragment
46import org.yuzu.yuzu_emu.fragments.LongMessageDialogFragment
45import org.yuzu.yuzu_emu.fragments.MessageDialogFragment 47import org.yuzu.yuzu_emu.fragments.MessageDialogFragment
46import org.yuzu.yuzu_emu.model.GamesViewModel 48import org.yuzu.yuzu_emu.model.GamesViewModel
47import org.yuzu.yuzu_emu.model.HomeViewModel 49import org.yuzu.yuzu_emu.model.HomeViewModel
@@ -481,62 +483,110 @@ class MainActivity : AppCompatActivity(), ThemeProvider {
481 } 483 }
482 } 484 }
483 485
484 val installGameUpdate = 486 val installGameUpdate = registerForActivityResult(
485 registerForActivityResult(ActivityResultContracts.OpenDocument()) { 487 ActivityResultContracts.OpenMultipleDocuments()
486 if (it == null) { 488 ) { documents: List<Uri> ->
487 return@registerForActivityResult 489 if (documents.isNotEmpty()) {
488 }
489
490 IndeterminateProgressDialogFragment.newInstance( 490 IndeterminateProgressDialogFragment.newInstance(
491 this@MainActivity, 491 this@MainActivity,
492 R.string.install_game_content 492 R.string.install_game_content
493 ) { 493 ) {
494 val result = NativeLibrary.installFileToNand(it.toString()) 494 var installSuccess = 0
495 var installOverwrite = 0
496 var errorBaseGame = 0
497 var errorExtension = 0
498 var errorOther = 0
499 var errorTotal = 0
495 lifecycleScope.launch { 500 lifecycleScope.launch {
496 withContext(Dispatchers.Main) { 501 documents.forEach {
497 when (result) { 502 when (NativeLibrary.installFileToNand(it.toString())) {
498 NativeLibrary.InstallFileToNandResult.Success -> { 503 NativeLibrary.InstallFileToNandResult.Success -> {
499 Toast.makeText( 504 installSuccess += 1
500 applicationContext,
501 R.string.install_game_content_success,
502 Toast.LENGTH_SHORT
503 ).show()
504 } 505 }
505 506
506 NativeLibrary.InstallFileToNandResult.SuccessFileOverwritten -> { 507 NativeLibrary.InstallFileToNandResult.SuccessFileOverwritten -> {
507 Toast.makeText( 508 installOverwrite += 1
508 applicationContext,
509 R.string.install_game_content_success_overwrite,
510 Toast.LENGTH_SHORT
511 ).show()
512 } 509 }
513 510
514 NativeLibrary.InstallFileToNandResult.ErrorBaseGame -> { 511 NativeLibrary.InstallFileToNandResult.ErrorBaseGame -> {
515 MessageDialogFragment.newInstance( 512 errorBaseGame += 1
516 R.string.install_game_content_failure,
517 R.string.install_game_content_failure_base
518 ).show(supportFragmentManager, MessageDialogFragment.TAG)
519 } 513 }
520 514
521 NativeLibrary.InstallFileToNandResult.ErrorFilenameExtension -> { 515 NativeLibrary.InstallFileToNandResult.ErrorFilenameExtension -> {
522 MessageDialogFragment.newInstance( 516 errorExtension += 1
523 R.string.install_game_content_failure,
524 R.string.install_game_content_failure_file_extension,
525 R.string.install_game_content_help_link
526 ).show(supportFragmentManager, MessageDialogFragment.TAG)
527 } 517 }
528 518
529 else -> { 519 else -> {
530 MessageDialogFragment.newInstance( 520 errorOther += 1
531 R.string.install_game_content_failure,
532 R.string.install_game_content_failure_description,
533 R.string.install_game_content_help_link
534 ).show(supportFragmentManager, MessageDialogFragment.TAG)
535 } 521 }
536 } 522 }
537 } 523 }
524 withContext(Dispatchers.Main) {
525 val separator = System.getProperty("line.separator") ?: "\n"
526 val installResult = StringBuilder()
527 if (installSuccess > 0) {
528 installResult.append(
529 getString(
530 R.string.install_game_content_success_install,
531 installSuccess
532 )
533 )
534 installResult.append(separator)
535 }
536 if (installOverwrite > 0) {
537 installResult.append(
538 getString(
539 R.string.install_game_content_success_overwrite,
540 installOverwrite
541 )
542 )
543 installResult.append(separator)
544 }
545 errorTotal = errorBaseGame + errorExtension + errorOther
546 if (errorTotal > 0) {
547 installResult.append(separator)
548 installResult.append(
549 getString(
550 R.string.install_game_content_failed_count,
551 errorTotal
552 )
553 )
554 installResult.append(separator)
555 if (errorBaseGame > 0) {
556 installResult.append(separator)
557 installResult.append(
558 getString(R.string.install_game_content_failure_base)
559 )
560 installResult.append(separator)
561 }
562 if (errorExtension > 0) {
563 installResult.append(separator)
564 installResult.append(
565 getString(R.string.install_game_content_failure_file_extension)
566 )
567 installResult.append(separator)
568 }
569 if (errorOther > 0) {
570 installResult.append(
571 getString(R.string.install_game_content_failure_description)
572 )
573 installResult.append(separator)
574 }
575 LongMessageDialogFragment.newInstance(
576 R.string.install_game_content_failure,
577 installResult.toString().trim(),
578 R.string.install_game_content_help_link
579 ).show(supportFragmentManager, LongMessageDialogFragment.TAG)
580 } else {
581 LongMessageDialogFragment.newInstance(
582 R.string.install_game_content_success,
583 installResult.toString().trim()
584 ).show(supportFragmentManager, LongMessageDialogFragment.TAG)
585 }
586 }
538 } 587 }
539 return@newInstance result 588 return@newInstance installSuccess + installOverwrite + errorTotal
540 }.show(supportFragmentManager, IndeterminateProgressDialogFragment.TAG) 589 }.show(supportFragmentManager, IndeterminateProgressDialogFragment.TAG)
541 } 590 }
591 }
542} 592}
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt
index dad159481..1d4695a2a 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt
@@ -113,6 +113,8 @@ object GpuDriverHelper {
113 initializeDriverParameters(context) 113 initializeDriverParameters(context)
114 } 114 }
115 115
116 external fun supportsCustomDriverLoading(): Boolean
117
116 // Parse the custom driver metadata to retrieve the name. 118 // Parse the custom driver metadata to retrieve the name.
117 val customDriverName: String? 119 val customDriverName: String?
118 get() { 120 get() {
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt
new file mode 100644
index 000000000..18e5fa0b0
--- /dev/null
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt
@@ -0,0 +1,59 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4package org.yuzu.yuzu_emu.utils
5
6import android.app.ActivityManager
7import android.content.Context
8import org.yuzu.yuzu_emu.R
9import java.util.Locale
10
/**
 * Helper for querying the device's total memory and comparing or formatting it in
 * binary size units (1 Kb = 1024 bytes, each further unit is 1024 times the previous one).
 *
 * @param context Context used to reach the ActivityManager and to resolve unit-name strings.
 */
class MemoryUtil(val context: Context) {

    // Formats a value with exactly two decimal places, independent of the device locale.
    private val Double.floatForm: String
        get() = String.format(Locale.ROOT, "%.2f", this)

    /**
     * Converts a byte count into a human-readable string in the largest unit that keeps
     * the value at or above 1, e.g. "7.45 <gigabyte label>".
     *
     * The division is done in floating point; dividing as Long would truncate the
     * fraction before formatting and always print ".00".
     */
    private fun bytesToSizeUnit(size: Long): String {
        val bytes = size.toDouble()
        return when {
            size < Kb -> "${bytes.floatForm} ${context.getString(R.string.memory_byte)}"
            size < Mb -> "${(bytes / Kb).floatForm} ${context.getString(R.string.memory_kilobyte)}"
            size < Gb -> "${(bytes / Mb).floatForm} ${context.getString(R.string.memory_megabyte)}"
            size < Tb -> "${(bytes / Gb).floatForm} ${context.getString(R.string.memory_gigabyte)}"
            size < Pb -> "${(bytes / Tb).floatForm} ${context.getString(R.string.memory_terabyte)}"
            size < Eb -> "${(bytes / Pb).floatForm} ${context.getString(R.string.memory_petabyte)}"
            else -> "${(bytes / Eb).floatForm} ${context.getString(R.string.memory_exabyte)}"
        }
    }

    // Total memory in bytes as reported by ActivityManager.MemoryInfo.totalMem,
    // read once when this object is constructed.
    private val totalMemory =
        with(context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager) {
            val memInfo = ActivityManager.MemoryInfo()
            getMemoryInfo(memInfo)
            memInfo.totalMem
        }

    /**
     * Checks whether the device's total memory is below [minimum] units of [size].
     *
     * @param minimum Threshold expressed in the unit given by [size].
     * @param size One of the unit constants ([Kb], [Mb], [Gb], [Tb], [Pb], [Eb]); any other
     * value is treated as plain bytes.
     * @return true when total memory is smaller than the next larger unit and, measured in
     * the requested unit, is less than [minimum].
     */
    fun isLessThan(minimum: Int, size: Long): Boolean {
        // Integer division is sufficient here: floor(x) < minimum is equivalent to
        // x < minimum for an integer minimum.
        return when (size) {
            Kb -> totalMemory < Mb && (totalMemory / Kb) < minimum
            Mb -> totalMemory < Gb && (totalMemory / Mb) < minimum
            Gb -> totalMemory < Tb && (totalMemory / Gb) < minimum
            Tb -> totalMemory < Pb && (totalMemory / Tb) < minimum
            Pb -> totalMemory < Eb && (totalMemory / Pb) < minimum
            Eb -> totalMemory / Eb < minimum
            else -> totalMemory < Kb && totalMemory < minimum
        }
    }

    /** Returns the device's total memory as a formatted string with a unit label. */
    fun getDeviceRAM(): String {
        return bytesToSizeUnit(totalMemory)
    }

    companion object {
        // Binary size units, expressed in bytes.
        const val Kb: Long = 1024
        const val Mb = Kb * 1024
        const val Gb = Mb * 1024
        const val Tb = Gb * 1024
        const val Pb = Tb * 1024
        const val Eb = Pb * 1024
    }
}
diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp
index f9617202b..f4fed0886 100644
--- a/src/android/app/src/main/jni/native.cpp
+++ b/src/android/app/src/main/jni/native.cpp
@@ -237,6 +237,7 @@ public:
237 m_software_keyboard = android_keyboard.get(); 237 m_software_keyboard = android_keyboard.get();
238 m_system.SetShuttingDown(false); 238 m_system.SetShuttingDown(false);
239 m_system.ApplySettings(); 239 m_system.ApplySettings();
240 Settings::LogSettings();
240 m_system.HIDCore().ReloadInputDevices(); 241 m_system.HIDCore().ReloadInputDevices();
241 m_system.SetAppletFrontendSet({ 242 m_system.SetAppletFrontendSet({
242 nullptr, // Amiibo Settings 243 nullptr, // Amiibo Settings
@@ -560,6 +561,26 @@ void JNICALL Java_org_yuzu_yuzu_1emu_NativeLibrary_initializeGpuDriver(
560 GetJString(env, custom_driver_name), GetJString(env, file_redirect_dir)); 561 GetJString(env, custom_driver_name), GetJString(env, file_redirect_dir));
561} 562}
562 563
/// Reports whether the KGSL device node exists on this device.
[[maybe_unused]] static bool CheckKgslPresent() {
    constexpr const char* kgsl_device_node = "/dev/kgsl-3d0";

    // access() with F_OK returns 0 when the path exists.
    const int probe_result = access(kgsl_device_node, F_OK);
    return probe_result == 0;
}
569
570[[maybe_unused]] bool SupportsCustomDriver() {
571 return android_get_device_api_level() >= 28 && CheckKgslPresent();
572}
573
574jboolean JNICALL Java_org_yuzu_yuzu_1emu_utils_GpuDriverHelper_supportsCustomDriverLoading(
575 [[maybe_unused]] JNIEnv* env, [[maybe_unused]] jobject instance) {
576#ifdef ARCHITECTURE_arm64
577 // If the KGSL device exists custom drivers can be loaded using adrenotools
578 return SupportsCustomDriver();
579#else
580 return false;
581#endif
582}
583
563jboolean Java_org_yuzu_yuzu_1emu_NativeLibrary_reloadKeys(JNIEnv* env, 584jboolean Java_org_yuzu_yuzu_1emu_NativeLibrary_reloadKeys(JNIEnv* env,
564 [[maybe_unused]] jclass clazz) { 585 [[maybe_unused]] jclass clazz) {
565 Core::Crypto::KeyManager::Instance().ReloadKeys(); 586 Core::Crypto::KeyManager::Instance().ReloadKeys();
diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml
index cc1d8c39d..21805d274 100644
--- a/src/android/app/src/main/res/values/strings.xml
+++ b/src/android/app/src/main/res/values/strings.xml
@@ -104,12 +104,14 @@
104 <string name="share_log_missing">No log file found</string> 104 <string name="share_log_missing">No log file found</string>
105 <string name="install_game_content">Install game content</string> 105 <string name="install_game_content">Install game content</string>
106 <string name="install_game_content_description">Install game updates or DLC</string> 106 <string name="install_game_content_description">Install game updates or DLC</string>
107 <string name="install_game_content_failure">Error installing file to NAND</string> 107 <string name="install_game_content_failure">Error installing file(s) to NAND</string>
108 <string name="install_game_content_failure_description">Game content installation failed. Please ensure content is valid and that the prod.keys file is installed.</string> 108 <string name="install_game_content_failure_description">Please ensure content(s) are valid and that the prod.keys file is installed.</string>
109 <string name="install_game_content_failure_base">Installation of base games isn\'t permitted in order to avoid possible conflicts. Please select an update or DLC instead.</string> 109 <string name="install_game_content_failure_base">Installation of base games isn\'t permitted in order to avoid possible conflicts.</string>
110 <string name="install_game_content_failure_file_extension">The selected file type is not supported. Only NSP and XCI content is supported for this action. Please verify the game content is valid.</string> 110 <string name="install_game_content_failure_file_extension">Only NSP and XCI content is supported. Please verify the game content(s) are valid.</string>
111 <string name="install_game_content_success">Game content installed successfully</string> 111 <string name="install_game_content_failed_count">%1$d installation error(s)</string>
112 <string name="install_game_content_success_overwrite">Game content was overwritten successfully</string> 112 <string name="install_game_content_success">Game content(s) installed successfully</string>
113 <string name="install_game_content_success_install">%1$d installed successfully</string>
114 <string name="install_game_content_success_overwrite">%1$d overwritten successfully</string>
113 <string name="install_game_content_help_link">https://yuzu-emu.org/help/quickstart/#dumping-installed-updates</string> 115 <string name="install_game_content_help_link">https://yuzu-emu.org/help/quickstart/#dumping-installed-updates</string>
114 116
115 <!-- About screen strings --> 117 <!-- About screen strings -->
@@ -270,6 +272,7 @@
270 <string name="fatal_error">Fatal Error</string> 272 <string name="fatal_error">Fatal Error</string>
271 <string name="fatal_error_message">A fatal error occurred. Check the log for details.\nContinuing emulation may result in crashes and bugs.</string> 273 <string name="fatal_error_message">A fatal error occurred. Check the log for details.\nContinuing emulation may result in crashes and bugs.</string>
272 <string name="performance_warning">Turning off this setting will significantly reduce emulation performance! For the best experience, it is recommended that you leave this setting enabled.</string> 274 <string name="performance_warning">Turning off this setting will significantly reduce emulation performance! For the best experience, it is recommended that you leave this setting enabled.</string>
275 <string name="device_memory_inadequate">Device RAM: %1$s\nRecommended: %2$s</string>
273 276
274 <!-- Region Names --> 277 <!-- Region Names -->
275 <string name="region_japan">Japan</string> 278 <string name="region_japan">Japan</string>
@@ -300,6 +303,15 @@
300 <string name="language_traditional_chinese">Traditional Chinese (正體中文)</string> 303 <string name="language_traditional_chinese">Traditional Chinese (正體中文)</string>
301 <string name="language_brazilian_portuguese">Brazilian Portuguese (Português do Brasil)</string> 304 <string name="language_brazilian_portuguese">Brazilian Portuguese (Português do Brasil)</string>
302 305
306 <!-- Memory Sizes -->
307 <string name="memory_byte">Byte</string>
308 <string name="memory_kilobyte">KB</string>
309 <string name="memory_megabyte">MB</string>
310 <string name="memory_gigabyte">GB</string>
311 <string name="memory_terabyte">TB</string>
312 <string name="memory_petabyte">PB</string>
313 <string name="memory_exabyte">EB</string>
314
303 <!-- Renderer APIs --> 315 <!-- Renderer APIs -->
304 <string name="renderer_vulkan">Vulkan</string> 316 <string name="renderer_vulkan">Vulkan</string>
305 <string name="renderer_none">None</string> 317 <string name="renderer_none">None</string>
diff --git a/src/audio_core/device/audio_buffers.h b/src/audio_core/device/audio_buffers.h
index 15082f6c6..5d8ed0ef7 100644
--- a/src/audio_core/device/audio_buffers.h
+++ b/src/audio_core/device/audio_buffers.h
@@ -7,6 +7,7 @@
7#include <mutex> 7#include <mutex>
8#include <span> 8#include <span>
9#include <vector> 9#include <vector>
10#include <boost/container/static_vector.hpp>
10 11
11#include "audio_buffer.h" 12#include "audio_buffer.h"
12#include "audio_core/device/device_session.h" 13#include "audio_core/device/device_session.h"
@@ -48,7 +49,7 @@ public:
48 * 49 *
49 * @param out_buffers - The buffers which were registered. 50 * @param out_buffers - The buffers which were registered.
50 */ 51 */
51 void RegisterBuffers(std::vector<AudioBuffer>& out_buffers) { 52 void RegisterBuffers(boost::container::static_vector<AudioBuffer, N>& out_buffers) {
52 std::scoped_lock l{lock}; 53 std::scoped_lock l{lock};
53 const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit), 54 const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit),
54 BufferAppendLimit - registered_count)}; 55 BufferAppendLimit - registered_count)};
@@ -162,7 +163,8 @@ public:
162 * @param max_buffers - Maximum number of buffers to released. 163 * @param max_buffers - Maximum number of buffers to released.
163 * @return The number of buffers released. 164 * @return The number of buffers released.
164 */ 165 */
165 u32 GetRegisteredAppendedBuffers(std::vector<AudioBuffer>& buffers_flushed, u32 max_buffers) { 166 u32 GetRegisteredAppendedBuffers(
167 boost::container::static_vector<AudioBuffer, N>& buffers_flushed, u32 max_buffers) {
166 std::scoped_lock l{lock}; 168 std::scoped_lock l{lock};
167 if (registered_count + appended_count == 0) { 169 if (registered_count + appended_count == 0) {
168 return 0; 170 return 0;
@@ -270,7 +272,7 @@ public:
270 */ 272 */
271 bool FlushBuffers(u32& buffers_released) { 273 bool FlushBuffers(u32& buffers_released) {
272 std::scoped_lock l{lock}; 274 std::scoped_lock l{lock};
273 std::vector<AudioBuffer> buffers_flushed{}; 275 boost::container::static_vector<AudioBuffer, N> buffers_flushed{};
274 276
275 buffers_released = GetRegisteredAppendedBuffers(buffers_flushed, append_limit); 277 buffers_released = GetRegisteredAppendedBuffers(buffers_flushed, append_limit);
276 278
diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp
index b5c0ef0e6..86811fcb8 100644
--- a/src/audio_core/device/device_session.cpp
+++ b/src/audio_core/device/device_session.cpp
@@ -79,7 +79,7 @@ void DeviceSession::ClearBuffers() {
79 } 79 }
80} 80}
81 81
82void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const { 82void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) {
83 for (const auto& buffer : buffers) { 83 for (const auto& buffer : buffers) {
84 Sink::SinkBuffer new_buffer{ 84 Sink::SinkBuffer new_buffer{
85 .frames = buffer.size / (channel_count * sizeof(s16)), 85 .frames = buffer.size / (channel_count * sizeof(s16)),
@@ -88,13 +88,13 @@ void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const {
88 .consumed = false, 88 .consumed = false,
89 }; 89 };
90 90
91 tmp_samples.resize_destructive(buffer.size / sizeof(s16));
91 if (type == Sink::StreamType::In) { 92 if (type == Sink::StreamType::In) {
92 std::vector<s16> samples{}; 93 stream->AppendBuffer(new_buffer, tmp_samples);
93 stream->AppendBuffer(new_buffer, samples);
94 } else { 94 } else {
95 std::vector<s16> samples(buffer.size / sizeof(s16)); 95 system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(),
96 system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, samples.data(), buffer.size); 96 buffer.size);
97 stream->AppendBuffer(new_buffer, samples); 97 stream->AppendBuffer(new_buffer, tmp_samples);
98 } 98 }
99 } 99 }
100} 100}
diff --git a/src/audio_core/device/device_session.h b/src/audio_core/device/device_session.h
index 75f766c68..7d52f362d 100644
--- a/src/audio_core/device/device_session.h
+++ b/src/audio_core/device/device_session.h
@@ -10,6 +10,7 @@
10 10
11#include "audio_core/common/common.h" 11#include "audio_core/common/common.h"
12#include "audio_core/sink/sink.h" 12#include "audio_core/sink/sink.h"
13#include "common/scratch_buffer.h"
13#include "core/hle/service/audio/errors.h" 14#include "core/hle/service/audio/errors.h"
14 15
15namespace Core { 16namespace Core {
@@ -62,7 +63,7 @@ public:
62 * 63 *
63 * @param buffers - The buffers to play. 64 * @param buffers - The buffers to play.
64 */ 65 */
65 void AppendBuffers(std::span<const AudioBuffer> buffers) const; 66 void AppendBuffers(std::span<const AudioBuffer> buffers);
66 67
67 /** 68 /**
68 * (Audio In only) Pop samples from the backend, and write them back to this buffer's address. 69 * (Audio In only) Pop samples from the backend, and write them back to this buffer's address.
@@ -146,8 +147,8 @@ private:
146 std::shared_ptr<Core::Timing::EventType> thread_event; 147 std::shared_ptr<Core::Timing::EventType> thread_event;
147 /// Is this session initialised? 148 /// Is this session initialised?
148 bool initialized{}; 149 bool initialized{};
149 /// Buffer queue 150 /// Temporary sample buffer
150 std::vector<AudioBuffer> buffer_queue{}; 151 Common::ScratchBuffer<s16> tmp_samples{};
151}; 152};
152 153
153} // namespace AudioCore 154} // namespace AudioCore
diff --git a/src/audio_core/in/audio_in_system.cpp b/src/audio_core/in/audio_in_system.cpp
index e23e51758..579129121 100644
--- a/src/audio_core/in/audio_in_system.cpp
+++ b/src/audio_core/in/audio_in_system.cpp
@@ -2,6 +2,7 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include <mutex> 4#include <mutex>
5
5#include "audio_core/audio_event.h" 6#include "audio_core/audio_event.h"
6#include "audio_core/audio_manager.h" 7#include "audio_core/audio_manager.h"
7#include "audio_core/in/audio_in_system.h" 8#include "audio_core/in/audio_in_system.h"
@@ -89,7 +90,7 @@ Result System::Start() {
89 session->Start(); 90 session->Start();
90 state = State::Started; 91 state = State::Started;
91 92
92 std::vector<AudioBuffer> buffers_to_flush{}; 93 boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{};
93 buffers.RegisterBuffers(buffers_to_flush); 94 buffers.RegisterBuffers(buffers_to_flush);
94 session->AppendBuffers(buffers_to_flush); 95 session->AppendBuffers(buffers_to_flush);
95 session->SetRingSize(static_cast<u32>(buffers_to_flush.size())); 96 session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
@@ -134,7 +135,7 @@ bool System::AppendBuffer(const AudioInBuffer& buffer, const u64 tag) {
134 135
135void System::RegisterBuffers() { 136void System::RegisterBuffers() {
136 if (state == State::Started) { 137 if (state == State::Started) {
137 std::vector<AudioBuffer> registered_buffers{}; 138 boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{};
138 buffers.RegisterBuffers(registered_buffers); 139 buffers.RegisterBuffers(registered_buffers);
139 session->AppendBuffers(registered_buffers); 140 session->AppendBuffers(registered_buffers);
140 } 141 }
diff --git a/src/audio_core/out/audio_out_system.cpp b/src/audio_core/out/audio_out_system.cpp
index bd13f7219..0adf64bd3 100644
--- a/src/audio_core/out/audio_out_system.cpp
+++ b/src/audio_core/out/audio_out_system.cpp
@@ -89,7 +89,7 @@ Result System::Start() {
89 session->Start(); 89 session->Start();
90 state = State::Started; 90 state = State::Started;
91 91
92 std::vector<AudioBuffer> buffers_to_flush{}; 92 boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{};
93 buffers.RegisterBuffers(buffers_to_flush); 93 buffers.RegisterBuffers(buffers_to_flush);
94 session->AppendBuffers(buffers_to_flush); 94 session->AppendBuffers(buffers_to_flush);
95 session->SetRingSize(static_cast<u32>(buffers_to_flush.size())); 95 session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
@@ -134,7 +134,7 @@ bool System::AppendBuffer(const AudioOutBuffer& buffer, u64 tag) {
134 134
135void System::RegisterBuffers() { 135void System::RegisterBuffers() {
136 if (state == State::Started) { 136 if (state == State::Started) {
137 std::vector<AudioBuffer> registered_buffers{}; 137 boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{};
138 buffers.RegisterBuffers(registered_buffers); 138 buffers.RegisterBuffers(registered_buffers);
139 session->AppendBuffers(registered_buffers); 139 session->AppendBuffers(registered_buffers);
140 } 140 }
diff --git a/src/audio_core/renderer/adsp/adsp.cpp b/src/audio_core/renderer/adsp/adsp.cpp
index 74772fc50..b1db31e93 100644
--- a/src/audio_core/renderer/adsp/adsp.cpp
+++ b/src/audio_core/renderer/adsp/adsp.cpp
@@ -7,7 +7,6 @@
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/core_timing.h" 9#include "core/core_timing.h"
10#include "core/core_timing_util.h"
11#include "core/memory.h" 10#include "core/memory.h"
12 11
13namespace AudioCore::AudioRenderer::ADSP { 12namespace AudioCore::AudioRenderer::ADSP {
diff --git a/src/audio_core/renderer/adsp/audio_renderer.cpp b/src/audio_core/renderer/adsp/audio_renderer.cpp
index 8bc39f9f9..9ca716b60 100644
--- a/src/audio_core/renderer/adsp/audio_renderer.cpp
+++ b/src/audio_core/renderer/adsp/audio_renderer.cpp
@@ -13,7 +13,6 @@
13#include "common/thread.h" 13#include "common/thread.h"
14#include "core/core.h" 14#include "core/core.h"
15#include "core/core_timing.h" 15#include "core/core_timing.h"
16#include "core/core_timing_util.h"
17 16
18MICROPROFILE_DEFINE(Audio_Renderer, "Audio", "DSP", MP_RGB(60, 19, 97)); 17MICROPROFILE_DEFINE(Audio_Renderer, "Audio", "DSP", MP_RGB(60, 19, 97));
19 18
@@ -144,6 +143,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) {
144 143
145 mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_InitializeOK); 144 mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_InitializeOK);
146 145
146 // 0.12 seconds (2304000 / 19200000)
147 constexpr u64 max_process_time{2'304'000ULL}; 147 constexpr u64 max_process_time{2'304'000ULL};
148 148
149 while (!stop_token.stop_requested()) { 149 while (!stop_token.stop_requested()) {
@@ -184,8 +184,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) {
184 u64 max_time{max_process_time}; 184 u64 max_time{max_process_time};
185 if (index == 1 && command_buffer.applet_resource_user_id == 185 if (index == 1 && command_buffer.applet_resource_user_id ==
186 mailbox->GetCommandBuffer(0).applet_resource_user_id) { 186 mailbox->GetCommandBuffer(0).applet_resource_user_id) {
187 max_time = max_process_time - 187 max_time = max_process_time - render_times_taken[0];
188 Core::Timing::CyclesToNs(render_times_taken[0]).count();
189 if (render_times_taken[0] > max_process_time) { 188 if (render_times_taken[0] > max_process_time) {
190 max_time = 0; 189 max_time = 0;
191 } 190 }
diff --git a/src/audio_core/renderer/adsp/command_list_processor.cpp b/src/audio_core/renderer/adsp/command_list_processor.cpp
index 7a300d216..3a0f1ae38 100644
--- a/src/audio_core/renderer/adsp/command_list_processor.cpp
+++ b/src/audio_core/renderer/adsp/command_list_processor.cpp
@@ -9,7 +9,6 @@
9#include "common/settings.h" 9#include "common/settings.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_timing.h" 11#include "core/core_timing.h"
12#include "core/core_timing_util.h"
13#include "core/memory.h" 12#include "core/memory.h"
14 13
15namespace AudioCore::AudioRenderer::ADSP { 14namespace AudioCore::AudioRenderer::ADSP {
diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp
index ff5d31bd6..f45933203 100644
--- a/src/audio_core/renderer/command/data_source/decode.cpp
+++ b/src/audio_core/renderer/command/data_source/decode.cpp
@@ -8,6 +8,7 @@
8#include "audio_core/renderer/command/resample/resample.h" 8#include "audio_core/renderer/command/resample/resample.h"
9#include "common/fixed_point.h" 9#include "common/fixed_point.h"
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "common/scratch_buffer.h"
11#include "core/memory.h" 12#include "core/memory.h"
12 13
13namespace AudioCore::AudioRenderer { 14namespace AudioCore::AudioRenderer {
@@ -27,6 +28,7 @@ constexpr std::array<u8, 3> PitchBySrcQuality = {4, 8, 4};
27template <typename T> 28template <typename T>
28static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, 29static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
29 const DecodeArg& req) { 30 const DecodeArg& req) {
31 std::array<T, TempBufferSize> tmp_samples{};
30 constexpr s32 min{std::numeric_limits<s16>::min()}; 32 constexpr s32 min{std::numeric_limits<s16>::min()};
31 constexpr s32 max{std::numeric_limits<s16>::max()}; 33 constexpr s32 max{std::numeric_limits<s16>::max()};
32 34
@@ -49,18 +51,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
49 const u64 size{channel_count * samples_to_decode}; 51 const u64 size{channel_count * samples_to_decode};
50 const u64 size_bytes{size * sizeof(T)}; 52 const u64 size_bytes{size * sizeof(T)};
51 53
52 std::vector<T> samples(size); 54 memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes);
53 memory.ReadBlockUnsafe(source, samples.data(), size_bytes);
54 55
55 if constexpr (std::is_floating_point_v<T>) { 56 if constexpr (std::is_floating_point_v<T>) {
56 for (u32 i = 0; i < samples_to_decode; i++) { 57 for (u32 i = 0; i < samples_to_decode; i++) {
57 auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * 58 auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
58 std::numeric_limits<s16>::max())}; 59 std::numeric_limits<s16>::max())};
59 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); 60 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
60 } 61 }
61 } else { 62 } else {
62 for (u32 i = 0; i < samples_to_decode; i++) { 63 for (u32 i = 0; i < samples_to_decode; i++) {
63 out_buffer[i] = samples[i * channel_count + req.target_channel]; 64 out_buffer[i] = tmp_samples[i * channel_count + req.target_channel];
64 } 65 }
65 } 66 }
66 } break; 67 } break;
@@ -73,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
73 } 74 }
74 75
75 const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; 76 const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))};
76 std::vector<T> samples(samples_to_decode); 77 memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T));
77 memory.ReadBlockUnsafe(source, samples.data(), samples_to_decode * sizeof(T));
78 78
79 if constexpr (std::is_floating_point_v<T>) { 79 if constexpr (std::is_floating_point_v<T>) {
80 for (u32 i = 0; i < samples_to_decode; i++) { 80 for (u32 i = 0; i < samples_to_decode; i++) {
81 auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * 81 auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
82 std::numeric_limits<s16>::max())}; 82 std::numeric_limits<s16>::max())};
83 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); 83 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
84 } 84 }
85 } else { 85 } else {
86 std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); 86 std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16));
87 } 87 }
88 break; 88 break;
89 } 89 }
@@ -101,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
101 */ 101 */
102static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, 102static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
103 const DecodeArg& req) { 103 const DecodeArg& req) {
104 std::array<u8, TempBufferSize> wavebuffer{};
104 constexpr u32 SamplesPerFrame{14}; 105 constexpr u32 SamplesPerFrame{14};
105 constexpr u32 NibblesPerFrame{16}; 106 constexpr u32 NibblesPerFrame{16};
106 107
@@ -138,9 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
138 } 139 }
139 140
140 const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; 141 const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)};
141 std::vector<u8> wavebuffer(size); 142 memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size);
142 memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(),
143 wavebuffer.size());
144 143
145 auto context{req.adpcm_context}; 144 auto context{req.adpcm_context};
146 auto header{context->header}; 145 auto header{context->header};
@@ -258,7 +257,7 @@ void DecodeFromWaveBuffers(Core::Memory::Memory& memory, const DecodeFromWaveBuf
258 u32 offset{voice_state.offset}; 257 u32 offset{voice_state.offset};
259 258
260 auto output_buffer{args.output}; 259 auto output_buffer{args.output};
261 std::vector<s16> temp_buffer(TempBufferSize, 0); 260 std::array<s16, TempBufferSize> temp_buffer{};
262 261
263 while (remaining_sample_count > 0) { 262 while (remaining_sample_count > 0) {
264 const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)}; 263 const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)};
diff --git a/src/audio_core/renderer/command/effect/compressor.cpp b/src/audio_core/renderer/command/effect/compressor.cpp
index 7229618e8..ee9b68d5b 100644
--- a/src/audio_core/renderer/command/effect/compressor.cpp
+++ b/src/audio_core/renderer/command/effect/compressor.cpp
@@ -44,8 +44,8 @@ static void InitializeCompressorEffect(const CompressorInfo::ParameterVersion2&
44 44
45static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params, 45static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params,
46 CompressorInfo::State& state, bool enabled, 46 CompressorInfo::State& state, bool enabled,
47 std::vector<std::span<const s32>> input_buffers, 47 std::span<std::span<const s32>> input_buffers,
48 std::vector<std::span<s32>> output_buffers, u32 sample_count) { 48 std::span<std::span<s32>> output_buffers, u32 sample_count) {
49 if (enabled) { 49 if (enabled) {
50 auto state_00{state.unk_00}; 50 auto state_00{state.unk_00};
51 auto state_04{state.unk_04}; 51 auto state_04{state.unk_04};
@@ -124,8 +124,8 @@ void CompressorCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor&
124} 124}
125 125
126void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) { 126void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) {
127 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 127 std::array<std::span<const s32>, MaxChannels> input_buffers{};
128 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 128 std::array<std::span<s32>, MaxChannels> output_buffers{};
129 129
130 for (s16 i = 0; i < parameter.channel_count; i++) { 130 for (s16 i = 0; i < parameter.channel_count; i++) {
131 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 131 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/delay.cpp b/src/audio_core/renderer/command/effect/delay.cpp
index a4e408d40..e536cbb1e 100644
--- a/src/audio_core/renderer/command/effect/delay.cpp
+++ b/src/audio_core/renderer/command/effect/delay.cpp
@@ -51,7 +51,7 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params,
51 state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor(); 51 state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor();
52 state.delay_lines[channel].sample_count = sample_count.to_int_floor(); 52 state.delay_lines[channel].sample_count = sample_count.to_int_floor();
53 state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0); 53 state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0);
54 if (state.delay_lines[channel].buffer.size() == 0) { 54 if (state.delay_lines[channel].sample_count == 0) {
55 state.delay_lines[channel].buffer.push_back(0); 55 state.delay_lines[channel].buffer.push_back(0);
56 } 56 }
57 state.delay_lines[channel].buffer_pos = 0; 57 state.delay_lines[channel].buffer_pos = 0;
@@ -74,8 +74,8 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params,
74 */ 74 */
75template <size_t NumChannels> 75template <size_t NumChannels>
76static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, 76static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state,
77 std::vector<std::span<const s32>>& inputs, 77 std::span<std::span<const s32>> inputs, std::span<std::span<s32>> outputs,
78 std::vector<std::span<s32>>& outputs, const u32 sample_count) { 78 const u32 sample_count) {
79 for (u32 sample_index = 0; sample_index < sample_count; sample_index++) { 79 for (u32 sample_index = 0; sample_index < sample_count; sample_index++) {
80 std::array<Common::FixedPoint<50, 14>, NumChannels> input_samples{}; 80 std::array<Common::FixedPoint<50, 14>, NumChannels> input_samples{};
81 for (u32 channel = 0; channel < NumChannels; channel++) { 81 for (u32 channel = 0; channel < NumChannels; channel++) {
@@ -153,8 +153,8 @@ static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::St
153 * @param sample_count - Number of samples to process. 153 * @param sample_count - Number of samples to process.
154 */ 154 */
155static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, 155static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state,
156 const bool enabled, std::vector<std::span<const s32>>& inputs, 156 const bool enabled, std::span<std::span<const s32>> inputs,
157 std::vector<std::span<s32>>& outputs, const u32 sample_count) { 157 std::span<std::span<s32>> outputs, const u32 sample_count) {
158 158
159 if (!IsChannelCountValid(params.channel_count)) { 159 if (!IsChannelCountValid(params.channel_count)) {
160 LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count); 160 LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count);
@@ -208,8 +208,8 @@ void DelayCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proce
208} 208}
209 209
210void DelayCommand::Process(const ADSP::CommandListProcessor& processor) { 210void DelayCommand::Process(const ADSP::CommandListProcessor& processor) {
211 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 211 std::array<std::span<const s32>, MaxChannels> input_buffers{};
212 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 212 std::array<std::span<s32>, MaxChannels> output_buffers{};
213 213
214 for (s16 i = 0; i < parameter.channel_count; i++) { 214 for (s16 i = 0; i < parameter.channel_count; i++) {
215 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 215 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp
index 27d8b9844..d2bfb67cc 100644
--- a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp
+++ b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp
@@ -408,8 +408,8 @@ void I3dl2ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor&
408} 408}
409 409
410void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { 410void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) {
411 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 411 std::array<std::span<const s32>, MaxChannels> input_buffers{};
412 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 412 std::array<std::span<s32>, MaxChannels> output_buffers{};
413 413
414 for (u32 i = 0; i < parameter.channel_count; i++) { 414 for (u32 i = 0; i < parameter.channel_count; i++) {
415 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 415 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/light_limiter.cpp b/src/audio_core/renderer/command/effect/light_limiter.cpp
index e8fb0e2fc..4161a9821 100644
--- a/src/audio_core/renderer/command/effect/light_limiter.cpp
+++ b/src/audio_core/renderer/command/effect/light_limiter.cpp
@@ -47,8 +47,8 @@ static void InitializeLightLimiterEffect(const LightLimiterInfo::ParameterVersio
47 */ 47 */
48static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params, 48static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params,
49 LightLimiterInfo::State& state, const bool enabled, 49 LightLimiterInfo::State& state, const bool enabled,
50 std::vector<std::span<const s32>>& inputs, 50 std::span<std::span<const s32>> inputs,
51 std::vector<std::span<s32>>& outputs, const u32 sample_count, 51 std::span<std::span<s32>> outputs, const u32 sample_count,
52 LightLimiterInfo::StatisticsInternal* statistics) { 52 LightLimiterInfo::StatisticsInternal* statistics) {
53 constexpr s64 min{std::numeric_limits<s32>::min()}; 53 constexpr s64 min{std::numeric_limits<s32>::min()};
54 constexpr s64 max{std::numeric_limits<s32>::max()}; 54 constexpr s64 max{std::numeric_limits<s32>::max()};
@@ -147,8 +147,8 @@ void LightLimiterVersion1Command::Dump([[maybe_unused]] const ADSP::CommandListP
147} 147}
148 148
149void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) { 149void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) {
150 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 150 std::array<std::span<const s32>, MaxChannels> input_buffers{};
151 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 151 std::array<std::span<s32>, MaxChannels> output_buffers{};
152 152
153 for (u32 i = 0; i < parameter.channel_count; i++) { 153 for (u32 i = 0; i < parameter.channel_count; i++) {
154 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 154 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
@@ -190,8 +190,8 @@ void LightLimiterVersion2Command::Dump([[maybe_unused]] const ADSP::CommandListP
190} 190}
191 191
192void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) { 192void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) {
193 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 193 std::array<std::span<const s32>, MaxChannels> input_buffers{};
194 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 194 std::array<std::span<s32>, MaxChannels> output_buffers{};
195 195
196 for (u32 i = 0; i < parameter.channel_count; i++) { 196 for (u32 i = 0; i < parameter.channel_count; i++) {
197 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 197 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp
index 8b9b65214..fc2f15a5e 100644
--- a/src/audio_core/renderer/command/effect/reverb.cpp
+++ b/src/audio_core/renderer/command/effect/reverb.cpp
@@ -250,8 +250,8 @@ static Common::FixedPoint<50, 14> Axfx2AllPassTick(ReverbInfo::ReverbDelayLine&
250 */ 250 */
251template <size_t NumChannels> 251template <size_t NumChannels>
252static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, 252static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state,
253 std::vector<std::span<const s32>>& inputs, 253 std::span<std::span<const s32>> inputs,
254 std::vector<std::span<s32>>& outputs, const u32 sample_count) { 254 std::span<std::span<s32>> outputs, const u32 sample_count) {
255 static constexpr std::array<u8, ReverbInfo::MaxDelayTaps> OutTapIndexes1Ch{ 255 static constexpr std::array<u8, ReverbInfo::MaxDelayTaps> OutTapIndexes1Ch{
256 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 256 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
257 }; 257 };
@@ -369,8 +369,8 @@ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, Rever
369 * @param sample_count - Number of samples to process. 369 * @param sample_count - Number of samples to process.
370 */ 370 */
371static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, 371static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state,
372 const bool enabled, std::vector<std::span<const s32>>& inputs, 372 const bool enabled, std::span<std::span<const s32>> inputs,
373 std::vector<std::span<s32>>& outputs, const u32 sample_count) { 373 std::span<std::span<s32>> outputs, const u32 sample_count) {
374 if (enabled) { 374 if (enabled) {
375 switch (params.channel_count) { 375 switch (params.channel_count) {
376 case 0: 376 case 0:
@@ -412,8 +412,8 @@ void ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proc
412} 412}
413 413
414void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { 414void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) {
415 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 415 std::array<std::span<const s32>, MaxChannels> input_buffers{};
416 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 416 std::array<std::span<s32>, MaxChannels> output_buffers{};
417 417
418 for (u32 i = 0; i < parameter.channel_count; i++) { 418 for (u32 i = 0; i < parameter.channel_count; i++) {
419 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 419 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/performance/performance.cpp b/src/audio_core/renderer/command/performance/performance.cpp
index 985958b03..4a881547f 100644
--- a/src/audio_core/renderer/command/performance/performance.cpp
+++ b/src/audio_core/renderer/command/performance/performance.cpp
@@ -5,7 +5,6 @@
5#include "audio_core/renderer/command/performance/performance.h" 5#include "audio_core/renderer/command/performance/performance.h"
6#include "core/core.h" 6#include "core/core.h"
7#include "core/core_timing.h" 7#include "core/core_timing.h"
8#include "core/core_timing_util.h"
9 8
10namespace AudioCore::AudioRenderer { 9namespace AudioCore::AudioRenderer {
11 10
@@ -18,20 +17,18 @@ void PerformanceCommand::Process(const ADSP::CommandListProcessor& processor) {
18 auto base{entry_address.translated_address}; 17 auto base{entry_address.translated_address};
19 if (state == PerformanceState::Start) { 18 if (state == PerformanceState::Start) {
20 auto start_time_ptr{reinterpret_cast<u32*>(base + entry_address.entry_start_time_offset)}; 19 auto start_time_ptr{reinterpret_cast<u32*>(base + entry_address.entry_start_time_offset)};
21 *start_time_ptr = static_cast<u32>( 20 *start_time_ptr =
22 Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - 21 static_cast<u32>(processor.system->CoreTiming().GetClockTicks() - processor.start_time -
23 processor.start_time - processor.current_processing_time) 22 processor.current_processing_time);
24 .count());
25 } else if (state == PerformanceState::Stop) { 23 } else if (state == PerformanceState::Stop) {
26 auto processed_time_ptr{ 24 auto processed_time_ptr{
27 reinterpret_cast<u32*>(base + entry_address.entry_processed_time_offset)}; 25 reinterpret_cast<u32*>(base + entry_address.entry_processed_time_offset)};
28 auto entry_count_ptr{ 26 auto entry_count_ptr{
29 reinterpret_cast<u32*>(base + entry_address.header_entry_count_offset)}; 27 reinterpret_cast<u32*>(base + entry_address.header_entry_count_offset)};
30 28
31 *processed_time_ptr = static_cast<u32>( 29 *processed_time_ptr =
32 Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - 30 static_cast<u32>(processor.system->CoreTiming().GetClockTicks() - processor.start_time -
33 processor.start_time - processor.current_processing_time) 31 processor.current_processing_time);
34 .count());
35 (*entry_count_ptr)++; 32 (*entry_count_ptr)++;
36 } 33 }
37} 34}
diff --git a/src/audio_core/renderer/command/sink/circular_buffer.cpp b/src/audio_core/renderer/command/sink/circular_buffer.cpp
index ded5afc94..e2ce59792 100644
--- a/src/audio_core/renderer/command/sink/circular_buffer.cpp
+++ b/src/audio_core/renderer/command/sink/circular_buffer.cpp
@@ -24,7 +24,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces
24 constexpr s32 min{std::numeric_limits<s16>::min()}; 24 constexpr s32 min{std::numeric_limits<s16>::min()};
25 constexpr s32 max{std::numeric_limits<s16>::max()}; 25 constexpr s32 max{std::numeric_limits<s16>::max()};
26 26
27 std::vector<s16> output(processor.sample_count); 27 std::array<s16, TargetSampleCount * MaxChannels> output{};
28 for (u32 channel = 0; channel < input_count; channel++) { 28 for (u32 channel = 0; channel < input_count; channel++) {
29 auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count, 29 auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count,
30 processor.sample_count)}; 30 processor.sample_count)};
@@ -33,7 +33,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces
33 } 33 }
34 34
35 processor.memory->WriteBlockUnsafe(address + pos, output.data(), 35 processor.memory->WriteBlockUnsafe(address + pos, output.data(),
36 output.size() * sizeof(s16)); 36 processor.sample_count * sizeof(s16));
37 pos += static_cast<u32>(processor.sample_count * sizeof(s16)); 37 pos += static_cast<u32>(processor.sample_count * sizeof(s16));
38 if (pos >= size) { 38 if (pos >= size) {
39 pos = 0; 39 pos = 0;
diff --git a/src/audio_core/renderer/command/sink/device.cpp b/src/audio_core/renderer/command/sink/device.cpp
index e88372a75..5f74dd7ad 100644
--- a/src/audio_core/renderer/command/sink/device.cpp
+++ b/src/audio_core/renderer/command/sink/device.cpp
@@ -33,8 +33,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) {
33 .consumed{false}, 33 .consumed{false},
34 }; 34 };
35 35
36 std::vector<s16> samples(out_buffer.frames * input_count); 36 std::array<s16, TargetSampleCount * MaxChannels> samples{};
37
38 for (u32 channel = 0; channel < input_count; channel++) { 37 for (u32 channel = 0; channel < input_count; channel++) {
39 const auto offset{inputs[channel] * out_buffer.frames}; 38 const auto offset{inputs[channel] * out_buffer.frames};
40 39
@@ -45,7 +44,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) {
45 } 44 }
46 45
47 out_buffer.tag = reinterpret_cast<u64>(samples.data()); 46 out_buffer.tag = reinterpret_cast<u64>(samples.data());
48 stream->AppendBuffer(out_buffer, samples); 47 stream->AppendBuffer(out_buffer, {samples.data(), out_buffer.frames * input_count});
49 48
50 if (stream->IsPaused()) { 49 if (stream->IsPaused()) {
51 stream->Start(); 50 stream->Start();
diff --git a/src/audio_core/renderer/mix/mix_context.cpp b/src/audio_core/renderer/mix/mix_context.cpp
index 35b748ede..3a18ae7c2 100644
--- a/src/audio_core/renderer/mix/mix_context.cpp
+++ b/src/audio_core/renderer/mix/mix_context.cpp
@@ -125,10 +125,10 @@ bool MixContext::TSortInfo(const SplitterContext& splitter_context) {
125 return false; 125 return false;
126 } 126 }
127 127
128 std::vector<s32> sorted_results{node_states.GetSortedResuls()}; 128 auto sorted_results{node_states.GetSortedResuls()};
129 const auto result_size{std::min(count, static_cast<s32>(sorted_results.size()))}; 129 const auto result_size{std::min(count, static_cast<s32>(sorted_results.second))};
130 for (s32 i = 0; i < result_size; i++) { 130 for (s32 i = 0; i < result_size; i++) {
131 sorted_mix_infos[i] = &mix_infos[sorted_results[i]]; 131 sorted_mix_infos[i] = &mix_infos[sorted_results.first[i]];
132 } 132 }
133 133
134 CalcMixBufferOffset(); 134 CalcMixBufferOffset();
diff --git a/src/audio_core/renderer/nodes/node_states.cpp b/src/audio_core/renderer/nodes/node_states.cpp
index 1821a51e6..b7a44a54c 100644
--- a/src/audio_core/renderer/nodes/node_states.cpp
+++ b/src/audio_core/renderer/nodes/node_states.cpp
@@ -134,8 +134,8 @@ u32 NodeStates::GetNodeCount() const {
134 return node_count; 134 return node_count;
135} 135}
136 136
137std::vector<s32> NodeStates::GetSortedResuls() const { 137std::pair<std::span<u32>::reverse_iterator, size_t> NodeStates::GetSortedResuls() const {
138 return {results.rbegin(), results.rbegin() + result_pos}; 138 return {results.rbegin(), result_pos};
139} 139}
140 140
141} // namespace AudioCore::AudioRenderer 141} // namespace AudioCore::AudioRenderer
diff --git a/src/audio_core/renderer/nodes/node_states.h b/src/audio_core/renderer/nodes/node_states.h
index 94b1d1254..e768cd4b5 100644
--- a/src/audio_core/renderer/nodes/node_states.h
+++ b/src/audio_core/renderer/nodes/node_states.h
@@ -175,7 +175,7 @@ public:
175 * 175 *
176 * @return Vector of nodes in reverse order. 176 * @return Vector of nodes in reverse order.
177 */ 177 */
178 std::vector<s32> GetSortedResuls() const; 178 std::pair<std::span<u32>::reverse_iterator, size_t> GetSortedResuls() const;
179 179
180private: 180private:
181 /// Number of nodes in the graph 181 /// Number of nodes in the graph
diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp
index 53b258c4f..a23627472 100644
--- a/src/audio_core/renderer/system.cpp
+++ b/src/audio_core/renderer/system.cpp
@@ -444,6 +444,7 @@ Result System::Update(std::span<const u8> input, std::span<u8> performance, std:
444 std::scoped_lock l{lock}; 444 std::scoped_lock l{lock};
445 445
446 const auto start_time{core.CoreTiming().GetClockTicks()}; 446 const auto start_time{core.CoreTiming().GetClockTicks()};
447 std::memset(output.data(), 0, output.size());
447 448
448 InfoUpdater info_updater(input, output, process_handle, behavior); 449 InfoUpdater info_updater(input, output, process_handle, behavior);
449 450
diff --git a/src/audio_core/sink/null_sink.h b/src/audio_core/sink/null_sink.h
index 1215d3cd2..b6b43c93e 100644
--- a/src/audio_core/sink/null_sink.h
+++ b/src/audio_core/sink/null_sink.h
@@ -20,7 +20,7 @@ public:
20 explicit NullSinkStreamImpl(Core::System& system_, StreamType type_) 20 explicit NullSinkStreamImpl(Core::System& system_, StreamType type_)
21 : SinkStream{system_, type_} {} 21 : SinkStream{system_, type_} {}
22 ~NullSinkStreamImpl() override {} 22 ~NullSinkStreamImpl() override {}
23 void AppendBuffer(SinkBuffer&, std::vector<s16>&) override {} 23 void AppendBuffer(SinkBuffer&, std::span<s16>) override {}
24 std::vector<s16> ReleaseBuffer(u64) override { 24 std::vector<s16> ReleaseBuffer(u64) override {
25 return {}; 25 return {};
26 } 26 }
diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp
index f44fedfd5..404dcd0e9 100644
--- a/src/audio_core/sink/sink_stream.cpp
+++ b/src/audio_core/sink/sink_stream.cpp
@@ -15,11 +15,10 @@
15#include "common/settings.h" 15#include "common/settings.h"
16#include "core/core.h" 16#include "core/core.h"
17#include "core/core_timing.h" 17#include "core/core_timing.h"
18#include "core/core_timing_util.h"
19 18
20namespace AudioCore::Sink { 19namespace AudioCore::Sink {
21 20
22void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) { 21void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span<s16> samples) {
23 if (type == StreamType::In) { 22 if (type == StreamType::In) {
24 queue.enqueue(buffer); 23 queue.enqueue(buffer);
25 queued_buffers++; 24 queued_buffers++;
@@ -67,15 +66,16 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
67 static_cast<s16>(std::clamp(right_sample, min, max)); 66 static_cast<s16>(std::clamp(right_sample, min, max));
68 } 67 }
69 68
70 samples.resize(samples.size() / system_channels * device_channels); 69 samples = samples.subspan(0, samples.size() / system_channels * device_channels);
71 70
72 } else if (system_channels == 2 && device_channels == 6) { 71 } else if (system_channels == 2 && device_channels == 6) {
73 // We need moar samples! Not all games will provide 6 channel audio. 72 // We need moar samples! Not all games will provide 6 channel audio.
74 // TODO: Implement some upmixing here. Currently just passthrough, with other 73 // TODO: Implement some upmixing here. Currently just passthrough, with other
75 // channels left as silence. 74 // channels left as silence.
76 std::vector<s16> new_samples(samples.size() / system_channels * device_channels, 0); 75 auto new_size = samples.size() / system_channels * device_channels;
76 tmp_samples.resize_destructive(new_size);
77 77
78 for (u32 read_index = 0, write_index = 0; read_index < samples.size(); 78 for (u32 read_index = 0, write_index = 0; read_index < new_size;
79 read_index += system_channels, write_index += device_channels) { 79 read_index += system_channels, write_index += device_channels) {
80 const auto left_sample{static_cast<s16>(std::clamp( 80 const auto left_sample{static_cast<s16>(std::clamp(
81 static_cast<s32>( 81 static_cast<s32>(
@@ -83,7 +83,7 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
83 volume), 83 volume),
84 min, max))}; 84 min, max))};
85 85
86 new_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample; 86 tmp_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample;
87 87
88 const auto right_sample{static_cast<s16>(std::clamp( 88 const auto right_sample{static_cast<s16>(std::clamp(
89 static_cast<s32>( 89 static_cast<s32>(
@@ -91,9 +91,9 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
91 volume), 91 volume),
92 min, max))}; 92 min, max))};
93 93
94 new_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample; 94 tmp_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample;
95 } 95 }
96 samples = std::move(new_samples); 96 samples = std::span<s16>(tmp_samples);
97 97
98 } else if (volume != 1.0f) { 98 } else if (volume != 1.0f) {
99 for (u32 i = 0; i < samples.size(); i++) { 99 for (u32 i = 0; i < samples.size(); i++) {
diff --git a/src/audio_core/sink/sink_stream.h b/src/audio_core/sink/sink_stream.h
index 41cbadc9c..98d72ace1 100644
--- a/src/audio_core/sink/sink_stream.h
+++ b/src/audio_core/sink/sink_stream.h
@@ -16,6 +16,7 @@
16#include "common/polyfill_thread.h" 16#include "common/polyfill_thread.h"
17#include "common/reader_writer_queue.h" 17#include "common/reader_writer_queue.h"
18#include "common/ring_buffer.h" 18#include "common/ring_buffer.h"
19#include "common/scratch_buffer.h"
19#include "common/thread.h" 20#include "common/thread.h"
20 21
21namespace Core { 22namespace Core {
@@ -170,7 +171,7 @@ public:
170 * @param buffer - Audio buffer information to be queued. 171 * @param buffer - Audio buffer information to be queued.
171 * @param samples - The s16 samples to be queue for playback. 172 * @param samples - The s16 samples to be queue for playback.
172 */ 173 */
173 virtual void AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples); 174 virtual void AppendBuffer(SinkBuffer& buffer, std::span<s16> samples);
174 175
175 /** 176 /**
176 * Release a buffer. Audio In only, will fill a buffer with recorded samples. 177 * Release a buffer. Audio In only, will fill a buffer with recorded samples.
@@ -255,6 +256,8 @@ private:
255 /// Signalled when ring buffer entries are consumed 256 /// Signalled when ring buffer entries are consumed
256 std::condition_variable_any release_cv; 257 std::condition_variable_any release_cv;
257 std::mutex release_mutex; 258 std::mutex release_mutex;
259 /// Temporary buffer for appending samples when upmixing
260 Common::ScratchBuffer<s16> tmp_samples{};
258}; 261};
259 262
260using SinkStreamPtr = std::unique_ptr<SinkStream>; 263using SinkStreamPtr = std::unique_ptr<SinkStream>;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index efc4a9fe9..3adf13a3f 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64)
172 x64/cpu_wait.h 172 x64/cpu_wait.h
173 x64/native_clock.cpp 173 x64/native_clock.cpp
174 x64/native_clock.h 174 x64/native_clock.h
175 x64/rdtsc.cpp
176 x64/rdtsc.h
175 x64/xbyak_abi.h 177 x64/xbyak_abi.h
176 x64/xbyak_util.h 178 x64/xbyak_util.h
177 ) 179 )
diff --git a/src/common/fs/fs.cpp b/src/common/fs/fs.cpp
index 6d66c926d..1baf6d746 100644
--- a/src/common/fs/fs.cpp
+++ b/src/common/fs/fs.cpp
@@ -436,7 +436,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable
436 436
437 if (True(filter & DirEntryFilter::File) && 437 if (True(filter & DirEntryFilter::File) &&
438 entry.status().type() == fs::file_type::regular) { 438 entry.status().type() == fs::file_type::regular) {
439 if (!callback(entry.path())) { 439 if (!callback(entry)) {
440 callback_error = true; 440 callback_error = true;
441 break; 441 break;
442 } 442 }
@@ -444,7 +444,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable
444 444
445 if (True(filter & DirEntryFilter::Directory) && 445 if (True(filter & DirEntryFilter::Directory) &&
446 entry.status().type() == fs::file_type::directory) { 446 entry.status().type() == fs::file_type::directory) {
447 if (!callback(entry.path())) { 447 if (!callback(entry)) {
448 callback_error = true; 448 callback_error = true;
449 break; 449 break;
450 } 450 }
@@ -493,7 +493,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path,
493 493
494 if (True(filter & DirEntryFilter::File) && 494 if (True(filter & DirEntryFilter::File) &&
495 entry.status().type() == fs::file_type::regular) { 495 entry.status().type() == fs::file_type::regular) {
496 if (!callback(entry.path())) { 496 if (!callback(entry)) {
497 callback_error = true; 497 callback_error = true;
498 break; 498 break;
499 } 499 }
@@ -501,7 +501,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path,
501 501
502 if (True(filter & DirEntryFilter::Directory) && 502 if (True(filter & DirEntryFilter::Directory) &&
503 entry.status().type() == fs::file_type::directory) { 503 entry.status().type() == fs::file_type::directory) {
504 if (!callback(entry.path())) { 504 if (!callback(entry)) {
505 callback_error = true; 505 callback_error = true;
506 break; 506 break;
507 } 507 }
diff --git a/src/common/fs/fs_types.h b/src/common/fs/fs_types.h
index 5a4090c19..900f85d24 100644
--- a/src/common/fs/fs_types.h
+++ b/src/common/fs/fs_types.h
@@ -66,6 +66,6 @@ DECLARE_ENUM_FLAG_OPERATORS(DirEntryFilter);
66 * @returns A boolean value. 66 * @returns A boolean value.
67 * Return true to indicate whether the callback is successful, false otherwise. 67 * Return true to indicate whether the callback is successful, false otherwise.
68 */ 68 */
69using DirEntryCallable = std::function<bool(const std::filesystem::path& path)>; 69using DirEntryCallable = std::function<bool(const std::filesystem::directory_entry& entry)>;
70 70
71} // namespace Common::FS 71} // namespace Common::FS
diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h
index 4c328ab44..416680d44 100644
--- a/src/common/ring_buffer.h
+++ b/src/common/ring_buffer.h
@@ -9,6 +9,7 @@
9#include <cstddef> 9#include <cstddef>
10#include <cstring> 10#include <cstring>
11#include <new> 11#include <new>
12#include <span>
12#include <type_traits> 13#include <type_traits>
13#include <vector> 14#include <vector>
14 15
@@ -53,7 +54,7 @@ public:
53 return push_count; 54 return push_count;
54 } 55 }
55 56
56 std::size_t Push(const std::vector<T>& input) { 57 std::size_t Push(const std::span<T> input) {
57 return Push(input.data(), input.size()); 58 return Push(input.data(), input.size());
58 } 59 }
59 60
diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h
index a69a5a7af..6fe907953 100644
--- a/src/common/scratch_buffer.h
+++ b/src/common/scratch_buffer.h
@@ -3,6 +3,9 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <iterator>
7
8#include "common/concepts.h"
6#include "common/make_unique_for_overwrite.h" 9#include "common/make_unique_for_overwrite.h"
7 10
8namespace Common { 11namespace Common {
@@ -16,6 +19,12 @@ namespace Common {
16template <typename T> 19template <typename T>
17class ScratchBuffer { 20class ScratchBuffer {
18public: 21public:
22 using iterator = T*;
23 using const_iterator = const T*;
24 using value_type = T;
25 using element_type = T;
26 using iterator_category = std::contiguous_iterator_tag;
27
19 ScratchBuffer() = default; 28 ScratchBuffer() = default;
20 29
21 explicit ScratchBuffer(size_t initial_capacity) 30 explicit ScratchBuffer(size_t initial_capacity)
diff --git a/src/common/settings.h b/src/common/settings.h
index 9682281b0..3aedf3850 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -483,6 +483,7 @@ struct Values {
483 AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3, 483 AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3,
484 "astc_recompression"}; 484 "astc_recompression"};
485 SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"}; 485 SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"};
486 SwitchableSetting<bool> barrier_feedback_loops{true, "barrier_feedback_loops"};
486 487
487 SwitchableSetting<u8> bg_red{0, "bg_red"}; 488 SwitchableSetting<u8> bg_red{0, "bg_red"};
488 SwitchableSetting<u8> bg_green{0, "bg_green"}; 489 SwitchableSetting<u8> bg_green{0, "bg_green"};
diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp
index 782859196..9415eed29 100644
--- a/src/common/steady_clock.cpp
+++ b/src/common/steady_clock.cpp
@@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() {
28 // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. 28 // GetSystemTimePreciseAsFileTime returns the file time in 100ns units.
29 static constexpr s64 Multiplier = 100; 29 static constexpr s64 Multiplier = 100;
30 // Convert Windows epoch to Unix epoch. 30 // Convert Windows epoch to Unix epoch.
31 static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL; 31 static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL;
32 32
33 FILETIME filetime; 33 FILETIME filetime;
34 GetSystemTimePreciseAsFileTime(&filetime); 34 GetSystemTimePreciseAsFileTime(&filetime);
35 return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + 35 return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) +
36 static_cast<s64>(filetime.dwLowDateTime)) - 36 static_cast<s64>(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch);
37 WindowsEpochToUnixEpochNS;
38} 37}
39#endif 38#endif
40 39
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index 817e71d52..dc0dcbd68 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -2,88 +2,75 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/steady_clock.h" 4#include "common/steady_clock.h"
5#include "common/uint128.h"
6#include "common/wall_clock.h" 5#include "common/wall_clock.h"
7 6
8#ifdef ARCHITECTURE_x86_64 7#ifdef ARCHITECTURE_x86_64
9#include "common/x64/cpu_detect.h" 8#include "common/x64/cpu_detect.h"
10#include "common/x64/native_clock.h" 9#include "common/x64/native_clock.h"
10#include "common/x64/rdtsc.h"
11#endif 11#endif
12 12
13namespace Common { 13namespace Common {
14 14
15class StandardWallClock final : public WallClock { 15class StandardWallClock final : public WallClock {
16public: 16public:
17 explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) 17 explicit StandardWallClock() : start_time{SteadyClock::Now()} {}
18 : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false},
19 start_time{SteadyClock::Now()} {}
20 18
21 std::chrono::nanoseconds GetTimeNS() override { 19 std::chrono::nanoseconds GetTimeNS() const override {
22 return SteadyClock::Now() - start_time; 20 return SteadyClock::Now() - start_time;
23 } 21 }
24 22
25 std::chrono::microseconds GetTimeUS() override { 23 std::chrono::microseconds GetTimeUS() const override {
26 return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS()); 24 return static_cast<std::chrono::microseconds>(GetHostTicksElapsed() / NsToUsRatio::den);
27 } 25 }
28 26
29 std::chrono::milliseconds GetTimeMS() override { 27 std::chrono::milliseconds GetTimeMS() const override {
30 return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS()); 28 return static_cast<std::chrono::milliseconds>(GetHostTicksElapsed() / NsToMsRatio::den);
31 } 29 }
32 30
33 u64 GetClockCycles() override { 31 u64 GetCNTPCT() const override {
34 const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); 32 return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
35 return Common::Divide128On32(temp, NS_RATIO).first;
36 } 33 }
37 34
38 u64 GetCPUCycles() override { 35 u64 GetGPUTick() const override {
39 const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); 36 return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
40 return Common::Divide128On32(temp, NS_RATIO).first;
41 } 37 }
42 38
43 void Pause([[maybe_unused]] bool is_paused) override { 39 u64 GetHostTicksNow() const override {
44 // Do nothing in this clock type. 40 return static_cast<u64>(SteadyClock::Now().time_since_epoch().count());
41 }
42
43 u64 GetHostTicksElapsed() const override {
44 return static_cast<u64>(GetTimeNS().count());
45 }
46
47 bool IsNative() const override {
48 return false;
45 } 49 }
46 50
47private: 51private:
48 SteadyClock::time_point start_time; 52 SteadyClock::time_point start_time;
49}; 53};
50 54
55std::unique_ptr<WallClock> CreateOptimalClock() {
51#ifdef ARCHITECTURE_x86_64 56#ifdef ARCHITECTURE_x86_64
52
53std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
54 u64 emulated_clock_frequency) {
55 const auto& caps = GetCPUCaps(); 57 const auto& caps = GetCPUCaps();
56 u64 rtsc_frequency = 0;
57 if (caps.invariant_tsc) {
58 rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency();
59 }
60 58
61 // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: 59 if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) {
62 // - A nanosecond 60 return std::make_unique<X64::NativeClock>(caps.tsc_frequency);
63 // - The emulated CPU frequency
64 // - The emulated clock counter frequency (CNTFRQ)
65 if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency ||
66 rtsc_frequency <= emulated_clock_frequency) {
67 return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
68 emulated_clock_frequency);
69 } else { 61 } else {
70 return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, 62 // Fallback to StandardWallClock if the hardware TSC
71 rtsc_frequency); 63 // - Is not invariant
64 // - Is not more precise than GPUTickFreq
65 return std::make_unique<StandardWallClock>();
72 } 66 }
73}
74
75#else 67#else
76 68 return std::make_unique<StandardWallClock>();
77std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
78 u64 emulated_clock_frequency) {
79 return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
80}
81
82#endif 69#endif
70}
83 71
84std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, 72std::unique_ptr<WallClock> CreateStandardWallClock() {
85 u64 emulated_clock_frequency) { 73 return std::make_unique<StandardWallClock>();
86 return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
87} 74}
88 75
89} // namespace Common 76} // namespace Common
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
index 157ec5eae..f45d3d8c5 100644
--- a/src/common/wall_clock.h
+++ b/src/common/wall_clock.h
@@ -5,6 +5,7 @@
5 5
6#include <chrono> 6#include <chrono>
7#include <memory> 7#include <memory>
8#include <ratio>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
10 11
@@ -12,50 +13,82 @@ namespace Common {
12 13
13class WallClock { 14class WallClock {
14public: 15public:
15 static constexpr u64 NS_RATIO = 1'000'000'000; 16 static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz
16 static constexpr u64 US_RATIO = 1'000'000; 17 static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz
17 static constexpr u64 MS_RATIO = 1'000; 18 static constexpr u64 CPUTickFreq = 1'020'000'000; // T210/4 A57 CPU Tick Frequency = 1020.0 MHz
18 19
19 virtual ~WallClock() = default; 20 virtual ~WallClock() = default;
20 21
21 /// Returns current wall time in nanoseconds 22 /// @returns The time in nanoseconds since the construction of this clock.
22 [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0; 23 virtual std::chrono::nanoseconds GetTimeNS() const = 0;
23 24
24 /// Returns current wall time in microseconds 25 /// @returns The time in microseconds since the construction of this clock.
25 [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0; 26 virtual std::chrono::microseconds GetTimeUS() const = 0;
26 27
27 /// Returns current wall time in milliseconds 28 /// @returns The time in milliseconds since the construction of this clock.
28 [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0; 29 virtual std::chrono::milliseconds GetTimeMS() const = 0;
29 30
30 /// Returns current wall time in emulated clock cycles 31 /// @returns The guest CNTPCT ticks since the construction of this clock.
31 [[nodiscard]] virtual u64 GetClockCycles() = 0; 32 virtual u64 GetCNTPCT() const = 0;
32 33
33 /// Returns current wall time in emulated cpu cycles 34 /// @returns The guest GPU ticks since the construction of this clock.
34 [[nodiscard]] virtual u64 GetCPUCycles() = 0; 35 virtual u64 GetGPUTick() const = 0;
35 36
36 virtual void Pause(bool is_paused) = 0; 37 /// @returns The raw host timer ticks since an indeterminate epoch.
38 virtual u64 GetHostTicksNow() const = 0;
37 39
38 /// Tells if the wall clock, uses the host CPU's hardware clock 40 /// @returns The raw host timer ticks since the construction of this clock.
39 [[nodiscard]] bool IsNative() const { 41 virtual u64 GetHostTicksElapsed() const = 0;
40 return is_native; 42
43 /// @returns Whether the clock directly uses the host's hardware clock.
44 virtual bool IsNative() const = 0;
45
46 static inline u64 NSToCNTPCT(u64 ns) {
47 return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
48 }
49
50 static inline u64 NSToGPUTick(u64 ns) {
51 return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
52 }
53
54 // Cycle Timing
55
56 static inline u64 CPUTickToNS(u64 cpu_tick) {
57 return cpu_tick * CPUTickToNsRatio::num / CPUTickToNsRatio::den;
58 }
59
60 static inline u64 CPUTickToUS(u64 cpu_tick) {
61 return cpu_tick * CPUTickToUsRatio::num / CPUTickToUsRatio::den;
62 }
63
64 static inline u64 CPUTickToCNTPCT(u64 cpu_tick) {
65 return cpu_tick * CPUTickToCNTPCTRatio::num / CPUTickToCNTPCTRatio::den;
66 }
67
68 static inline u64 CPUTickToGPUTick(u64 cpu_tick) {
69 return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den;
41 } 70 }
42 71
43protected: 72protected:
44 explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_) 73 using NsRatio = std::nano;
45 : emulated_cpu_frequency{emulated_cpu_frequency_}, 74 using UsRatio = std::micro;
46 emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {} 75 using MsRatio = std::milli;
76
77 using NsToUsRatio = std::ratio_divide<std::nano, std::micro>;
78 using NsToMsRatio = std::ratio_divide<std::nano, std::milli>;
79 using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>;
80 using NsToGPUTickRatio = std::ratio<GPUTickFreq, std::nano::den>;
47 81
48 u64 emulated_cpu_frequency; 82 // Cycle Timing
49 u64 emulated_clock_frequency;
50 83
51private: 84 using CPUTickToNsRatio = std::ratio<std::nano::den, CPUTickFreq>;
52 bool is_native; 85 using CPUTickToUsRatio = std::ratio<std::micro::den, CPUTickFreq>;
86 using CPUTickToCNTPCTRatio = std::ratio<CNTFRQ, CPUTickFreq>;
87 using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>;
53}; 88};
54 89
55[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, 90std::unique_ptr<WallClock> CreateOptimalClock();
56 u64 emulated_clock_frequency);
57 91
58[[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, 92std::unique_ptr<WallClock> CreateStandardWallClock();
59 u64 emulated_clock_frequency);
60 93
61} // namespace Common 94} // namespace Common
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 72ed6e96c..c998b1197 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -14,6 +14,7 @@
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "common/x64/cpu_detect.h" 16#include "common/x64/cpu_detect.h"
17#include "common/x64/rdtsc.h"
17 18
18#ifdef _WIN32 19#ifdef _WIN32
19#include <windows.h> 20#include <windows.h>
@@ -187,6 +188,8 @@ static CPUCaps Detect() {
187 caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * 188 caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) *
188 caps.tsc_crystal_ratio_numerator / 189 caps.tsc_crystal_ratio_numerator /
189 caps.tsc_crystal_ratio_denominator; 190 caps.tsc_crystal_ratio_denominator;
191 } else {
192 caps.tsc_frequency = X64::EstimateRDTSCFrequency();
190 } 193 }
191 } 194 }
192 195
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp
index cfeef6a3d..c53dd4945 100644
--- a/src/common/x64/cpu_wait.cpp
+++ b/src/common/x64/cpu_wait.cpp
@@ -9,19 +9,11 @@
9 9
10#include "common/x64/cpu_detect.h" 10#include "common/x64/cpu_detect.h"
11#include "common/x64/cpu_wait.h" 11#include "common/x64/cpu_wait.h"
12#include "common/x64/rdtsc.h"
12 13
13namespace Common::X64 { 14namespace Common::X64 {
14 15
15#ifdef _MSC_VER 16#ifdef _MSC_VER
16__forceinline static u64 FencedRDTSC() {
17 _mm_lfence();
18 _ReadWriteBarrier();
19 const u64 result = __rdtsc();
20 _mm_lfence();
21 _ReadWriteBarrier();
22 return result;
23}
24
25__forceinline static void TPAUSE() { 17__forceinline static void TPAUSE() {
26 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 18 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
27 // For reference: 19 // For reference:
@@ -32,16 +24,6 @@ __forceinline static void TPAUSE() {
32 _tpause(0, FencedRDTSC() + PauseCycles); 24 _tpause(0, FencedRDTSC() + PauseCycles);
33} 25}
34#else 26#else
35static u64 FencedRDTSC() {
36 u64 eax;
37 u64 edx;
38 asm volatile("lfence\n\t"
39 "rdtsc\n\t"
40 "lfence\n\t"
41 : "=a"(eax), "=d"(edx));
42 return (edx << 32) | eax;
43}
44
45static void TPAUSE() { 27static void TPAUSE() {
46 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 28 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
47 // For reference: 29 // For reference:
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 277b00662..7d2a26bd9 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -1,164 +1,50 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include <array>
5#include <chrono>
6#include <thread>
7
8#include "common/atomic_ops.h"
9#include "common/steady_clock.h"
10#include "common/uint128.h" 4#include "common/uint128.h"
11#include "common/x64/native_clock.h" 5#include "common/x64/native_clock.h"
6#include "common/x64/rdtsc.h"
12 7
13#ifdef _MSC_VER 8namespace Common::X64 {
14#include <intrin.h>
15#endif
16
17namespace Common {
18 9
19#ifdef _MSC_VER 10NativeClock::NativeClock(u64 rdtsc_frequency_)
20__forceinline static u64 FencedRDTSC() { 11 : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_},
21 _mm_lfence(); 12 ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)},
22 _ReadWriteBarrier(); 13 us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)},
23 const u64 result = __rdtsc(); 14 ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)},
24 _mm_lfence(); 15 cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)},
25 _ReadWriteBarrier(); 16 gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {}
26 return result;
27}
28#else
29static u64 FencedRDTSC() {
30 u64 eax;
31 u64 edx;
32 asm volatile("lfence\n\t"
33 "rdtsc\n\t"
34 "lfence\n\t"
35 : "=a"(eax), "=d"(edx));
36 return (edx << 32) | eax;
37}
38#endif
39 17
40template <u64 Nearest> 18std::chrono::nanoseconds NativeClock::GetTimeNS() const {
41static u64 RoundToNearest(u64 value) { 19 return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)};
42 const auto mod = value % Nearest;
43 return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod);
44} 20}
45 21
46u64 EstimateRDTSCFrequency() { 22std::chrono::microseconds NativeClock::GetTimeUS() const {
47 // Discard the first result measuring the rdtsc. 23 return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)};
48 FencedRDTSC();
49 std::this_thread::sleep_for(std::chrono::milliseconds{1});
50 FencedRDTSC();
51
52 // Get the current time.
53 const auto start_time = Common::RealTimeClock::Now();
54 const u64 tsc_start = FencedRDTSC();
55 // Wait for 250 milliseconds.
56 std::this_thread::sleep_for(std::chrono::milliseconds{250});
57 const auto end_time = Common::RealTimeClock::Now();
58 const u64 tsc_end = FencedRDTSC();
59 // Calculate differences.
60 const u64 timer_diff = static_cast<u64>(
61 std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
62 const u64 tsc_diff = tsc_end - tsc_start;
63 const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
64 return RoundToNearest<1000>(tsc_freq);
65} 24}
66 25
67namespace X64 { 26std::chrono::milliseconds NativeClock::GetTimeMS() const {
68NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, 27 return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)};
69 u64 rtsc_frequency_)
70 : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
71 rtsc_frequency_} {
72 // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed.
73 time_sync_thread = std::jthread{[this](std::stop_token token) {
74 // Get the current time.
75 const auto start_time = Common::RealTimeClock::Now();
76 const u64 tsc_start = FencedRDTSC();
77 // Wait for 10 seconds.
78 if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) {
79 return;
80 }
81 const auto end_time = Common::RealTimeClock::Now();
82 const u64 tsc_end = FencedRDTSC();
83 // Calculate differences.
84 const u64 timer_diff = static_cast<u64>(
85 std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
86 const u64 tsc_diff = tsc_end - tsc_start;
87 const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
88 rtsc_frequency = tsc_freq;
89 CalculateAndSetFactors();
90 }};
91
92 time_point.inner.last_measure = FencedRDTSC();
93 time_point.inner.accumulated_ticks = 0U;
94 CalculateAndSetFactors();
95} 28}
96 29
97u64 NativeClock::GetRTSC() { 30u64 NativeClock::GetCNTPCT() const {
98 TimePoint new_time_point{}; 31 return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor);
99 TimePoint current_time_point{};
100
101 current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
102 do {
103 const u64 current_measure = FencedRDTSC();
104 u64 diff = current_measure - current_time_point.inner.last_measure;
105 diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
106 new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
107 ? current_measure
108 : current_time_point.inner.last_measure;
109 new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
110 } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
111 current_time_point.pack, current_time_point.pack));
112 return new_time_point.inner.accumulated_ticks;
113} 32}
114 33
115void NativeClock::Pause(bool is_paused) { 34u64 NativeClock::GetGPUTick() const {
116 if (!is_paused) { 35 return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor);
117 TimePoint current_time_point{};
118 TimePoint new_time_point{};
119
120 current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
121 do {
122 new_time_point.pack = current_time_point.pack;
123 new_time_point.inner.last_measure = FencedRDTSC();
124 } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
125 current_time_point.pack, current_time_point.pack));
126 }
127} 36}
128 37
129std::chrono::nanoseconds NativeClock::GetTimeNS() { 38u64 NativeClock::GetHostTicksNow() const {
130 const u64 rtsc_value = GetRTSC(); 39 return FencedRDTSC();
131 return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)};
132} 40}
133 41
134std::chrono::microseconds NativeClock::GetTimeUS() { 42u64 NativeClock::GetHostTicksElapsed() const {
135 const u64 rtsc_value = GetRTSC(); 43 return FencedRDTSC() - start_ticks;
136 return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)};
137} 44}
138 45
139std::chrono::milliseconds NativeClock::GetTimeMS() { 46bool NativeClock::IsNative() const {
140 const u64 rtsc_value = GetRTSC(); 47 return true;
141 return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)};
142} 48}
143 49
144u64 NativeClock::GetClockCycles() { 50} // namespace Common::X64
145 const u64 rtsc_value = GetRTSC();
146 return MultiplyHigh(rtsc_value, clock_rtsc_factor);
147}
148
149u64 NativeClock::GetCPUCycles() {
150 const u64 rtsc_value = GetRTSC();
151 return MultiplyHigh(rtsc_value, cpu_rtsc_factor);
152}
153
154void NativeClock::CalculateAndSetFactors() {
155 ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
156 us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
157 ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency);
158 clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency);
159 cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency);
160}
161
162} // namespace X64
163
164} // namespace Common
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
index 03ca291d8..334415eff 100644
--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -3,58 +3,39 @@
3 3
4#pragma once 4#pragma once
5 5
6#include "common/polyfill_thread.h"
7#include "common/wall_clock.h" 6#include "common/wall_clock.h"
8 7
9namespace Common { 8namespace Common::X64 {
10 9
11namespace X64 {
12class NativeClock final : public WallClock { 10class NativeClock final : public WallClock {
13public: 11public:
14 explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, 12 explicit NativeClock(u64 rdtsc_frequency_);
15 u64 rtsc_frequency_);
16 13
17 std::chrono::nanoseconds GetTimeNS() override; 14 std::chrono::nanoseconds GetTimeNS() const override;
18 15
19 std::chrono::microseconds GetTimeUS() override; 16 std::chrono::microseconds GetTimeUS() const override;
20 17
21 std::chrono::milliseconds GetTimeMS() override; 18 std::chrono::milliseconds GetTimeMS() const override;
22 19
23 u64 GetClockCycles() override; 20 u64 GetCNTPCT() const override;
24 21
25 u64 GetCPUCycles() override; 22 u64 GetGPUTick() const override;
26 23
27 void Pause(bool is_paused) override; 24 u64 GetHostTicksNow() const override;
28 25
29private: 26 u64 GetHostTicksElapsed() const override;
30 u64 GetRTSC();
31
32 void CalculateAndSetFactors();
33
34 union alignas(16) TimePoint {
35 TimePoint() : pack{} {}
36 u128 pack{};
37 struct Inner {
38 u64 last_measure{};
39 u64 accumulated_ticks{};
40 } inner;
41 };
42
43 TimePoint time_point;
44 27
45 // factors 28 bool IsNative() const override;
46 u64 clock_rtsc_factor{};
47 u64 cpu_rtsc_factor{};
48 u64 ns_rtsc_factor{};
49 u64 us_rtsc_factor{};
50 u64 ms_rtsc_factor{};
51 29
52 u64 rtsc_frequency; 30private:
53 31 u64 start_ticks;
54 std::jthread time_sync_thread; 32 u64 rdtsc_frequency;
33
34 u64 ns_rdtsc_factor;
35 u64 us_rdtsc_factor;
36 u64 ms_rdtsc_factor;
37 u64 cntpct_rdtsc_factor;
38 u64 gputick_rdtsc_factor;
55}; 39};
56} // namespace X64
57
58u64 EstimateRDTSCFrequency();
59 40
60} // namespace Common 41} // namespace Common::X64
diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp
new file mode 100644
index 000000000..9273274a3
--- /dev/null
+++ b/src/common/x64/rdtsc.cpp
@@ -0,0 +1,39 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <thread>
5
6#include "common/steady_clock.h"
7#include "common/uint128.h"
8#include "common/x64/rdtsc.h"
9
10namespace Common::X64 {
11
12template <u64 Nearest>
13static u64 RoundToNearest(u64 value) {
14 const auto mod = value % Nearest;
15 return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod);
16}
17
18u64 EstimateRDTSCFrequency() {
19 // Discard the first result measuring the rdtsc.
20 FencedRDTSC();
21 std::this_thread::sleep_for(std::chrono::milliseconds{1});
22 FencedRDTSC();
23
24 // Get the current time.
25 const auto start_time = RealTimeClock::Now();
26 const u64 tsc_start = FencedRDTSC();
27 // Wait for 100 milliseconds.
28 std::this_thread::sleep_for(std::chrono::milliseconds{100});
29 const auto end_time = RealTimeClock::Now();
30 const u64 tsc_end = FencedRDTSC();
31 // Calculate differences.
32 const u64 timer_diff = static_cast<u64>(
33 std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
34 const u64 tsc_diff = tsc_end - tsc_start;
35 const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
36 return RoundToNearest<100'000>(tsc_freq);
37}
38
39} // namespace Common::X64
diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h
new file mode 100644
index 000000000..0ec4f52f9
--- /dev/null
+++ b/src/common/x64/rdtsc.h
@@ -0,0 +1,37 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#ifdef _MSC_VER
7#include <intrin.h>
8#endif
9
10#include "common/common_types.h"
11
12namespace Common::X64 {
13
14#ifdef _MSC_VER
15__forceinline static u64 FencedRDTSC() {
16 _mm_lfence();
17 _ReadWriteBarrier();
18 const u64 result = __rdtsc();
19 _mm_lfence();
20 _ReadWriteBarrier();
21 return result;
22}
23#else
24static inline u64 FencedRDTSC() {
25 u64 eax;
26 u64 edx;
27 asm volatile("lfence\n\t"
28 "rdtsc\n\t"
29 "lfence\n\t"
30 : "=a"(eax), "=d"(edx));
31 return (edx << 32) | eax;
32}
33#endif
34
35u64 EstimateRDTSCFrequency();
36
37} // namespace Common::X64
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 227c431bc..3655b8478 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -14,7 +14,6 @@ add_library(core STATIC
14 core.h 14 core.h
15 core_timing.cpp 15 core_timing.cpp
16 core_timing.h 16 core_timing.h
17 core_timing_util.h
18 cpu_manager.cpp 17 cpu_manager.cpp
19 cpu_manager.h 18 cpu_manager.h
20 crypto/aes_util.cpp 19 crypto/aes_util.cpp
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 4f2692b05..4f0a3f8ea 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -16,12 +16,11 @@
16 16
17#include "common/microprofile.h" 17#include "common/microprofile.h"
18#include "core/core_timing.h" 18#include "core/core_timing.h"
19#include "core/core_timing_util.h"
20#include "core/hardware_properties.h" 19#include "core/hardware_properties.h"
21 20
22namespace Core::Timing { 21namespace Core::Timing {
23 22
24constexpr s64 MAX_SLICE_LENGTH = 4000; 23constexpr s64 MAX_SLICE_LENGTH = 10000;
25 24
26std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) { 25std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
27 return std::make_shared<EventType>(std::move(callback), std::move(name)); 26 return std::make_shared<EventType>(std::move(callback), std::move(name));
@@ -45,9 +44,7 @@ struct CoreTiming::Event {
45 } 44 }
46}; 45};
47 46
48CoreTiming::CoreTiming() 47CoreTiming::CoreTiming() : clock{Common::CreateOptimalClock()} {}
49 : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)},
50 event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
51 48
52CoreTiming::~CoreTiming() { 49CoreTiming::~CoreTiming() {
53 Reset(); 50 Reset();
@@ -68,7 +65,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
68 on_thread_init = std::move(on_thread_init_); 65 on_thread_init = std::move(on_thread_init_);
69 event_fifo_id = 0; 66 event_fifo_id = 0;
70 shutting_down = false; 67 shutting_down = false;
71 ticks = 0; 68 cpu_ticks = 0;
72 const auto empty_timed_callback = [](std::uintptr_t, u64, std::chrono::nanoseconds) 69 const auto empty_timed_callback = [](std::uintptr_t, u64, std::chrono::nanoseconds)
73 -> std::optional<std::chrono::nanoseconds> { return std::nullopt; }; 70 -> std::optional<std::chrono::nanoseconds> { return std::nullopt; };
74 ev_lost = CreateEvent("_lost_event", empty_timed_callback); 71 ev_lost = CreateEvent("_lost_event", empty_timed_callback);
@@ -173,38 +170,30 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,
173} 170}
174 171
175void CoreTiming::AddTicks(u64 ticks_to_add) { 172void CoreTiming::AddTicks(u64 ticks_to_add) {
176 ticks += ticks_to_add; 173 cpu_ticks += ticks_to_add;
177 downcount -= static_cast<s64>(ticks); 174 downcount -= static_cast<s64>(cpu_ticks);
178} 175}
179 176
180void CoreTiming::Idle() { 177void CoreTiming::Idle() {
181 if (!event_queue.empty()) { 178 cpu_ticks += 1000U;
182 const u64 next_event_time = event_queue.front().time;
183 const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
184 if (next_ticks > ticks) {
185 ticks = next_ticks;
186 }
187 return;
188 }
189 ticks += 1000U;
190} 179}
191 180
192void CoreTiming::ResetTicks() { 181void CoreTiming::ResetTicks() {
193 downcount = MAX_SLICE_LENGTH; 182 downcount = MAX_SLICE_LENGTH;
194} 183}
195 184
196u64 CoreTiming::GetCPUTicks() const { 185u64 CoreTiming::GetClockTicks() const {
197 if (is_multicore) [[likely]] { 186 if (is_multicore) [[likely]] {
198 return cpu_clock->GetCPUCycles(); 187 return clock->GetCNTPCT();
199 } 188 }
200 return ticks; 189 return Common::WallClock::CPUTickToCNTPCT(cpu_ticks);
201} 190}
202 191
203u64 CoreTiming::GetClockTicks() const { 192u64 CoreTiming::GetGPUTicks() const {
204 if (is_multicore) [[likely]] { 193 if (is_multicore) [[likely]] {
205 return cpu_clock->GetClockCycles(); 194 return clock->GetGPUTick();
206 } 195 }
207 return CpuCyclesToClockCycles(ticks); 196 return Common::WallClock::CPUTickToGPUTick(cpu_ticks);
208} 197}
209 198
210std::optional<s64> CoreTiming::Advance() { 199std::optional<s64> CoreTiming::Advance() {
@@ -297,9 +286,7 @@ void CoreTiming::ThreadLoop() {
297 } 286 }
298 287
299 paused_set = true; 288 paused_set = true;
300 event_clock->Pause(true);
301 pause_event.Wait(); 289 pause_event.Wait();
302 event_clock->Pause(false);
303 } 290 }
304} 291}
305 292
@@ -315,25 +302,18 @@ void CoreTiming::Reset() {
315 has_started = false; 302 has_started = false;
316} 303}
317 304
318std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const {
319 if (is_multicore) [[likely]] {
320 return cpu_clock->GetTimeNS();
321 }
322 return CyclesToNs(ticks);
323}
324
325std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { 305std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
326 if (is_multicore) [[likely]] { 306 if (is_multicore) [[likely]] {
327 return event_clock->GetTimeNS(); 307 return clock->GetTimeNS();
328 } 308 }
329 return CyclesToNs(ticks); 309 return std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)};
330} 310}
331 311
332std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { 312std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
333 if (is_multicore) [[likely]] { 313 if (is_multicore) [[likely]] {
334 return event_clock->GetTimeUS(); 314 return clock->GetTimeUS();
335 } 315 }
336 return CyclesToUs(ticks); 316 return std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)};
337} 317}
338 318
339} // namespace Core::Timing 319} // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index e7c4a949f..10db1de55 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -116,14 +116,11 @@ public:
116 return downcount; 116 return downcount;
117 } 117 }
118 118
119 /// Returns current time in emulated CPU cycles 119 /// Returns the current CNTPCT tick value.
120 u64 GetCPUTicks() const;
121
122 /// Returns current time in emulated in Clock cycles
123 u64 GetClockTicks() const; 120 u64 GetClockTicks() const;
124 121
125 /// Returns current time in nanoseconds. 122 /// Returns the current GPU tick value.
126 std::chrono::nanoseconds GetCPUTimeNs() const; 123 u64 GetGPUTicks() const;
127 124
128 /// Returns current time in microseconds. 125 /// Returns current time in microseconds.
129 std::chrono::microseconds GetGlobalTimeUs() const; 126 std::chrono::microseconds GetGlobalTimeUs() const;
@@ -142,8 +139,7 @@ private:
142 139
143 void Reset(); 140 void Reset();
144 141
145 std::unique_ptr<Common::WallClock> cpu_clock; 142 std::unique_ptr<Common::WallClock> clock;
146 std::unique_ptr<Common::WallClock> event_clock;
147 143
148 s64 global_timer = 0; 144 s64 global_timer = 0;
149 145
@@ -171,7 +167,7 @@ private:
171 s64 pause_end_time{}; 167 s64 pause_end_time{};
172 168
173 /// Cycle timing 169 /// Cycle timing
174 u64 ticks{}; 170 u64 cpu_ticks{};
175 s64 downcount{}; 171 s64 downcount{};
176}; 172};
177 173
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
deleted file mode 100644
index fe5aaefc7..000000000
--- a/src/core/core_timing_util.h
+++ /dev/null
@@ -1,58 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <chrono>
7
8#include "common/common_types.h"
9#include "core/hardware_properties.h"
10
11namespace Core::Timing {
12
13namespace detail {
14constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000;
15constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000;
16} // namespace detail
17
18[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) {
19 return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED;
20}
21
22[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) {
23 return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000;
24}
25
26[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) {
27 return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000;
28}
29
30[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) {
31 return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED;
32}
33
34[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) {
35 return us.count() * detail::CNTFREQ_ADJUSTED / 1000;
36}
37
38[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) {
39 return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000;
40}
41
42[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) {
43 return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED;
44}
45
46[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) {
47 return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED);
48}
49
50[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) {
51 return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED);
52}
53
54[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) {
55 return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED);
56}
57
58} // namespace Core::Timing
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 4e61d4335..d3286b352 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -153,7 +153,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
153 const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); 153 const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
154 154
155 std::vector<VirtualDir> patch_dirs = {sdmc_load_dir}; 155 std::vector<VirtualDir> patch_dirs = {sdmc_load_dir};
156 if (load_dir != nullptr && load_dir->GetSize() > 0) { 156 if (load_dir != nullptr) {
157 const auto load_patch_dirs = load_dir->GetSubdirectories(); 157 const auto load_patch_dirs = load_dir->GetSubdirectories();
158 patch_dirs.insert(patch_dirs.end(), load_patch_dirs.begin(), load_patch_dirs.end()); 158 patch_dirs.insert(patch_dirs.end(), load_patch_dirs.begin(), load_patch_dirs.end());
159 } 159 }
@@ -354,8 +354,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
354 const auto load_dir = fs_controller.GetModificationLoadRoot(title_id); 354 const auto load_dir = fs_controller.GetModificationLoadRoot(title_id);
355 const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); 355 const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
356 if ((type != ContentRecordType::Program && type != ContentRecordType::Data) || 356 if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
357 ((load_dir == nullptr || load_dir->GetSize() <= 0) && 357 (load_dir == nullptr && sdmc_load_dir == nullptr)) {
358 (sdmc_load_dir == nullptr || sdmc_load_dir->GetSize() <= 0))) {
359 return; 358 return;
360 } 359 }
361 360
@@ -496,7 +495,7 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u
496 495
497 // General Mods (LayeredFS and IPS) 496 // General Mods (LayeredFS and IPS)
498 const auto mod_dir = fs_controller.GetModificationLoadRoot(title_id); 497 const auto mod_dir = fs_controller.GetModificationLoadRoot(title_id);
499 if (mod_dir != nullptr && mod_dir->GetSize() > 0) { 498 if (mod_dir != nullptr) {
500 for (const auto& mod : mod_dir->GetSubdirectories()) { 499 for (const auto& mod : mod_dir->GetSubdirectories()) {
501 std::string types; 500 std::string types;
502 501
@@ -540,7 +539,7 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u
540 539
541 // SDMC mod directory (RomFS LayeredFS) 540 // SDMC mod directory (RomFS LayeredFS)
542 const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); 541 const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
543 if (sdmc_mod_dir != nullptr && sdmc_mod_dir->GetSize() > 0) { 542 if (sdmc_mod_dir != nullptr) {
544 std::string types; 543 std::string types;
545 if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "exefs"))) { 544 if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "exefs"))) {
546 AppendCommaIfNotEmpty(types, "LayeredExeFS"); 545 AppendCommaIfNotEmpty(types, "LayeredExeFS");
diff --git a/src/core/file_sys/system_archive/time_zone_binary.cpp b/src/core/file_sys/system_archive/time_zone_binary.cpp
index ceb0b41c6..7c17bbefa 100644
--- a/src/core/file_sys/system_archive/time_zone_binary.cpp
+++ b/src/core/file_sys/system_archive/time_zone_binary.cpp
@@ -15,7 +15,7 @@ namespace FileSys::SystemArchive {
15const static std::map<std::string, const std::map<const char*, const std::vector<u8>>&> 15const static std::map<std::string, const std::map<const char*, const std::vector<u8>>&>
16 tzdb_zoneinfo_dirs = {{"Africa", NxTzdb::africa}, 16 tzdb_zoneinfo_dirs = {{"Africa", NxTzdb::africa},
17 {"America", NxTzdb::america}, 17 {"America", NxTzdb::america},
18 {"Antartica", NxTzdb::antartica}, 18 {"Antarctica", NxTzdb::antarctica},
19 {"Arctic", NxTzdb::arctic}, 19 {"Arctic", NxTzdb::arctic},
20 {"Asia", NxTzdb::asia}, 20 {"Asia", NxTzdb::asia},
21 {"Atlantic", NxTzdb::atlantic}, 21 {"Atlantic", NxTzdb::atlantic},
diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp
index 853b893a1..311a59e5f 100644
--- a/src/core/file_sys/vfs_concat.cpp
+++ b/src/core/file_sys/vfs_concat.cpp
@@ -150,23 +150,29 @@ std::size_t ConcatenatedVfsFile::Read(u8* data, std::size_t length, std::size_t
150 while (cur_length > 0 && it != concatenation_map.end()) { 150 while (cur_length > 0 && it != concatenation_map.end()) {
151 // Check if we can read the file at this position. 151 // Check if we can read the file at this position.
152 const auto& file = it->file; 152 const auto& file = it->file;
153 const u64 file_offset = it->offset; 153 const u64 map_offset = it->offset;
154 const u64 file_size = file->GetSize(); 154 const u64 file_size = file->GetSize();
155 155
156 if (cur_offset >= file_offset + file_size) { 156 if (cur_offset > map_offset + file_size) {
157 // Entirely out of bounds read. 157 // Entirely out of bounds read.
158 break; 158 break;
159 } 159 }
160 160
161 // Read the file at this position. 161 // Read the file at this position.
162 const u64 intended_read_size = std::min<u64>(cur_length, file_size); 162 const u64 file_seek = cur_offset - map_offset;
163 const u64 intended_read_size = std::min<u64>(cur_length, file_size - file_seek);
163 const u64 actual_read_size = 164 const u64 actual_read_size =
164 file->Read(data + (cur_offset - offset), intended_read_size, cur_offset - file_offset); 165 file->Read(data + (cur_offset - offset), intended_read_size, file_seek);
165 166
166 // Update tracking. 167 // Update tracking.
167 cur_offset += actual_read_size; 168 cur_offset += actual_read_size;
168 cur_length -= actual_read_size; 169 cur_length -= actual_read_size;
169 it++; 170 it++;
171
172 // If we encountered a short read, we're done.
173 if (actual_read_size < intended_read_size) {
174 break;
175 }
170 } 176 }
171 177
172 return cur_offset - offset; 178 return cur_offset - offset;
diff --git a/src/core/file_sys/vfs_real.cpp b/src/core/file_sys/vfs_real.cpp
index 7a15d8438..b0515ec05 100644
--- a/src/core/file_sys/vfs_real.cpp
+++ b/src/core/file_sys/vfs_real.cpp
@@ -10,6 +10,7 @@
10#include "common/fs/fs.h" 10#include "common/fs/fs.h"
11#include "common/fs/path_util.h" 11#include "common/fs/path_util.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "core/file_sys/vfs.h"
13#include "core/file_sys/vfs_real.h" 14#include "core/file_sys/vfs_real.h"
14 15
15// For FileTimeStampRaw 16// For FileTimeStampRaw
@@ -72,8 +73,10 @@ VfsEntryType RealVfsFilesystem::GetEntryType(std::string_view path_) const {
72 return VfsEntryType::File; 73 return VfsEntryType::File;
73} 74}
74 75
75VirtualFile RealVfsFilesystem::OpenFile(std::string_view path_, Mode perms) { 76VirtualFile RealVfsFilesystem::OpenFileFromEntry(std::string_view path_, std::optional<u64> size,
77 Mode perms) {
76 const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault); 78 const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault);
79 std::scoped_lock lk{list_lock};
77 80
78 if (auto it = cache.find(path); it != cache.end()) { 81 if (auto it = cache.find(path); it != cache.end()) {
79 if (auto file = it->second.lock(); file) { 82 if (auto file = it->second.lock(); file) {
@@ -81,23 +84,30 @@ VirtualFile RealVfsFilesystem::OpenFile(std::string_view path_, Mode perms) {
81 } 84 }
82 } 85 }
83 86
84 if (!FS::Exists(path) || !FS::IsFile(path)) { 87 if (!size && !FS::IsFile(path)) {
85 return nullptr; 88 return nullptr;
86 } 89 }
87 90
88 auto reference = std::make_unique<FileReference>(); 91 auto reference = std::make_unique<FileReference>();
89 this->InsertReferenceIntoList(*reference); 92 this->InsertReferenceIntoListLocked(*reference);
90 93
91 auto file = 94 auto file = std::shared_ptr<RealVfsFile>(
92 std::shared_ptr<RealVfsFile>(new RealVfsFile(*this, std::move(reference), path, perms)); 95 new RealVfsFile(*this, std::move(reference), path, perms, size));
93 cache[path] = file; 96 cache[path] = file;
94 97
95 return file; 98 return file;
96} 99}
97 100
101VirtualFile RealVfsFilesystem::OpenFile(std::string_view path_, Mode perms) {
102 return OpenFileFromEntry(path_, {}, perms);
103}
104
98VirtualFile RealVfsFilesystem::CreateFile(std::string_view path_, Mode perms) { 105VirtualFile RealVfsFilesystem::CreateFile(std::string_view path_, Mode perms) {
99 const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault); 106 const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault);
100 cache.erase(path); 107 {
108 std::scoped_lock lk{list_lock};
109 cache.erase(path);
110 }
101 111
102 // Current usages of CreateFile expect to delete the contents of an existing file. 112 // Current usages of CreateFile expect to delete the contents of an existing file.
103 if (FS::IsFile(path)) { 113 if (FS::IsFile(path)) {
@@ -127,8 +137,11 @@ VirtualFile RealVfsFilesystem::CopyFile(std::string_view old_path_, std::string_
127VirtualFile RealVfsFilesystem::MoveFile(std::string_view old_path_, std::string_view new_path_) { 137VirtualFile RealVfsFilesystem::MoveFile(std::string_view old_path_, std::string_view new_path_) {
128 const auto old_path = FS::SanitizePath(old_path_, FS::DirectorySeparator::PlatformDefault); 138 const auto old_path = FS::SanitizePath(old_path_, FS::DirectorySeparator::PlatformDefault);
129 const auto new_path = FS::SanitizePath(new_path_, FS::DirectorySeparator::PlatformDefault); 139 const auto new_path = FS::SanitizePath(new_path_, FS::DirectorySeparator::PlatformDefault);
130 cache.erase(old_path); 140 {
131 cache.erase(new_path); 141 std::scoped_lock lk{list_lock};
142 cache.erase(old_path);
143 cache.erase(new_path);
144 }
132 if (!FS::RenameFile(old_path, new_path)) { 145 if (!FS::RenameFile(old_path, new_path)) {
133 return nullptr; 146 return nullptr;
134 } 147 }
@@ -137,7 +150,10 @@ VirtualFile RealVfsFilesystem::MoveFile(std::string_view old_path_, std::string_
137 150
138bool RealVfsFilesystem::DeleteFile(std::string_view path_) { 151bool RealVfsFilesystem::DeleteFile(std::string_view path_) {
139 const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault); 152 const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault);
140 cache.erase(path); 153 {
154 std::scoped_lock lk{list_lock};
155 cache.erase(path);
156 }
141 return FS::RemoveFile(path); 157 return FS::RemoveFile(path);
142} 158}
143 159
@@ -176,14 +192,17 @@ bool RealVfsFilesystem::DeleteDirectory(std::string_view path_) {
176 return FS::RemoveDirRecursively(path); 192 return FS::RemoveDirRecursively(path);
177} 193}
178 194
179void RealVfsFilesystem::RefreshReference(const std::string& path, Mode perms, 195std::unique_lock<std::mutex> RealVfsFilesystem::RefreshReference(const std::string& path,
180 FileReference& reference) { 196 Mode perms,
197 FileReference& reference) {
198 std::unique_lock lk{list_lock};
199
181 // Temporarily remove from list. 200 // Temporarily remove from list.
182 this->RemoveReferenceFromList(reference); 201 this->RemoveReferenceFromListLocked(reference);
183 202
184 // Restore file if needed. 203 // Restore file if needed.
185 if (!reference.file) { 204 if (!reference.file) {
186 this->EvictSingleReference(); 205 this->EvictSingleReferenceLocked();
187 206
188 reference.file = 207 reference.file =
189 FS::FileOpen(path, ModeFlagsToFileAccessMode(perms), FS::FileType::BinaryFile); 208 FS::FileOpen(path, ModeFlagsToFileAccessMode(perms), FS::FileType::BinaryFile);
@@ -193,12 +212,16 @@ void RealVfsFilesystem::RefreshReference(const std::string& path, Mode perms,
193 } 212 }
194 213
195 // Reinsert into list. 214 // Reinsert into list.
196 this->InsertReferenceIntoList(reference); 215 this->InsertReferenceIntoListLocked(reference);
216
217 return lk;
197} 218}
198 219
199void RealVfsFilesystem::DropReference(std::unique_ptr<FileReference>&& reference) { 220void RealVfsFilesystem::DropReference(std::unique_ptr<FileReference>&& reference) {
221 std::scoped_lock lk{list_lock};
222
200 // Remove from list. 223 // Remove from list.
201 this->RemoveReferenceFromList(*reference); 224 this->RemoveReferenceFromListLocked(*reference);
202 225
203 // Close the file. 226 // Close the file.
204 if (reference->file) { 227 if (reference->file) {
@@ -207,14 +230,14 @@ void RealVfsFilesystem::DropReference(std::unique_ptr<FileReference>&& reference
207 } 230 }
208} 231}
209 232
210void RealVfsFilesystem::EvictSingleReference() { 233void RealVfsFilesystem::EvictSingleReferenceLocked() {
211 if (num_open_files < MaxOpenFiles || open_references.empty()) { 234 if (num_open_files < MaxOpenFiles || open_references.empty()) {
212 return; 235 return;
213 } 236 }
214 237
215 // Get and remove from list. 238 // Get and remove from list.
216 auto& reference = open_references.back(); 239 auto& reference = open_references.back();
217 this->RemoveReferenceFromList(reference); 240 this->RemoveReferenceFromListLocked(reference);
218 241
219 // Close the file. 242 // Close the file.
220 if (reference.file) { 243 if (reference.file) {
@@ -223,10 +246,10 @@ void RealVfsFilesystem::EvictSingleReference() {
223 } 246 }
224 247
225 // Reinsert into closed list. 248 // Reinsert into closed list.
226 this->InsertReferenceIntoList(reference); 249 this->InsertReferenceIntoListLocked(reference);
227} 250}
228 251
229void RealVfsFilesystem::InsertReferenceIntoList(FileReference& reference) { 252void RealVfsFilesystem::InsertReferenceIntoListLocked(FileReference& reference) {
230 if (reference.file) { 253 if (reference.file) {
231 open_references.push_front(reference); 254 open_references.push_front(reference);
232 } else { 255 } else {
@@ -234,7 +257,7 @@ void RealVfsFilesystem::InsertReferenceIntoList(FileReference& reference) {
234 } 257 }
235} 258}
236 259
237void RealVfsFilesystem::RemoveReferenceFromList(FileReference& reference) { 260void RealVfsFilesystem::RemoveReferenceFromListLocked(FileReference& reference) {
238 if (reference.file) { 261 if (reference.file) {
239 open_references.erase(open_references.iterator_to(reference)); 262 open_references.erase(open_references.iterator_to(reference));
240 } else { 263 } else {
@@ -243,10 +266,10 @@ void RealVfsFilesystem::RemoveReferenceFromList(FileReference& reference) {
243} 266}
244 267
245RealVfsFile::RealVfsFile(RealVfsFilesystem& base_, std::unique_ptr<FileReference> reference_, 268RealVfsFile::RealVfsFile(RealVfsFilesystem& base_, std::unique_ptr<FileReference> reference_,
246 const std::string& path_, Mode perms_) 269 const std::string& path_, Mode perms_, std::optional<u64> size_)
247 : base(base_), reference(std::move(reference_)), path(path_), 270 : base(base_), reference(std::move(reference_)), path(path_),
248 parent_path(FS::GetParentPath(path_)), path_components(FS::SplitPathComponents(path_)), 271 parent_path(FS::GetParentPath(path_)), path_components(FS::SplitPathComponents(path_)),
249 perms(perms_) {} 272 size(size_), perms(perms_) {}
250 273
251RealVfsFile::~RealVfsFile() { 274RealVfsFile::~RealVfsFile() {
252 base.DropReference(std::move(reference)); 275 base.DropReference(std::move(reference));
@@ -257,12 +280,15 @@ std::string RealVfsFile::GetName() const {
257} 280}
258 281
259std::size_t RealVfsFile::GetSize() const { 282std::size_t RealVfsFile::GetSize() const {
260 base.RefreshReference(path, perms, *reference); 283 if (size) {
261 return reference->file ? reference->file->GetSize() : 0; 284 return *size;
285 }
286 return FS::GetSize(path);
262} 287}
263 288
264bool RealVfsFile::Resize(std::size_t new_size) { 289bool RealVfsFile::Resize(std::size_t new_size) {
265 base.RefreshReference(path, perms, *reference); 290 size.reset();
291 auto lk = base.RefreshReference(path, perms, *reference);
266 return reference->file ? reference->file->SetSize(new_size) : false; 292 return reference->file ? reference->file->SetSize(new_size) : false;
267} 293}
268 294
@@ -279,7 +305,7 @@ bool RealVfsFile::IsReadable() const {
279} 305}
280 306
281std::size_t RealVfsFile::Read(u8* data, std::size_t length, std::size_t offset) const { 307std::size_t RealVfsFile::Read(u8* data, std::size_t length, std::size_t offset) const {
282 base.RefreshReference(path, perms, *reference); 308 auto lk = base.RefreshReference(path, perms, *reference);
283 if (!reference->file || !reference->file->Seek(static_cast<s64>(offset))) { 309 if (!reference->file || !reference->file->Seek(static_cast<s64>(offset))) {
284 return 0; 310 return 0;
285 } 311 }
@@ -287,7 +313,8 @@ std::size_t RealVfsFile::Read(u8* data, std::size_t length, std::size_t offset)
287} 313}
288 314
289std::size_t RealVfsFile::Write(const u8* data, std::size_t length, std::size_t offset) { 315std::size_t RealVfsFile::Write(const u8* data, std::size_t length, std::size_t offset) {
290 base.RefreshReference(path, perms, *reference); 316 size.reset();
317 auto lk = base.RefreshReference(path, perms, *reference);
291 if (!reference->file || !reference->file->Seek(static_cast<s64>(offset))) { 318 if (!reference->file || !reference->file->Seek(static_cast<s64>(offset))) {
292 return 0; 319 return 0;
293 } 320 }
@@ -309,10 +336,11 @@ std::vector<VirtualFile> RealVfsDirectory::IterateEntries<RealVfsFile, VfsFile>(
309 336
310 std::vector<VirtualFile> out; 337 std::vector<VirtualFile> out;
311 338
312 const FS::DirEntryCallable callback = [this, &out](const std::filesystem::path& full_path) { 339 const FS::DirEntryCallable callback = [this,
313 const auto full_path_string = FS::PathToUTF8String(full_path); 340 &out](const std::filesystem::directory_entry& entry) {
341 const auto full_path_string = FS::PathToUTF8String(entry.path());
314 342
315 out.emplace_back(base.OpenFile(full_path_string, perms)); 343 out.emplace_back(base.OpenFileFromEntry(full_path_string, entry.file_size(), perms));
316 344
317 return true; 345 return true;
318 }; 346 };
@@ -330,8 +358,9 @@ std::vector<VirtualDir> RealVfsDirectory::IterateEntries<RealVfsDirectory, VfsDi
330 358
331 std::vector<VirtualDir> out; 359 std::vector<VirtualDir> out;
332 360
333 const FS::DirEntryCallable callback = [this, &out](const std::filesystem::path& full_path) { 361 const FS::DirEntryCallable callback = [this,
334 const auto full_path_string = FS::PathToUTF8String(full_path); 362 &out](const std::filesystem::directory_entry& entry) {
363 const auto full_path_string = FS::PathToUTF8String(entry.path());
335 364
336 out.emplace_back(base.OpenDirectory(full_path_string, perms)); 365 out.emplace_back(base.OpenDirectory(full_path_string, perms));
337 366
@@ -483,12 +512,10 @@ std::map<std::string, VfsEntryType, std::less<>> RealVfsDirectory::GetEntries()
483 512
484 std::map<std::string, VfsEntryType, std::less<>> out; 513 std::map<std::string, VfsEntryType, std::less<>> out;
485 514
486 const FS::DirEntryCallable callback = [&out](const std::filesystem::path& full_path) { 515 const FS::DirEntryCallable callback = [&out](const std::filesystem::directory_entry& entry) {
487 const auto filename = FS::PathToUTF8String(full_path.filename()); 516 const auto filename = FS::PathToUTF8String(entry.path().filename());
488
489 out.insert_or_assign(filename, 517 out.insert_or_assign(filename,
490 FS::IsDir(full_path) ? VfsEntryType::Directory : VfsEntryType::File); 518 entry.is_directory() ? VfsEntryType::Directory : VfsEntryType::File);
491
492 return true; 519 return true;
493 }; 520 };
494 521
diff --git a/src/core/file_sys/vfs_real.h b/src/core/file_sys/vfs_real.h
index d8c900e33..26ea7df62 100644
--- a/src/core/file_sys/vfs_real.h
+++ b/src/core/file_sys/vfs_real.h
@@ -4,6 +4,8 @@
4#pragma once 4#pragma once
5 5
6#include <map> 6#include <map>
7#include <mutex>
8#include <optional>
7#include <string_view> 9#include <string_view>
8#include "common/intrusive_list.h" 10#include "common/intrusive_list.h"
9#include "core/file_sys/mode.h" 11#include "core/file_sys/mode.h"
@@ -20,6 +22,8 @@ struct FileReference : public Common::IntrusiveListBaseNode<FileReference> {
20}; 22};
21 23
22class RealVfsFile; 24class RealVfsFile;
25class RealVfsDirectory;
26
23class RealVfsFilesystem : public VfsFilesystem { 27class RealVfsFilesystem : public VfsFilesystem {
24public: 28public:
25 RealVfsFilesystem(); 29 RealVfsFilesystem();
@@ -45,17 +49,24 @@ private:
45 std::map<std::string, std::weak_ptr<VfsFile>, std::less<>> cache; 49 std::map<std::string, std::weak_ptr<VfsFile>, std::less<>> cache;
46 ReferenceListType open_references; 50 ReferenceListType open_references;
47 ReferenceListType closed_references; 51 ReferenceListType closed_references;
52 std::mutex list_lock;
48 size_t num_open_files{}; 53 size_t num_open_files{};
49 54
50private: 55private:
51 friend class RealVfsFile; 56 friend class RealVfsFile;
52 void RefreshReference(const std::string& path, Mode perms, FileReference& reference); 57 std::unique_lock<std::mutex> RefreshReference(const std::string& path, Mode perms,
58 FileReference& reference);
53 void DropReference(std::unique_ptr<FileReference>&& reference); 59 void DropReference(std::unique_ptr<FileReference>&& reference);
54 void EvictSingleReference();
55 60
56private: 61private:
57 void InsertReferenceIntoList(FileReference& reference); 62 friend class RealVfsDirectory;
58 void RemoveReferenceFromList(FileReference& reference); 63 VirtualFile OpenFileFromEntry(std::string_view path, std::optional<u64> size,
64 Mode perms = Mode::Read);
65
66private:
67 void EvictSingleReferenceLocked();
68 void InsertReferenceIntoListLocked(FileReference& reference);
69 void RemoveReferenceFromListLocked(FileReference& reference);
59}; 70};
60 71
61// An implementation of VfsFile that represents a file on the user's computer. 72// An implementation of VfsFile that represents a file on the user's computer.
@@ -78,13 +89,14 @@ public:
78 89
79private: 90private:
80 RealVfsFile(RealVfsFilesystem& base, std::unique_ptr<FileReference> reference, 91 RealVfsFile(RealVfsFilesystem& base, std::unique_ptr<FileReference> reference,
81 const std::string& path, Mode perms = Mode::Read); 92 const std::string& path, Mode perms = Mode::Read, std::optional<u64> size = {});
82 93
83 RealVfsFilesystem& base; 94 RealVfsFilesystem& base;
84 std::unique_ptr<FileReference> reference; 95 std::unique_ptr<FileReference> reference;
85 std::string path; 96 std::string path;
86 std::string parent_path; 97 std::string parent_path;
87 std::vector<std::string> path_components; 98 std::vector<std::string> path_components;
99 std::optional<u64> size;
88 Mode perms; 100 Mode perms;
89}; 101};
90 102
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp
index faa12b4f0..75ce5a23c 100644
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -184,7 +184,8 @@ u64 KScheduler::UpdateHighestPriorityThread(KThread* highest_thread) {
184 prev_highest_thread != highest_thread) [[likely]] { 184 prev_highest_thread != highest_thread) [[likely]] {
185 if (prev_highest_thread != nullptr) [[likely]] { 185 if (prev_highest_thread != nullptr) [[likely]] {
186 IncrementScheduledCount(prev_highest_thread); 186 IncrementScheduledCount(prev_highest_thread);
187 prev_highest_thread->SetLastScheduledTick(m_kernel.System().CoreTiming().GetCPUTicks()); 187 prev_highest_thread->SetLastScheduledTick(
188 m_kernel.System().CoreTiming().GetClockTicks());
188 } 189 }
189 if (m_state.should_count_idle) { 190 if (m_state.should_count_idle) {
190 if (highest_thread != nullptr) [[likely]] { 191 if (highest_thread != nullptr) [[likely]] {
@@ -351,7 +352,7 @@ void KScheduler::SwitchThread(KThread* next_thread) {
351 352
352 // Update the CPU time tracking variables. 353 // Update the CPU time tracking variables.
353 const s64 prev_tick = m_last_context_switch_time; 354 const s64 prev_tick = m_last_context_switch_time;
354 const s64 cur_tick = m_kernel.System().CoreTiming().GetCPUTicks(); 355 const s64 cur_tick = m_kernel.System().CoreTiming().GetClockTicks();
355 const s64 tick_diff = cur_tick - prev_tick; 356 const s64 tick_diff = cur_tick - prev_tick;
356 cur_thread->AddCpuTime(m_core_id, tick_diff); 357 cur_thread->AddCpuTime(m_core_id, tick_diff);
357 if (cur_process != nullptr) { 358 if (cur_process != nullptr) {
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
index b7da3eee7..3e5b735b1 100644
--- a/src/core/hle/kernel/k_synchronization_object.cpp
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -3,6 +3,7 @@
3 3
4#include "common/assert.h" 4#include "common/assert.h"
5#include "common/common_types.h" 5#include "common/common_types.h"
6#include "common/scratch_buffer.h"
6#include "core/hle/kernel/k_scheduler.h" 7#include "core/hle/kernel/k_scheduler.h"
7#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" 8#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
8#include "core/hle/kernel/k_synchronization_object.h" 9#include "core/hle/kernel/k_synchronization_object.h"
@@ -75,7 +76,7 @@ Result KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
75 KSynchronizationObject** objects, const s32 num_objects, 76 KSynchronizationObject** objects, const s32 num_objects,
76 s64 timeout) { 77 s64 timeout) {
77 // Allocate space on stack for thread nodes. 78 // Allocate space on stack for thread nodes.
78 std::vector<ThreadListNode> thread_nodes(num_objects); 79 std::array<ThreadListNode, Svc::ArgumentHandleCountMax> thread_nodes;
79 80
80 // Prepare for wait. 81 // Prepare for wait.
81 KThread* thread = GetCurrentThreadPointer(kernel); 82 KThread* thread = GetCurrentThreadPointer(kernel);
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index 70480b725..adb6ec581 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -4,6 +4,8 @@
4#include <algorithm> 4#include <algorithm>
5#include <atomic> 5#include <atomic>
6#include <cinttypes> 6#include <cinttypes>
7#include <condition_variable>
8#include <mutex>
7#include <optional> 9#include <optional>
8#include <vector> 10#include <vector>
9 11
@@ -907,7 +909,7 @@ Result KThread::SetActivity(Svc::ThreadActivity activity) {
907 R_SUCCEED(); 909 R_SUCCEED();
908} 910}
909 911
910Result KThread::GetThreadContext3(std::vector<u8>& out) { 912Result KThread::GetThreadContext3(Common::ScratchBuffer<u8>& out) {
911 // Lock ourselves. 913 // Lock ourselves.
912 KScopedLightLock lk{m_activity_pause_lock}; 914 KScopedLightLock lk{m_activity_pause_lock};
913 915
@@ -925,15 +927,13 @@ Result KThread::GetThreadContext3(std::vector<u8>& out) {
925 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. 927 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
926 auto context = GetContext64(); 928 auto context = GetContext64();
927 context.pstate &= 0xFF0FFE20; 929 context.pstate &= 0xFF0FFE20;
928 930 out.resize_destructive(sizeof(context));
929 out.resize(sizeof(context));
930 std::memcpy(out.data(), std::addressof(context), sizeof(context)); 931 std::memcpy(out.data(), std::addressof(context), sizeof(context));
931 } else { 932 } else {
932 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. 933 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
933 auto context = GetContext32(); 934 auto context = GetContext32();
934 context.cpsr &= 0xFF0FFE20; 935 context.cpsr &= 0xFF0FFE20;
935 936 out.resize_destructive(sizeof(context));
936 out.resize(sizeof(context));
937 std::memcpy(out.data(), std::addressof(context), sizeof(context)); 937 std::memcpy(out.data(), std::addressof(context), sizeof(context));
938 } 938 }
939 } 939 }
@@ -1313,7 +1313,8 @@ void KThread::RequestDummyThreadWait() {
1313 ASSERT(this->IsDummyThread()); 1313 ASSERT(this->IsDummyThread());
1314 1314
1315 // We will block when the scheduler lock is released. 1315 // We will block when the scheduler lock is released.
1316 m_dummy_thread_runnable.store(false); 1316 std::scoped_lock lock{m_dummy_thread_mutex};
1317 m_dummy_thread_runnable = false;
1317} 1318}
1318 1319
1319void KThread::DummyThreadBeginWait() { 1320void KThread::DummyThreadBeginWait() {
@@ -1323,7 +1324,8 @@ void KThread::DummyThreadBeginWait() {
1323 } 1324 }
1324 1325
1325 // Block until runnable is no longer false. 1326 // Block until runnable is no longer false.
1326 m_dummy_thread_runnable.wait(false); 1327 std::unique_lock lock{m_dummy_thread_mutex};
1328 m_dummy_thread_cv.wait(lock, [this] { return m_dummy_thread_runnable; });
1327} 1329}
1328 1330
1329void KThread::DummyThreadEndWait() { 1331void KThread::DummyThreadEndWait() {
@@ -1331,8 +1333,11 @@ void KThread::DummyThreadEndWait() {
1331 ASSERT(this->IsDummyThread()); 1333 ASSERT(this->IsDummyThread());
1332 1334
1333 // Wake up the waiting thread. 1335 // Wake up the waiting thread.
1334 m_dummy_thread_runnable.store(true); 1336 {
1335 m_dummy_thread_runnable.notify_one(); 1337 std::scoped_lock lock{m_dummy_thread_mutex};
1338 m_dummy_thread_runnable = true;
1339 }
1340 m_dummy_thread_cv.notify_one();
1336} 1341}
1337 1342
1338void KThread::BeginWait(KThreadQueue* queue) { 1343void KThread::BeginWait(KThreadQueue* queue) {
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index f9814ac8f..dd662b3f8 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -15,6 +15,7 @@
15#include "common/intrusive_list.h" 15#include "common/intrusive_list.h"
16 16
17#include "common/intrusive_red_black_tree.h" 17#include "common/intrusive_red_black_tree.h"
18#include "common/scratch_buffer.h"
18#include "common/spin_lock.h" 19#include "common/spin_lock.h"
19#include "core/arm/arm_interface.h" 20#include "core/arm/arm_interface.h"
20#include "core/hle/kernel/k_affinity_mask.h" 21#include "core/hle/kernel/k_affinity_mask.h"
@@ -567,7 +568,7 @@ public:
567 568
568 void RemoveWaiter(KThread* thread); 569 void RemoveWaiter(KThread* thread);
569 570
570 Result GetThreadContext3(std::vector<u8>& out); 571 Result GetThreadContext3(Common::ScratchBuffer<u8>& out);
571 572
572 KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) { 573 KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) {
573 return this->RemoveWaiterByKey(out_has_waiters, key, false); 574 return this->RemoveWaiterByKey(out_has_waiters, key, false);
@@ -892,7 +893,9 @@ private:
892 std::shared_ptr<Common::Fiber> m_host_context{}; 893 std::shared_ptr<Common::Fiber> m_host_context{};
893 ThreadType m_thread_type{}; 894 ThreadType m_thread_type{};
894 StepState m_step_state{}; 895 StepState m_step_state{};
895 std::atomic<bool> m_dummy_thread_runnable{true}; 896 bool m_dummy_thread_runnable{true};
897 std::mutex m_dummy_thread_mutex{};
898 std::condition_variable m_dummy_thread_cv{};
896 899
897 // For debugging 900 // For debugging
898 std::vector<KSynchronizationObject*> m_wait_objects_for_debugging{}; 901 std::vector<KSynchronizationObject*> m_wait_objects_for_debugging{};
diff --git a/src/core/hle/kernel/svc/svc_info.cpp b/src/core/hle/kernel/svc/svc_info.cpp
index 2b2c878b5..445cdd87b 100644
--- a/src/core/hle/kernel/svc/svc_info.cpp
+++ b/src/core/hle/kernel/svc/svc_info.cpp
@@ -199,9 +199,9 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle
199 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { 199 if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
200 const u64 thread_ticks = current_thread->GetCpuTime(); 200 const u64 thread_ticks = current_thread->GetCpuTime();
201 201
202 out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks); 202 out_ticks = thread_ticks + (core_timing.GetClockTicks() - prev_ctx_ticks);
203 } else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) { 203 } else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) {
204 out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks; 204 out_ticks = core_timing.GetClockTicks() - prev_ctx_ticks;
205 } 205 }
206 206
207 *result = out_ticks; 207 *result = out_ticks;
diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp
index ea03068aa..60247df2e 100644
--- a/src/core/hle/kernel/svc/svc_ipc.cpp
+++ b/src/core/hle/kernel/svc/svc_ipc.cpp
@@ -2,6 +2,7 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/scope_exit.h" 4#include "common/scope_exit.h"
5#include "common/scratch_buffer.h"
5#include "core/core.h" 6#include "core/core.h"
6#include "core/hle/kernel/k_client_session.h" 7#include "core/hle/kernel/k_client_session.h"
7#include "core/hle/kernel/k_process.h" 8#include "core/hle/kernel/k_process.h"
@@ -45,11 +46,11 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad
45 handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)), 46 handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)),
46 ResultInvalidPointer); 47 ResultInvalidPointer);
47 48
48 std::vector<Handle> handles(num_handles); 49 std::array<Handle, Svc::ArgumentHandleCountMax> handles;
49 GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles); 50 GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles);
50 51
51 // Convert handle list to object table. 52 // Convert handle list to object table.
52 std::vector<KSynchronizationObject*> objs(num_handles); 53 std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
53 R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(), 54 R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(),
54 num_handles), 55 num_handles),
55 ResultInvalidHandle); 56 ResultInvalidHandle);
@@ -80,7 +81,7 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad
80 // Wait for an object. 81 // Wait for an object.
81 s32 index; 82 s32 index;
82 Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(), 83 Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(),
83 static_cast<s32>(objs.size()), timeout_ns); 84 num_handles, timeout_ns);
84 if (result == ResultTimedOut) { 85 if (result == ResultTimedOut) {
85 R_RETURN(result); 86 R_RETURN(result);
86 } 87 }
diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp
index 04d65f0bd..53df5bcd8 100644
--- a/src/core/hle/kernel/svc/svc_synchronization.cpp
+++ b/src/core/hle/kernel/svc/svc_synchronization.cpp
@@ -2,6 +2,7 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/scope_exit.h" 4#include "common/scope_exit.h"
5#include "common/scratch_buffer.h"
5#include "core/core.h" 6#include "core/core.h"
6#include "core/hle/kernel/k_process.h" 7#include "core/hle/kernel/k_process.h"
7#include "core/hle/kernel/k_readable_event.h" 8#include "core/hle/kernel/k_readable_event.h"
@@ -54,7 +55,7 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons
54 // Get the synchronization context. 55 // Get the synchronization context.
55 auto& kernel = system.Kernel(); 56 auto& kernel = system.Kernel();
56 auto& handle_table = GetCurrentProcess(kernel).GetHandleTable(); 57 auto& handle_table = GetCurrentProcess(kernel).GetHandleTable();
57 std::vector<KSynchronizationObject*> objs(num_handles); 58 std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
58 59
59 // Copy user handles. 60 // Copy user handles.
60 if (num_handles > 0) { 61 if (num_handles > 0) {
@@ -72,8 +73,8 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons
72 }); 73 });
73 74
74 // Wait on the objects. 75 // Wait on the objects.
75 Result res = KSynchronizationObject::Wait(kernel, out_index, objs.data(), 76 Result res =
76 static_cast<s32>(objs.size()), timeout_ns); 77 KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout_ns);
77 78
78 R_SUCCEED_IF(res == ResultSessionClosed); 79 R_SUCCEED_IF(res == ResultSessionClosed);
79 R_RETURN(res); 80 R_RETURN(res);
@@ -87,8 +88,7 @@ Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_ha
87 88
88 // Ensure number of handles is valid. 89 // Ensure number of handles is valid.
89 R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange); 90 R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange);
90 91 std::array<Handle, Svc::ArgumentHandleCountMax> handles;
91 std::vector<Handle> handles(num_handles);
92 if (num_handles > 0) { 92 if (num_handles > 0) {
93 GetCurrentMemory(system.Kernel()) 93 GetCurrentMemory(system.Kernel())
94 .ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle)); 94 .ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle));
diff --git a/src/core/hle/kernel/svc/svc_thread.cpp b/src/core/hle/kernel/svc/svc_thread.cpp
index 37b54079c..36b94e6bf 100644
--- a/src/core/hle/kernel/svc/svc_thread.cpp
+++ b/src/core/hle/kernel/svc/svc_thread.cpp
@@ -174,7 +174,7 @@ Result GetThreadContext3(Core::System& system, u64 out_context, Handle thread_ha
174 } 174 }
175 175
176 // Get the thread context. 176 // Get the thread context.
177 std::vector<u8> context; 177 static thread_local Common::ScratchBuffer<u8> context;
178 R_TRY(thread->GetThreadContext3(context)); 178 R_TRY(thread->GetThreadContext3(context));
179 179
180 // Copy the thread context to user space. 180 // Copy the thread context to user space.
diff --git a/src/core/hle/kernel/svc/svc_tick.cpp b/src/core/hle/kernel/svc/svc_tick.cpp
index 561336482..7dd7c6e51 100644
--- a/src/core/hle/kernel/svc/svc_tick.cpp
+++ b/src/core/hle/kernel/svc/svc_tick.cpp
@@ -12,16 +12,8 @@ namespace Kernel::Svc {
12int64_t GetSystemTick(Core::System& system) { 12int64_t GetSystemTick(Core::System& system) {
13 LOG_TRACE(Kernel_SVC, "called"); 13 LOG_TRACE(Kernel_SVC, "called");
14 14
15 auto& core_timing = system.CoreTiming();
16
17 // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick) 15 // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick)
18 const u64 result{core_timing.GetClockTicks()}; 16 return static_cast<int64_t>(system.CoreTiming().GetClockTicks());
19
20 if (!system.Kernel().IsMulticore()) {
21 core_timing.AddTicks(400U);
22 }
23
24 return static_cast<int64_t>(result);
25} 17}
26 18
27int64_t GetSystemTick64(Core::System& system) { 19int64_t GetSystemTick64(Core::System& system) {
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index f0640c64f..c8d574993 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -5,6 +5,7 @@
5#include "audio_core/renderer/audio_device.h" 5#include "audio_core/renderer/audio_device.h"
6#include "common/common_funcs.h" 6#include "common/common_funcs.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/settings.h"
8#include "common/string_util.h" 9#include "common/string_util.h"
9#include "core/core.h" 10#include "core/core.h"
10#include "core/hle/kernel/k_event.h" 11#include "core/hle/kernel/k_event.h"
@@ -123,19 +124,13 @@ private:
123 124
124 void GetReleasedAudioInBuffer(HLERequestContext& ctx) { 125 void GetReleasedAudioInBuffer(HLERequestContext& ctx) {
125 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); 126 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
126 std::vector<u64> released_buffers(write_buffer_size); 127 tmp_buffer.resize_destructive(write_buffer_size);
128 tmp_buffer[0] = 0;
127 129
128 const auto count = impl->GetReleasedBuffers(released_buffers); 130 const auto count = impl->GetReleasedBuffers(tmp_buffer);
129 131
130 [[maybe_unused]] std::string tags{}; 132 ctx.WriteBuffer(tmp_buffer);
131 for (u32 i = 0; i < count; i++) {
132 tags += fmt::format("{:08X}, ", released_buffers[i]);
133 }
134 [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()};
135 LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,
136 tags);
137 133
138 ctx.WriteBuffer(released_buffers);
139 IPC::ResponseBuilder rb{ctx, 3}; 134 IPC::ResponseBuilder rb{ctx, 3};
140 rb.Push(ResultSuccess); 135 rb.Push(ResultSuccess);
141 rb.Push(count); 136 rb.Push(count);
@@ -200,6 +195,7 @@ private:
200 KernelHelpers::ServiceContext service_context; 195 KernelHelpers::ServiceContext service_context;
201 Kernel::KEvent* event; 196 Kernel::KEvent* event;
202 std::shared_ptr<AudioCore::AudioIn::In> impl; 197 std::shared_ptr<AudioCore::AudioIn::In> impl;
198 Common::ScratchBuffer<u64> tmp_buffer;
203}; 199};
204 200
205AudInU::AudInU(Core::System& system_) 201AudInU::AudInU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 3e62fa4fc..032c8c11f 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -123,19 +123,13 @@ private:
123 123
124 void GetReleasedAudioOutBuffers(HLERequestContext& ctx) { 124 void GetReleasedAudioOutBuffers(HLERequestContext& ctx) {
125 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); 125 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
126 std::vector<u64> released_buffers(write_buffer_size); 126 tmp_buffer.resize_destructive(write_buffer_size);
127 tmp_buffer[0] = 0;
127 128
128 const auto count = impl->GetReleasedBuffers(released_buffers); 129 const auto count = impl->GetReleasedBuffers(tmp_buffer);
129 130
130 [[maybe_unused]] std::string tags{}; 131 ctx.WriteBuffer(tmp_buffer);
131 for (u32 i = 0; i < count; i++) {
132 tags += fmt::format("{:08X}, ", released_buffers[i]);
133 }
134 [[maybe_unused]] const auto sessionid{impl->GetSystem().GetSessionId()};
135 LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,
136 tags);
137 132
138 ctx.WriteBuffer(released_buffers);
139 IPC::ResponseBuilder rb{ctx, 3}; 133 IPC::ResponseBuilder rb{ctx, 3};
140 rb.Push(ResultSuccess); 134 rb.Push(ResultSuccess);
141 rb.Push(count); 135 rb.Push(count);
@@ -211,6 +205,7 @@ private:
211 KernelHelpers::ServiceContext service_context; 205 KernelHelpers::ServiceContext service_context;
212 Kernel::KEvent* event; 206 Kernel::KEvent* event;
213 std::shared_ptr<AudioCore::AudioOut::Out> impl; 207 std::shared_ptr<AudioCore::AudioOut::Out> impl;
208 Common::ScratchBuffer<u64> tmp_buffer;
214}; 209};
215 210
216AudOutU::AudOutU(Core::System& system_) 211AudOutU::AudOutU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 7086d4750..12845c23a 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -116,28 +116,26 @@ private:
116 // These buffers are written manually to avoid an issue with WriteBuffer throwing errors for 116 // These buffers are written manually to avoid an issue with WriteBuffer throwing errors for
117 // checking size 0. Performance size is 0 for most games. 117 // checking size 0. Performance size is 0 for most games.
118 118
119 std::vector<u8> output{};
120 std::vector<u8> performance{};
121 auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0}; 119 auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0};
122 if (is_buffer_b) { 120 if (is_buffer_b) {
123 const auto buffersB{ctx.BufferDescriptorB()}; 121 const auto buffersB{ctx.BufferDescriptorB()};
124 output.resize(buffersB[0].Size(), 0); 122 tmp_output.resize_destructive(buffersB[0].Size());
125 performance.resize(buffersB[1].Size(), 0); 123 tmp_performance.resize_destructive(buffersB[1].Size());
126 } else { 124 } else {
127 const auto buffersC{ctx.BufferDescriptorC()}; 125 const auto buffersC{ctx.BufferDescriptorC()};
128 output.resize(buffersC[0].Size(), 0); 126 tmp_output.resize_destructive(buffersC[0].Size());
129 performance.resize(buffersC[1].Size(), 0); 127 tmp_performance.resize_destructive(buffersC[1].Size());
130 } 128 }
131 129
132 auto result = impl->RequestUpdate(input, performance, output); 130 auto result = impl->RequestUpdate(input, tmp_performance, tmp_output);
133 131
134 if (result.IsSuccess()) { 132 if (result.IsSuccess()) {
135 if (is_buffer_b) { 133 if (is_buffer_b) {
136 ctx.WriteBufferB(output.data(), output.size(), 0); 134 ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0);
137 ctx.WriteBufferB(performance.data(), performance.size(), 1); 135 ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1);
138 } else { 136 } else {
139 ctx.WriteBufferC(output.data(), output.size(), 0); 137 ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0);
140 ctx.WriteBufferC(performance.data(), performance.size(), 1); 138 ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1);
141 } 139 }
142 } else { 140 } else {
143 LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description); 141 LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description);
@@ -235,6 +233,8 @@ private:
235 Kernel::KEvent* rendered_event; 233 Kernel::KEvent* rendered_event;
236 Manager& manager; 234 Manager& manager;
237 std::unique_ptr<Renderer> impl; 235 std::unique_ptr<Renderer> impl;
236 Common::ScratchBuffer<u8> tmp_output;
237 Common::ScratchBuffer<u8> tmp_performance;
238}; 238};
239 239
240class IAudioDevice final : public ServiceFramework<IAudioDevice> { 240class IAudioDevice final : public ServiceFramework<IAudioDevice> {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 24ce37e87..d8e9c8719 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -4,6 +4,7 @@
4#pragma once 4#pragma once
5 5
6#include "audio_core/audio_render_manager.h" 6#include "audio_core/audio_render_manager.h"
7#include "common/scratch_buffer.h"
7#include "core/hle/service/kernel_helpers.h" 8#include "core/hle/service/kernel_helpers.h"
8#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
9 10
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 451ac224a..c835f6cb7 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -68,13 +68,13 @@ private:
68 ExtraBehavior extra_behavior) { 68 ExtraBehavior extra_behavior) {
69 u32 consumed = 0; 69 u32 consumed = 0;
70 u32 sample_count = 0; 70 u32 sample_count = 0;
71 std::vector<opus_int16> samples(ctx.GetWriteBufferNumElements<opus_int16>()); 71 tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>());
72 72
73 if (extra_behavior == ExtraBehavior::ResetContext) { 73 if (extra_behavior == ExtraBehavior::ResetContext) {
74 ResetDecoderContext(); 74 ResetDecoderContext();
75 } 75 }
76 76
77 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) { 77 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) {
78 LOG_ERROR(Audio, "Failed to decode opus data"); 78 LOG_ERROR(Audio, "Failed to decode opus data");
79 IPC::ResponseBuilder rb{ctx, 2}; 79 IPC::ResponseBuilder rb{ctx, 2};
80 // TODO(ogniK): Use correct error code 80 // TODO(ogniK): Use correct error code
@@ -90,11 +90,11 @@ private:
90 if (performance) { 90 if (performance) {
91 rb.Push<u64>(*performance); 91 rb.Push<u64>(*performance);
92 } 92 }
93 ctx.WriteBuffer(samples); 93 ctx.WriteBuffer(tmp_samples);
94 } 94 }
95 95
96 bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input, 96 bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input,
97 std::vector<opus_int16>& output, u64* out_performance_time) const { 97 std::span<opus_int16> output, u64* out_performance_time) const {
98 const auto start_time = std::chrono::steady_clock::now(); 98 const auto start_time = std::chrono::steady_clock::now();
99 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); 99 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
100 if (sizeof(OpusPacketHeader) > input.size()) { 100 if (sizeof(OpusPacketHeader) > input.size()) {
@@ -154,6 +154,7 @@ private:
154 OpusDecoderPtr decoder; 154 OpusDecoderPtr decoder;
155 u32 sample_rate; 155 u32 sample_rate;
156 u32 channel_count; 156 u32 channel_count;
157 Common::ScratchBuffer<opus_int16> tmp_samples;
157}; 158};
158 159
159class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { 160class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
diff --git a/src/core/hle/service/hid/hidbus.cpp b/src/core/hle/service/hid/hidbus.cpp
index 5604a6fda..80aac221b 100644
--- a/src/core/hle/service/hid/hidbus.cpp
+++ b/src/core/hle/service/hid/hidbus.cpp
@@ -5,7 +5,6 @@
5#include "common/settings.h" 5#include "common/settings.h"
6#include "core/core.h" 6#include "core/core.h"
7#include "core/core_timing.h" 7#include "core/core_timing.h"
8#include "core/core_timing_util.h"
9#include "core/hid/hid_types.h" 8#include "core/hid/hid_types.h"
10#include "core/hle/kernel/k_event.h" 9#include "core/hle/kernel/k_event.h"
11#include "core/hle/kernel/k_readable_event.h" 10#include "core/hle/kernel/k_readable_event.h"
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index ab1f30f9e..a04538d5d 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -34,7 +34,7 @@ public:
34 * @returns The result code of the ioctl. 34 * @returns The result code of the ioctl.
35 */ 35 */
36 virtual NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 36 virtual NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
37 std::vector<u8>& output) = 0; 37 std::span<u8> output) = 0;
38 38
39 /** 39 /**
40 * Handles an ioctl2 request. 40 * Handles an ioctl2 request.
@@ -45,7 +45,7 @@ public:
45 * @returns The result code of the ioctl. 45 * @returns The result code of the ioctl.
46 */ 46 */
47 virtual NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 47 virtual NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
48 std::span<const u8> inline_input, std::vector<u8>& output) = 0; 48 std::span<const u8> inline_input, std::span<u8> output) = 0;
49 49
50 /** 50 /**
51 * Handles an ioctl3 request. 51 * Handles an ioctl3 request.
@@ -56,7 +56,7 @@ public:
56 * @returns The result code of the ioctl. 56 * @returns The result code of the ioctl.
57 */ 57 */
58 virtual NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 58 virtual NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
59 std::vector<u8>& output, std::vector<u8>& inline_output) = 0; 59 std::span<u8> output, std::span<u8> inline_output) = 0;
60 60
61 /** 61 /**
62 * Called once a device is opened 62 * Called once a device is opened
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 5a5b2e305..05a43d8dc 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -18,19 +18,19 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core)
18nvdisp_disp0::~nvdisp_disp0() = default; 18nvdisp_disp0::~nvdisp_disp0() = default;
19 19
20NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 20NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
21 std::vector<u8>& output) { 21 std::span<u8> output) {
22 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 22 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
23 return NvResult::NotImplemented; 23 return NvResult::NotImplemented;
24} 24}
25 25
26NvResult nvdisp_disp0::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 26NvResult nvdisp_disp0::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
27 std::span<const u8> inline_input, std::vector<u8>& output) { 27 std::span<const u8> inline_input, std::span<u8> output) {
28 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 28 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
29 return NvResult::NotImplemented; 29 return NvResult::NotImplemented;
30} 30}
31 31
32NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 32NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
33 std::vector<u8>& output, std::vector<u8>& inline_output) { 33 std::span<u8> output, std::span<u8> inline_output) {
34 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 34 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
35 return NvResult::NotImplemented; 35 return NvResult::NotImplemented;
36} 36}
@@ -51,8 +51,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
51 stride, format, transform, crop_rect}; 51 stride, format, transform, crop_rect};
52 52
53 system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); 53 system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences);
54 system.GetPerfStats().EndSystemFrame();
55 system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); 54 system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
55 system.GetPerfStats().EndSystemFrame();
56 system.GetPerfStats().BeginSystemFrame(); 56 system.GetPerfStats().BeginSystemFrame();
57} 57}
58 58
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index bcd0e3ed5..daee05fe8 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -26,11 +26,11 @@ public:
26 ~nvdisp_disp0() override; 26 ~nvdisp_disp0() override;
27 27
28 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 28 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
29 std::vector<u8>& output) override; 29 std::span<u8> output) override;
30 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 30 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
31 std::span<const u8> inline_input, std::vector<u8>& output) override; 31 std::span<const u8> inline_input, std::span<u8> output) override;
32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
33 std::vector<u8>& inline_output) override; 33 std::span<u8> inline_output) override;
34 34
35 void OnOpen(DeviceFD fd) override; 35 void OnOpen(DeviceFD fd) override;
36 void OnClose(DeviceFD fd) override; 36 void OnClose(DeviceFD fd) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 681bd0867..07e570a9f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -28,7 +28,7 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Con
28nvhost_as_gpu::~nvhost_as_gpu() = default; 28nvhost_as_gpu::~nvhost_as_gpu() = default;
29 29
30NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 30NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
31 std::vector<u8>& output) { 31 std::span<u8> output) {
32 switch (command.group) { 32 switch (command.group) {
33 case 'A': 33 case 'A':
34 switch (command.cmd) { 34 switch (command.cmd) {
@@ -61,13 +61,13 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i
61} 61}
62 62
63NvResult nvhost_as_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 63NvResult nvhost_as_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
64 std::span<const u8> inline_input, std::vector<u8>& output) { 64 std::span<const u8> inline_input, std::span<u8> output) {
65 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 65 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
66 return NvResult::NotImplemented; 66 return NvResult::NotImplemented;
67} 67}
68 68
69NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 69NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
70 std::vector<u8>& output, std::vector<u8>& inline_output) { 70 std::span<u8> output, std::span<u8> inline_output) {
71 switch (command.group) { 71 switch (command.group) {
72 case 'A': 72 case 'A':
73 switch (command.cmd) { 73 switch (command.cmd) {
@@ -87,7 +87,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i
87void nvhost_as_gpu::OnOpen(DeviceFD fd) {} 87void nvhost_as_gpu::OnOpen(DeviceFD fd) {}
88void nvhost_as_gpu::OnClose(DeviceFD fd) {} 88void nvhost_as_gpu::OnClose(DeviceFD fd) {}
89 89
90NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& output) { 90NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::span<u8> output) {
91 IoctlAllocAsEx params{}; 91 IoctlAllocAsEx params{};
92 std::memcpy(&params, input.data(), input.size()); 92 std::memcpy(&params, input.data(), input.size());
93 93
@@ -141,7 +141,7 @@ NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& ou
141 return NvResult::Success; 141 return NvResult::Success;
142} 142}
143 143
144NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::vector<u8>& output) { 144NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::span<u8> output) {
145 IoctlAllocSpace params{}; 145 IoctlAllocSpace params{};
146 std::memcpy(&params, input.data(), input.size()); 146 std::memcpy(&params, input.data(), input.size());
147 147
@@ -220,7 +220,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
220 mapping_map.erase(offset); 220 mapping_map.erase(offset);
221} 221}
222 222
223NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& output) { 223NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::span<u8> output) {
224 IoctlFreeSpace params{}; 224 IoctlFreeSpace params{};
225 std::memcpy(&params, input.data(), input.size()); 225 std::memcpy(&params, input.data(), input.size());
226 226
@@ -266,15 +266,14 @@ NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& ou
266 return NvResult::Success; 266 return NvResult::Success;
267} 267}
268 268
269NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output) { 269NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::span<u8> output) {
270 const auto num_entries = input.size() / sizeof(IoctlRemapEntry); 270 const auto num_entries = input.size() / sizeof(IoctlRemapEntry);
271 271
272 LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); 272 LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);
273 273
274 std::vector<IoctlRemapEntry> entries(num_entries);
275 std::memcpy(entries.data(), input.data(), input.size());
276
277 std::scoped_lock lock(mutex); 274 std::scoped_lock lock(mutex);
275 entries.resize_destructive(num_entries);
276 std::memcpy(entries.data(), input.data(), input.size());
278 277
279 if (!vm.initialised) { 278 if (!vm.initialised) {
280 return NvResult::BadValue; 279 return NvResult::BadValue;
@@ -320,7 +319,7 @@ NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output
320 return NvResult::Success; 319 return NvResult::Success;
321} 320}
322 321
323NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>& output) { 322NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::span<u8> output) {
324 IoctlMapBufferEx params{}; 323 IoctlMapBufferEx params{};
325 std::memcpy(&params, input.data(), input.size()); 324 std::memcpy(&params, input.data(), input.size());
326 325
@@ -424,7 +423,7 @@ NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>&
424 return NvResult::Success; 423 return NvResult::Success;
425} 424}
426 425
427NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) { 426NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::span<u8> output) {
428 IoctlUnmapBuffer params{}; 427 IoctlUnmapBuffer params{};
429 std::memcpy(&params, input.data(), input.size()); 428 std::memcpy(&params, input.data(), input.size());
430 429
@@ -463,7 +462,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>&
463 return NvResult::Success; 462 return NvResult::Success;
464} 463}
465 464
466NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::vector<u8>& output) { 465NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::span<u8> output) {
467 IoctlBindChannel params{}; 466 IoctlBindChannel params{};
468 std::memcpy(&params, input.data(), input.size()); 467 std::memcpy(&params, input.data(), input.size());
469 LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd); 468 LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
@@ -492,7 +491,7 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
492 }; 491 };
493} 492}
494 493
495NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output) { 494NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output) {
496 IoctlGetVaRegions params{}; 495 IoctlGetVaRegions params{};
497 std::memcpy(&params, input.data(), input.size()); 496 std::memcpy(&params, input.data(), input.size());
498 497
@@ -511,8 +510,8 @@ NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>&
511 return NvResult::Success; 510 return NvResult::Success;
512} 511}
513 512
514NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output, 513NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output,
515 std::vector<u8>& inline_output) { 514 std::span<u8> inline_output) {
516 IoctlGetVaRegions params{}; 515 IoctlGetVaRegions params{};
517 std::memcpy(&params, input.data(), input.size()); 516 std::memcpy(&params, input.data(), input.size());
518 517
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 1aba8d579..2af3e1260 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -15,6 +15,7 @@
15#include "common/address_space.h" 15#include "common/address_space.h"
16#include "common/common_funcs.h" 16#include "common/common_funcs.h"
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "common/scratch_buffer.h"
18#include "common/swap.h" 19#include "common/swap.h"
19#include "core/hle/service/nvdrv/core/nvmap.h" 20#include "core/hle/service/nvdrv/core/nvmap.h"
20#include "core/hle/service/nvdrv/devices/nvdevice.h" 21#include "core/hle/service/nvdrv/devices/nvdevice.h"
@@ -48,11 +49,11 @@ public:
48 ~nvhost_as_gpu() override; 49 ~nvhost_as_gpu() override;
49 50
50 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 51 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
51 std::vector<u8>& output) override; 52 std::span<u8> output) override;
52 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 53 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
53 std::span<const u8> inline_input, std::vector<u8>& output) override; 54 std::span<const u8> inline_input, std::span<u8> output) override;
54 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 55 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
55 std::vector<u8>& inline_output) override; 56 std::span<u8> inline_output) override;
56 57
57 void OnOpen(DeviceFD fd) override; 58 void OnOpen(DeviceFD fd) override;
58 void OnClose(DeviceFD fd) override; 59 void OnClose(DeviceFD fd) override;
@@ -138,18 +139,18 @@ private:
138 static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2, 139 static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2,
139 "IoctlGetVaRegions is incorrect size"); 140 "IoctlGetVaRegions is incorrect size");
140 141
141 NvResult AllocAsEx(std::span<const u8> input, std::vector<u8>& output); 142 NvResult AllocAsEx(std::span<const u8> input, std::span<u8> output);
142 NvResult AllocateSpace(std::span<const u8> input, std::vector<u8>& output); 143 NvResult AllocateSpace(std::span<const u8> input, std::span<u8> output);
143 NvResult Remap(std::span<const u8> input, std::vector<u8>& output); 144 NvResult Remap(std::span<const u8> input, std::span<u8> output);
144 NvResult MapBufferEx(std::span<const u8> input, std::vector<u8>& output); 145 NvResult MapBufferEx(std::span<const u8> input, std::span<u8> output);
145 NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output); 146 NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output);
146 NvResult FreeSpace(std::span<const u8> input, std::vector<u8>& output); 147 NvResult FreeSpace(std::span<const u8> input, std::span<u8> output);
147 NvResult BindChannel(std::span<const u8> input, std::vector<u8>& output); 148 NvResult BindChannel(std::span<const u8> input, std::span<u8> output);
148 149
149 void GetVARegionsImpl(IoctlGetVaRegions& params); 150 void GetVARegionsImpl(IoctlGetVaRegions& params);
150 NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output); 151 NvResult GetVARegions(std::span<const u8> input, std::span<u8> output);
151 NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output, 152 NvResult GetVARegions(std::span<const u8> input, std::span<u8> output,
152 std::vector<u8>& inline_output); 153 std::span<u8> inline_output);
153 154
154 void FreeMappingLocked(u64 offset); 155 void FreeMappingLocked(u64 offset);
155 156
@@ -212,6 +213,7 @@ private:
212 bool initialised{}; 213 bool initialised{};
213 } vm; 214 } vm;
214 std::shared_ptr<Tegra::MemoryManager> gmmu; 215 std::shared_ptr<Tegra::MemoryManager> gmmu;
216 Common::ScratchBuffer<IoctlRemapEntry> entries;
215 217
216 // s32 channel{}; 218 // s32 channel{};
217 // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; 219 // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index e12025560..4d55554b4 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -35,7 +35,7 @@ nvhost_ctrl::~nvhost_ctrl() {
35} 35}
36 36
37NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 37NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
38 std::vector<u8>& output) { 38 std::span<u8> output) {
39 switch (command.group) { 39 switch (command.group) {
40 case 0x0: 40 case 0x0:
41 switch (command.cmd) { 41 switch (command.cmd) {
@@ -64,13 +64,13 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inp
64} 64}
65 65
66NvResult nvhost_ctrl::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 66NvResult nvhost_ctrl::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
67 std::span<const u8> inline_input, std::vector<u8>& output) { 67 std::span<const u8> inline_input, std::span<u8> output) {
68 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 68 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
69 return NvResult::NotImplemented; 69 return NvResult::NotImplemented;
70} 70}
71 71
72NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 72NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
73 std::vector<u8>& output, std::vector<u8>& inline_outpu) { 73 std::span<u8> output, std::span<u8> inline_outpu) {
74 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 74 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
75 return NvResult::NotImplemented; 75 return NvResult::NotImplemented;
76} 76}
@@ -79,7 +79,7 @@ void nvhost_ctrl::OnOpen(DeviceFD fd) {}
79 79
80void nvhost_ctrl::OnClose(DeviceFD fd) {} 80void nvhost_ctrl::OnClose(DeviceFD fd) {}
81 81
82NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output) { 82NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output) {
83 IocGetConfigParams params{}; 83 IocGetConfigParams params{};
84 std::memcpy(&params, input.data(), sizeof(params)); 84 std::memcpy(&params, input.data(), sizeof(params));
85 LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(), 85 LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(),
@@ -87,7 +87,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8
87 return NvResult::ConfigVarNotFound; // Returns error on production mode 87 return NvResult::ConfigVarNotFound; // Returns error on production mode
88} 88}
89 89
90NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output, 90NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::span<u8> output,
91 bool is_allocation) { 91 bool is_allocation) {
92 IocCtrlEventWaitParams params{}; 92 IocCtrlEventWaitParams params{};
93 std::memcpy(&params, input.data(), sizeof(params)); 93 std::memcpy(&params, input.data(), sizeof(params));
@@ -231,7 +231,7 @@ NvResult nvhost_ctrl::FreeEvent(u32 slot) {
231 return NvResult::Success; 231 return NvResult::Success;
232} 232}
233 233
234NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output) { 234NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output) {
235 IocCtrlEventRegisterParams params{}; 235 IocCtrlEventRegisterParams params{};
236 std::memcpy(&params, input.data(), sizeof(params)); 236 std::memcpy(&params, input.data(), sizeof(params));
237 const u32 event_id = params.user_event_id; 237 const u32 event_id = params.user_event_id;
@@ -252,7 +252,7 @@ NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vecto
252 return NvResult::Success; 252 return NvResult::Success;
253} 253}
254 254
255NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output) { 255NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output) {
256 IocCtrlEventUnregisterParams params{}; 256 IocCtrlEventUnregisterParams params{};
257 std::memcpy(&params, input.data(), sizeof(params)); 257 std::memcpy(&params, input.data(), sizeof(params));
258 const u32 event_id = params.user_event_id & 0x00FF; 258 const u32 event_id = params.user_event_id & 0x00FF;
@@ -262,8 +262,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vec
262 return FreeEvent(event_id); 262 return FreeEvent(event_id);
263} 263}
264 264
265NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, 265NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output) {
266 std::vector<u8>& output) {
267 IocCtrlEventUnregisterBatchParams params{}; 266 IocCtrlEventUnregisterBatchParams params{};
268 std::memcpy(&params, input.data(), sizeof(params)); 267 std::memcpy(&params, input.data(), sizeof(params));
269 u64 event_mask = params.user_events; 268 u64 event_mask = params.user_events;
@@ -281,7 +280,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input,
281 return NvResult::Success; 280 return NvResult::Success;
282} 281}
283 282
284NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output) { 283NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output) {
285 IocCtrlEventClearParams params{}; 284 IocCtrlEventClearParams params{};
286 std::memcpy(&params, input.data(), sizeof(params)); 285 std::memcpy(&params, input.data(), sizeof(params));
287 286
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index dd2e7888a..2efed4862 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -26,11 +26,11 @@ public:
26 ~nvhost_ctrl() override; 26 ~nvhost_ctrl() override;
27 27
28 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 28 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
29 std::vector<u8>& output) override; 29 std::span<u8> output) override;
30 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 30 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
31 std::span<const u8> inline_input, std::vector<u8>& output) override; 31 std::span<const u8> inline_input, std::span<u8> output) override;
32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
33 std::vector<u8>& inline_output) override; 33 std::span<u8> inline_output) override;
34 34
35 void OnOpen(DeviceFD fd) override; 35 void OnOpen(DeviceFD fd) override;
36 void OnClose(DeviceFD fd) override; 36 void OnClose(DeviceFD fd) override;
@@ -186,13 +186,12 @@ private:
186 static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8, 186 static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8,
187 "IocCtrlEventKill is incorrect size"); 187 "IocCtrlEventKill is incorrect size");
188 188
189 NvResult NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output); 189 NvResult NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output);
190 NvResult IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output, 190 NvResult IocCtrlEventWait(std::span<const u8> input, std::span<u8> output, bool is_allocation);
191 bool is_allocation); 191 NvResult IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output);
192 NvResult IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output); 192 NvResult IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output);
193 NvResult IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output); 193 NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output);
194 NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::vector<u8>& output); 194 NvResult IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output);
195 NvResult IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output);
196 195
197 NvResult FreeEvent(u32 slot); 196 NvResult FreeEvent(u32 slot);
198 197
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index be3c083db..6081d92e9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -22,7 +22,7 @@ nvhost_ctrl_gpu::~nvhost_ctrl_gpu() {
22} 22}
23 23
24NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 24NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
25 std::vector<u8>& output) { 25 std::span<u8> output) {
26 switch (command.group) { 26 switch (command.group) {
27 case 'G': 27 case 'G':
28 switch (command.cmd) { 28 switch (command.cmd) {
@@ -54,13 +54,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8>
54} 54}
55 55
56NvResult nvhost_ctrl_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 56NvResult nvhost_ctrl_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
57 std::span<const u8> inline_input, std::vector<u8>& output) { 57 std::span<const u8> inline_input, std::span<u8> output) {
58 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 58 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
59 return NvResult::NotImplemented; 59 return NvResult::NotImplemented;
60} 60}
61 61
62NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 62NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
63 std::vector<u8>& output, std::vector<u8>& inline_output) { 63 std::span<u8> output, std::span<u8> inline_output) {
64 switch (command.group) { 64 switch (command.group) {
65 case 'G': 65 case 'G':
66 switch (command.cmd) { 66 switch (command.cmd) {
@@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8>
82void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {} 82void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {}
83void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {} 83void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {}
84 84
85NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output) { 85NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output) {
86 LOG_DEBUG(Service_NVDRV, "called"); 86 LOG_DEBUG(Service_NVDRV, "called");
87 IoctlCharacteristics params{}; 87 IoctlCharacteristics params{};
88 std::memcpy(&params, input.data(), input.size()); 88 std::memcpy(&params, input.data(), input.size());
@@ -127,8 +127,8 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec
127 return NvResult::Success; 127 return NvResult::Success;
128} 128}
129 129
130NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output, 130NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output,
131 std::vector<u8>& inline_output) { 131 std::span<u8> inline_output) {
132 LOG_DEBUG(Service_NVDRV, "called"); 132 LOG_DEBUG(Service_NVDRV, "called");
133 IoctlCharacteristics params{}; 133 IoctlCharacteristics params{};
134 std::memcpy(&params, input.data(), input.size()); 134 std::memcpy(&params, input.data(), input.size());
@@ -175,7 +175,7 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec
175 return NvResult::Success; 175 return NvResult::Success;
176} 176}
177 177
178NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output) { 178NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output) {
179 IoctlGpuGetTpcMasksArgs params{}; 179 IoctlGpuGetTpcMasksArgs params{};
180 std::memcpy(&params, input.data(), input.size()); 180 std::memcpy(&params, input.data(), input.size());
181 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); 181 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
@@ -186,8 +186,8 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>
186 return NvResult::Success; 186 return NvResult::Success;
187} 187}
188 188
189NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output, 189NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output,
190 std::vector<u8>& inline_output) { 190 std::span<u8> inline_output) {
191 IoctlGpuGetTpcMasksArgs params{}; 191 IoctlGpuGetTpcMasksArgs params{};
192 std::memcpy(&params, input.data(), input.size()); 192 std::memcpy(&params, input.data(), input.size());
193 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); 193 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
@@ -199,7 +199,7 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>
199 return NvResult::Success; 199 return NvResult::Success;
200} 200}
201 201
202NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output) { 202NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::span<u8> output) {
203 LOG_DEBUG(Service_NVDRV, "called"); 203 LOG_DEBUG(Service_NVDRV, "called");
204 204
205 IoctlActiveSlotMask params{}; 205 IoctlActiveSlotMask params{};
@@ -212,7 +212,7 @@ NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vect
212 return NvResult::Success; 212 return NvResult::Success;
213} 213}
214 214
215NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output) { 215NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::span<u8> output) {
216 LOG_DEBUG(Service_NVDRV, "called"); 216 LOG_DEBUG(Service_NVDRV, "called");
217 217
218 IoctlZcullGetCtxSize params{}; 218 IoctlZcullGetCtxSize params{};
@@ -224,7 +224,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector
224 return NvResult::Success; 224 return NvResult::Success;
225} 225}
226 226
227NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output) { 227NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::span<u8> output) {
228 LOG_DEBUG(Service_NVDRV, "called"); 228 LOG_DEBUG(Service_NVDRV, "called");
229 229
230 IoctlNvgpuGpuZcullGetInfoArgs params{}; 230 IoctlNvgpuGpuZcullGetInfoArgs params{};
@@ -247,7 +247,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8
247 return NvResult::Success; 247 return NvResult::Success;
248} 248}
249 249
250NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8>& output) { 250NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::span<u8> output) {
251 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 251 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
252 252
253 IoctlZbcSetTable params{}; 253 IoctlZbcSetTable params{};
@@ -263,7 +263,7 @@ NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8>
263 return NvResult::Success; 263 return NvResult::Success;
264} 264}
265 265
266NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output) { 266NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::span<u8> output) {
267 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 267 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
268 268
269 IoctlZbcQueryTable params{}; 269 IoctlZbcQueryTable params{};
@@ -273,7 +273,7 @@ NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u
273 return NvResult::Success; 273 return NvResult::Success;
274} 274}
275 275
276NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& output) { 276NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::span<u8> output) {
277 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 277 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
278 278
279 IoctlFlushL2 params{}; 279 IoctlFlushL2 params{};
@@ -283,7 +283,7 @@ NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& ou
283 return NvResult::Success; 283 return NvResult::Success;
284} 284}
285 285
286NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::vector<u8>& output) { 286NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::span<u8> output) {
287 LOG_DEBUG(Service_NVDRV, "called"); 287 LOG_DEBUG(Service_NVDRV, "called");
288 288
289 IoctlGetGpuTime params{}; 289 IoctlGetGpuTime params{};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index b9333d9d3..97995551c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -22,11 +22,11 @@ public:
22 ~nvhost_ctrl_gpu() override; 22 ~nvhost_ctrl_gpu() override;
23 23
24 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 24 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
25 std::vector<u8>& output) override; 25 std::span<u8> output) override;
26 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 26 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
27 std::span<const u8> inline_input, std::vector<u8>& output) override; 27 std::span<const u8> inline_input, std::span<u8> output) override;
28 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 28 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
29 std::vector<u8>& inline_output) override; 29 std::span<u8> inline_output) override;
30 30
31 void OnOpen(DeviceFD fd) override; 31 void OnOpen(DeviceFD fd) override;
32 void OnClose(DeviceFD fd) override; 32 void OnClose(DeviceFD fd) override;
@@ -151,21 +151,21 @@ private:
151 }; 151 };
152 static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size"); 152 static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size");
153 153
154 NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output); 154 NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output);
155 NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output, 155 NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output,
156 std::vector<u8>& inline_output); 156 std::span<u8> inline_output);
157 157
158 NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output); 158 NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output);
159 NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output, 159 NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output,
160 std::vector<u8>& inline_output); 160 std::span<u8> inline_output);
161 161
162 NvResult GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output); 162 NvResult GetActiveSlotMask(std::span<const u8> input, std::span<u8> output);
163 NvResult ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output); 163 NvResult ZCullGetCtxSize(std::span<const u8> input, std::span<u8> output);
164 NvResult ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output); 164 NvResult ZCullGetInfo(std::span<const u8> input, std::span<u8> output);
165 NvResult ZBCSetTable(std::span<const u8> input, std::vector<u8>& output); 165 NvResult ZBCSetTable(std::span<const u8> input, std::span<u8> output);
166 NvResult ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output); 166 NvResult ZBCQueryTable(std::span<const u8> input, std::span<u8> output);
167 NvResult FlushL2(std::span<const u8> input, std::vector<u8>& output); 167 NvResult FlushL2(std::span<const u8> input, std::span<u8> output);
168 NvResult GetGpuTime(std::span<const u8> input, std::vector<u8>& output); 168 NvResult GetGpuTime(std::span<const u8> input, std::span<u8> output);
169 169
170 EventInterface& events_interface; 170 EventInterface& events_interface;
171 171
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 453a965dc..46a25fcab 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -47,7 +47,7 @@ nvhost_gpu::~nvhost_gpu() {
47} 47}
48 48
49NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 49NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
50 std::vector<u8>& output) { 50 std::span<u8> output) {
51 switch (command.group) { 51 switch (command.group) {
52 case 0x0: 52 case 0x0:
53 switch (command.cmd) { 53 switch (command.cmd) {
@@ -99,7 +99,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
99}; 99};
100 100
101NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 101NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
102 std::span<const u8> inline_input, std::vector<u8>& output) { 102 std::span<const u8> inline_input, std::span<u8> output) {
103 switch (command.group) { 103 switch (command.group) {
104 case 'H': 104 case 'H':
105 switch (command.cmd) { 105 switch (command.cmd) {
@@ -113,7 +113,7 @@ NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> inpu
113} 113}
114 114
115NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 115NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
116 std::vector<u8>& output, std::vector<u8>& inline_output) { 116 std::span<u8> output, std::span<u8> inline_output) {
117 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 117 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
118 return NvResult::NotImplemented; 118 return NvResult::NotImplemented;
119} 119}
@@ -121,7 +121,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
121void nvhost_gpu::OnOpen(DeviceFD fd) {} 121void nvhost_gpu::OnOpen(DeviceFD fd) {}
122void nvhost_gpu::OnClose(DeviceFD fd) {} 122void nvhost_gpu::OnClose(DeviceFD fd) {}
123 123
124NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) { 124NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::span<u8> output) {
125 IoctlSetNvmapFD params{}; 125 IoctlSetNvmapFD params{};
126 std::memcpy(&params, input.data(), input.size()); 126 std::memcpy(&params, input.data(), input.size());
127 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); 127 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
@@ -130,7 +130,7 @@ NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& outp
130 return NvResult::Success; 130 return NvResult::Success;
131} 131}
132 132
133NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& output) { 133NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::span<u8> output) {
134 LOG_DEBUG(Service_NVDRV, "called"); 134 LOG_DEBUG(Service_NVDRV, "called");
135 135
136 IoctlClientData params{}; 136 IoctlClientData params{};
@@ -139,7 +139,7 @@ NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& o
139 return NvResult::Success; 139 return NvResult::Success;
140} 140}
141 141
142NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& output) { 142NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::span<u8> output) {
143 LOG_DEBUG(Service_NVDRV, "called"); 143 LOG_DEBUG(Service_NVDRV, "called");
144 144
145 IoctlClientData params{}; 145 IoctlClientData params{};
@@ -149,7 +149,7 @@ NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& o
149 return NvResult::Success; 149 return NvResult::Success;
150} 150}
151 151
152NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& output) { 152NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::span<u8> output) {
153 std::memcpy(&zcull_params, input.data(), input.size()); 153 std::memcpy(&zcull_params, input.data(), input.size());
154 LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va, 154 LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va,
155 zcull_params.mode); 155 zcull_params.mode);
@@ -158,7 +158,7 @@ NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& outpu
158 return NvResult::Success; 158 return NvResult::Success;
159} 159}
160 160
161NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output) { 161NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::span<u8> output) {
162 IoctlSetErrorNotifier params{}; 162 IoctlSetErrorNotifier params{};
163 std::memcpy(&params, input.data(), input.size()); 163 std::memcpy(&params, input.data(), input.size());
164 LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset, 164 LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset,
@@ -168,14 +168,14 @@ NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8>
168 return NvResult::Success; 168 return NvResult::Success;
169} 169}
170 170
171NvResult nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::vector<u8>& output) { 171NvResult nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::span<u8> output) {
172 std::memcpy(&channel_priority, input.data(), input.size()); 172 std::memcpy(&channel_priority, input.data(), input.size());
173 LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority); 173 LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority);
174 174
175 return NvResult::Success; 175 return NvResult::Success;
176} 176}
177 177
178NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output) { 178NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output) {
179 IoctlAllocGpfifoEx2 params{}; 179 IoctlAllocGpfifoEx2 params{};
180 std::memcpy(&params, input.data(), input.size()); 180 std::memcpy(&params, input.data(), input.size());
181 LOG_WARNING(Service_NVDRV, 181 LOG_WARNING(Service_NVDRV,
@@ -197,7 +197,7 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>&
197 return NvResult::Success; 197 return NvResult::Success;
198} 198}
199 199
200NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output) { 200NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::span<u8> output) {
201 IoctlAllocObjCtx params{}; 201 IoctlAllocObjCtx params{};
202 std::memcpy(&params, input.data(), input.size()); 202 std::memcpy(&params, input.data(), input.size());
203 LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num, 203 LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num,
@@ -208,7 +208,8 @@ NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vecto
208 return NvResult::Success; 208 return NvResult::Success;
209} 209}
210 210
211static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { 211static boost::container::small_vector<Tegra::CommandHeader, 512> BuildWaitCommandList(
212 NvFence fence) {
212 return { 213 return {
213 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, 214 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
214 Tegra::SubmissionMode::Increasing), 215 Tegra::SubmissionMode::Increasing),
@@ -219,35 +220,35 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
219 }; 220 };
220} 221}
221 222
222static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence) { 223static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementCommandList(
223 std::vector<Tegra::CommandHeader> result{ 224 NvFence fence) {
225 boost::container::small_vector<Tegra::CommandHeader, 512> result{
224 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, 226 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
225 Tegra::SubmissionMode::Increasing), 227 Tegra::SubmissionMode::Increasing),
226 {}}; 228 {}};
227 229
228 for (u32 count = 0; count < 2; ++count) { 230 for (u32 count = 0; count < 2; ++count) {
229 result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, 231 result.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
230 Tegra::SubmissionMode::Increasing)); 232 Tegra::SubmissionMode::Increasing));
231 result.emplace_back( 233 result.push_back(
232 BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id)); 234 BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
233 } 235 }
234 236
235 return result; 237 return result;
236} 238}
237 239
238static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence) { 240static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementWithWfiCommandList(
239 std::vector<Tegra::CommandHeader> result{ 241 NvFence fence) {
242 boost::container::small_vector<Tegra::CommandHeader, 512> result{
240 Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1, 243 Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1,
241 Tegra::SubmissionMode::Increasing), 244 Tegra::SubmissionMode::Increasing),
242 {}}; 245 {}};
243 const std::vector<Tegra::CommandHeader> increment{BuildIncrementCommandList(fence)}; 246 auto increment_list{BuildIncrementCommandList(fence)};
244 247 result.insert(result.end(), increment_list.begin(), increment_list.end());
245 result.insert(result.end(), increment.begin(), increment.end());
246
247 return result; 248 return result;
248} 249}
249 250
250NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, 251NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output,
251 Tegra::CommandList&& entries) { 252 Tegra::CommandList&& entries) {
252 LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, 253 LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
253 params.num_entries, params.flags.raw); 254 params.num_entries, params.flags.raw);
@@ -293,7 +294,7 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
293 return NvResult::Success; 294 return NvResult::Success;
294} 295}
295 296
296NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output, 297NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output,
297 bool kickoff) { 298 bool kickoff) {
298 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 299 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
299 UNIMPLEMENTED(); 300 UNIMPLEMENTED();
@@ -315,7 +316,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>
315} 316}
316 317
317NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline, 318NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline,
318 std::vector<u8>& output) { 319 std::span<u8> output) {
319 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 320 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
320 UNIMPLEMENTED(); 321 UNIMPLEMENTED();
321 return NvResult::InvalidSize; 322 return NvResult::InvalidSize;
@@ -327,7 +328,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const
327 return SubmitGPFIFOImpl(params, output, std::move(entries)); 328 return SubmitGPFIFOImpl(params, output, std::move(entries));
328} 329}
329 330
330NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) { 331NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::span<u8> output) {
331 IoctlGetWaitbase params{}; 332 IoctlGetWaitbase params{};
332 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); 333 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
333 LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); 334 LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
@@ -337,7 +338,7 @@ NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& out
337 return NvResult::Success; 338 return NvResult::Success;
338} 339}
339 340
340NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output) { 341NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::span<u8> output) {
341 IoctlChannelSetTimeout params{}; 342 IoctlChannelSetTimeout params{};
342 std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout)); 343 std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout));
343 LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout); 344 LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout);
@@ -345,7 +346,7 @@ NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8
345 return NvResult::Success; 346 return NvResult::Success;
346} 347}
347 348
348NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output) { 349NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output) {
349 IoctlSetTimeslice params{}; 350 IoctlSetTimeslice params{};
350 std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice)); 351 std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice));
351 LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); 352 LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 3ca58202d..529c20526 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -41,11 +41,11 @@ public:
41 ~nvhost_gpu() override; 41 ~nvhost_gpu() override;
42 42
43 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 43 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
44 std::vector<u8>& output) override; 44 std::span<u8> output) override;
45 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 45 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
46 std::span<const u8> inline_input, std::vector<u8>& output) override; 46 std::span<const u8> inline_input, std::span<u8> output) override;
47 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 47 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
48 std::vector<u8>& inline_output) override; 48 std::span<u8> inline_output) override;
49 49
50 void OnOpen(DeviceFD fd) override; 50 void OnOpen(DeviceFD fd) override;
51 void OnClose(DeviceFD fd) override; 51 void OnClose(DeviceFD fd) override;
@@ -186,23 +186,23 @@ private:
186 u32_le channel_priority{}; 186 u32_le channel_priority{};
187 u32_le channel_timeslice{}; 187 u32_le channel_timeslice{};
188 188
189 NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output); 189 NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output);
190 NvResult SetClientData(std::span<const u8> input, std::vector<u8>& output); 190 NvResult SetClientData(std::span<const u8> input, std::span<u8> output);
191 NvResult GetClientData(std::span<const u8> input, std::vector<u8>& output); 191 NvResult GetClientData(std::span<const u8> input, std::span<u8> output);
192 NvResult ZCullBind(std::span<const u8> input, std::vector<u8>& output); 192 NvResult ZCullBind(std::span<const u8> input, std::span<u8> output);
193 NvResult SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output); 193 NvResult SetErrorNotifier(std::span<const u8> input, std::span<u8> output);
194 NvResult SetChannelPriority(std::span<const u8> input, std::vector<u8>& output); 194 NvResult SetChannelPriority(std::span<const u8> input, std::span<u8> output);
195 NvResult AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output); 195 NvResult AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output);
196 NvResult AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output); 196 NvResult AllocateObjectContext(std::span<const u8> input, std::span<u8> output);
197 NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, 197 NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output,
198 Tegra::CommandList&& entries); 198 Tegra::CommandList&& entries);
199 NvResult SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output, 199 NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output,
200 bool kickoff = false); 200 bool kickoff = false);
201 NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline, 201 NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline,
202 std::vector<u8>& output); 202 std::span<u8> output);
203 NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output); 203 NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output);
204 NvResult ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output); 204 NvResult ChannelSetTimeout(std::span<const u8> input, std::span<u8> output);
205 NvResult ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output); 205 NvResult ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output);
206 206
207 EventInterface& events_interface; 207 EventInterface& events_interface;
208 NvCore::Container& core; 208 NvCore::Container& core;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index dc45169ad..a174442a6 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -16,7 +16,7 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_)
16nvhost_nvdec::~nvhost_nvdec() = default; 16nvhost_nvdec::~nvhost_nvdec() = default;
17 17
18NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 18NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
19 std::vector<u8>& output) { 19 std::span<u8> output) {
20 switch (command.group) { 20 switch (command.group) {
21 case 0x0: 21 case 0x0:
22 switch (command.cmd) { 22 switch (command.cmd) {
@@ -56,13 +56,13 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
56} 56}
57 57
58NvResult nvhost_nvdec::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 58NvResult nvhost_nvdec::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
59 std::span<const u8> inline_input, std::vector<u8>& output) { 59 std::span<const u8> inline_input, std::span<u8> output) {
60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
61 return NvResult::NotImplemented; 61 return NvResult::NotImplemented;
62} 62}
63 63
64NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 64NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
65 std::vector<u8>& output, std::vector<u8>& inline_output) { 65 std::span<u8> output, std::span<u8> inline_output) {
66 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 66 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
67 return NvResult::NotImplemented; 67 return NvResult::NotImplemented;
68} 68}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 0d615bbcb..ad2233c49 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -14,11 +14,11 @@ public:
14 ~nvhost_nvdec() override; 14 ~nvhost_nvdec() override;
15 15
16 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 16 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
17 std::vector<u8>& output) override; 17 std::span<u8> output) override;
18 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 18 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
19 std::span<const u8> inline_input, std::vector<u8>& output) override; 19 std::span<const u8> inline_input, std::span<u8> output) override;
20 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 20 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
21 std::vector<u8>& inline_output) override; 21 std::span<u8> inline_output) override;
22 22
23 void OnOpen(DeviceFD fd) override; 23 void OnOpen(DeviceFD fd) override;
24 void OnClose(DeviceFD fd) override; 24 void OnClose(DeviceFD fd) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 1ab51f10b..61649aa4a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -36,7 +36,7 @@ std::size_t SliceVectors(std::span<const u8> input, std::vector<T>& dst, std::si
36// Writes the data in src to an offset into the dst vector. The offset is specified in bytes 36// Writes the data in src to an offset into the dst vector. The offset is specified in bytes
37// Returns the number of bytes written into dst. 37// Returns the number of bytes written into dst.
38template <typename T> 38template <typename T>
39std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) { 39std::size_t WriteVectors(std::span<u8> dst, const std::vector<T>& src, std::size_t offset) {
40 if (src.empty()) { 40 if (src.empty()) {
41 return 0; 41 return 0;
42 } 42 }
@@ -72,8 +72,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(std::span<const u8> input) {
72 return NvResult::Success; 72 return NvResult::Success;
73} 73}
74 74
75NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, 75NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output) {
76 std::vector<u8>& output) {
77 IoctlSubmit params{}; 76 IoctlSubmit params{};
78 std::memcpy(&params, input.data(), sizeof(IoctlSubmit)); 77 std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
79 LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); 78 LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
@@ -121,7 +120,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input,
121 return NvResult::Success; 120 return NvResult::Success;
122} 121}
123 122
124NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vector<u8>& output) { 123NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::span<u8> output) {
125 IoctlGetSyncpoint params{}; 124 IoctlGetSyncpoint params{};
126 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); 125 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
127 LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); 126 LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
@@ -133,7 +132,7 @@ NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vecto
133 return NvResult::Success; 132 return NvResult::Success;
134} 133}
135 134
136NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) { 135NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::span<u8> output) {
137 IoctlGetWaitbase params{}; 136 IoctlGetWaitbase params{};
138 LOG_CRITICAL(Service_NVDRV, "called WAITBASE"); 137 LOG_CRITICAL(Service_NVDRV, "called WAITBASE");
139 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); 138 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
@@ -142,7 +141,7 @@ NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector
142 return NvResult::Success; 141 return NvResult::Success;
143} 142}
144 143
145NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u8>& output) { 144NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::span<u8> output) {
146 IoctlMapBuffer params{}; 145 IoctlMapBuffer params{};
147 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); 146 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
148 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); 147 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@@ -159,7 +158,7 @@ NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u
159 return NvResult::Success; 158 return NvResult::Success;
160} 159}
161 160
162NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) { 161NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::span<u8> output) {
163 IoctlMapBuffer params{}; 162 IoctlMapBuffer params{};
164 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); 163 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
165 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); 164 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@@ -173,7 +172,7 @@ NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector
173 return NvResult::Success; 172 return NvResult::Success;
174} 173}
175 174
176NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output) { 175NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::span<u8> output) {
177 std::memcpy(&submit_timeout, input.data(), input.size()); 176 std::memcpy(&submit_timeout, input.data(), input.size());
178 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 177 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
179 return NvResult::Success; 178 return NvResult::Success;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index 5af26a26f..9bb573bfe 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -108,12 +108,12 @@ protected:
108 108
109 /// Ioctl command implementations 109 /// Ioctl command implementations
110 NvResult SetNVMAPfd(std::span<const u8> input); 110 NvResult SetNVMAPfd(std::span<const u8> input);
111 NvResult Submit(DeviceFD fd, std::span<const u8> input, std::vector<u8>& output); 111 NvResult Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output);
112 NvResult GetSyncpoint(std::span<const u8> input, std::vector<u8>& output); 112 NvResult GetSyncpoint(std::span<const u8> input, std::span<u8> output);
113 NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output); 113 NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output);
114 NvResult MapBuffer(std::span<const u8> input, std::vector<u8>& output); 114 NvResult MapBuffer(std::span<const u8> input, std::span<u8> output);
115 NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output); 115 NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output);
116 NvResult SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output); 116 NvResult SetSubmitTimeout(std::span<const u8> input, std::span<u8> output);
117 117
118 Kernel::KEvent* QueryEvent(u32 event_id) override; 118 Kernel::KEvent* QueryEvent(u32 event_id) override;
119 119
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 39f30e7c8..a05c8cdae 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -13,7 +13,7 @@ nvhost_nvjpg::nvhost_nvjpg(Core::System& system_) : nvdevice{system_} {}
13nvhost_nvjpg::~nvhost_nvjpg() = default; 13nvhost_nvjpg::~nvhost_nvjpg() = default;
14 14
15NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 15NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
16 std::vector<u8>& output) { 16 std::span<u8> output) {
17 switch (command.group) { 17 switch (command.group) {
18 case 'H': 18 case 'H':
19 switch (command.cmd) { 19 switch (command.cmd) {
@@ -32,13 +32,13 @@ NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
32} 32}
33 33
34NvResult nvhost_nvjpg::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 34NvResult nvhost_nvjpg::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
35 std::span<const u8> inline_input, std::vector<u8>& output) { 35 std::span<const u8> inline_input, std::span<u8> output) {
36 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 36 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
37 return NvResult::NotImplemented; 37 return NvResult::NotImplemented;
38} 38}
39 39
40NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 40NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
41 std::vector<u8>& output, std::vector<u8>& inline_output) { 41 std::span<u8> output, std::span<u8> inline_output) {
42 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 42 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
43 return NvResult::NotImplemented; 43 return NvResult::NotImplemented;
44} 44}
@@ -46,7 +46,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
46void nvhost_nvjpg::OnOpen(DeviceFD fd) {} 46void nvhost_nvjpg::OnOpen(DeviceFD fd) {}
47void nvhost_nvjpg::OnClose(DeviceFD fd) {} 47void nvhost_nvjpg::OnClose(DeviceFD fd) {}
48 48
49NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) { 49NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::span<u8> output) {
50 IoctlSetNvmapFD params{}; 50 IoctlSetNvmapFD params{};
51 std::memcpy(&params, input.data(), input.size()); 51 std::memcpy(&params, input.data(), input.size());
52 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); 52 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 41b57e872..5623e0d47 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -16,11 +16,11 @@ public:
16 ~nvhost_nvjpg() override; 16 ~nvhost_nvjpg() override;
17 17
18 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 18 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
19 std::vector<u8>& output) override; 19 std::span<u8> output) override;
20 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 20 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
21 std::span<const u8> inline_input, std::vector<u8>& output) override; 21 std::span<const u8> inline_input, std::span<u8> output) override;
22 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 22 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
23 std::vector<u8>& inline_output) override; 23 std::span<u8> inline_output) override;
24 24
25 void OnOpen(DeviceFD fd) override; 25 void OnOpen(DeviceFD fd) override;
26 void OnClose(DeviceFD fd) override; 26 void OnClose(DeviceFD fd) override;
@@ -33,7 +33,7 @@ private:
33 33
34 s32_le nvmap_fd{}; 34 s32_le nvmap_fd{};
35 35
36 NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output); 36 NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output);
37}; 37};
38 38
39} // namespace Service::Nvidia::Devices 39} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index b0ea402a7..c0b8684c3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -16,7 +16,7 @@ nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_)
16nvhost_vic::~nvhost_vic() = default; 16nvhost_vic::~nvhost_vic() = default;
17 17
18NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 18NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
19 std::vector<u8>& output) { 19 std::span<u8> output) {
20 switch (command.group) { 20 switch (command.group) {
21 case 0x0: 21 case 0x0:
22 switch (command.cmd) { 22 switch (command.cmd) {
@@ -56,13 +56,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
56} 56}
57 57
58NvResult nvhost_vic::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 58NvResult nvhost_vic::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
59 std::span<const u8> inline_input, std::vector<u8>& output) { 59 std::span<const u8> inline_input, std::span<u8> output) {
60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
61 return NvResult::NotImplemented; 61 return NvResult::NotImplemented;
62} 62}
63 63
64NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 64NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
65 std::vector<u8>& output, std::vector<u8>& inline_output) { 65 std::span<u8> output, std::span<u8> inline_output) {
66 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 66 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
67 return NvResult::NotImplemented; 67 return NvResult::NotImplemented;
68} 68}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index b5e350a83..cadbcb0a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -13,11 +13,11 @@ public:
13 ~nvhost_vic(); 13 ~nvhost_vic();
14 14
15 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 15 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
16 std::vector<u8>& output) override; 16 std::span<u8> output) override;
17 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 17 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
18 std::span<const u8> inline_input, std::vector<u8>& output) override; 18 std::span<const u8> inline_input, std::span<u8> output) override;
19 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 19 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
20 std::vector<u8>& inline_output) override; 20 std::span<u8> inline_output) override;
21 21
22 void OnOpen(DeviceFD fd) override; 22 void OnOpen(DeviceFD fd) override;
23 void OnClose(DeviceFD fd) override; 23 void OnClose(DeviceFD fd) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 07417f045..e7f7e273b 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -26,7 +26,7 @@ nvmap::nvmap(Core::System& system_, NvCore::Container& container_)
26nvmap::~nvmap() = default; 26nvmap::~nvmap() = default;
27 27
28NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 28NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
29 std::vector<u8>& output) { 29 std::span<u8> output) {
30 switch (command.group) { 30 switch (command.group) {
31 case 0x1: 31 case 0x1:
32 switch (command.cmd) { 32 switch (command.cmd) {
@@ -55,13 +55,13 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
55} 55}
56 56
57NvResult nvmap::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 57NvResult nvmap::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
58 std::span<const u8> inline_input, std::vector<u8>& output) { 58 std::span<const u8> inline_input, std::span<u8> output) {
59 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 59 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
60 return NvResult::NotImplemented; 60 return NvResult::NotImplemented;
61} 61}
62 62
63NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 63NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
64 std::vector<u8>& output, std::vector<u8>& inline_output) { 64 std::span<u8> inline_output) {
65 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 65 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
66 return NvResult::NotImplemented; 66 return NvResult::NotImplemented;
67} 67}
@@ -69,7 +69,7 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
69void nvmap::OnOpen(DeviceFD fd) {} 69void nvmap::OnOpen(DeviceFD fd) {}
70void nvmap::OnClose(DeviceFD fd) {} 70void nvmap::OnClose(DeviceFD fd) {}
71 71
72NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) { 72NvResult nvmap::IocCreate(std::span<const u8> input, std::span<u8> output) {
73 IocCreateParams params; 73 IocCreateParams params;
74 std::memcpy(&params, input.data(), sizeof(params)); 74 std::memcpy(&params, input.data(), sizeof(params));
75 LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size); 75 LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size);
@@ -89,7 +89,7 @@ NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) {
89 return NvResult::Success; 89 return NvResult::Success;
90} 90}
91 91
92NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) { 92NvResult nvmap::IocAlloc(std::span<const u8> input, std::span<u8> output) {
93 IocAllocParams params; 93 IocAllocParams params;
94 std::memcpy(&params, input.data(), sizeof(params)); 94 std::memcpy(&params, input.data(), sizeof(params));
95 LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address); 95 LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address);
@@ -137,7 +137,7 @@ NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) {
137 return result; 137 return result;
138} 138}
139 139
140NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) { 140NvResult nvmap::IocGetId(std::span<const u8> input, std::span<u8> output) {
141 IocGetIdParams params; 141 IocGetIdParams params;
142 std::memcpy(&params, input.data(), sizeof(params)); 142 std::memcpy(&params, input.data(), sizeof(params));
143 143
@@ -161,7 +161,7 @@ NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) {
161 return NvResult::Success; 161 return NvResult::Success;
162} 162}
163 163
164NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) { 164NvResult nvmap::IocFromId(std::span<const u8> input, std::span<u8> output) {
165 IocFromIdParams params; 165 IocFromIdParams params;
166 std::memcpy(&params, input.data(), sizeof(params)); 166 std::memcpy(&params, input.data(), sizeof(params));
167 167
@@ -192,7 +192,7 @@ NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) {
192 return NvResult::Success; 192 return NvResult::Success;
193} 193}
194 194
195NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) { 195NvResult nvmap::IocParam(std::span<const u8> input, std::span<u8> output) {
196 enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 }; 196 enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 };
197 197
198 IocParamParams params; 198 IocParamParams params;
@@ -241,7 +241,7 @@ NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) {
241 return NvResult::Success; 241 return NvResult::Success;
242} 242}
243 243
244NvResult nvmap::IocFree(std::span<const u8> input, std::vector<u8>& output) { 244NvResult nvmap::IocFree(std::span<const u8> input, std::span<u8> output) {
245 IocFreeParams params; 245 IocFreeParams params;
246 std::memcpy(&params, input.data(), sizeof(params)); 246 std::memcpy(&params, input.data(), sizeof(params));
247 247
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 82bd3b118..40c65b430 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -27,11 +27,11 @@ public:
27 nvmap& operator=(const nvmap&) = delete; 27 nvmap& operator=(const nvmap&) = delete;
28 28
29 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 29 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
30 std::vector<u8>& output) override; 30 std::span<u8> output) override;
31 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 31 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
32 std::span<const u8> inline_input, std::vector<u8>& output) override; 32 std::span<const u8> inline_input, std::span<u8> output) override;
33 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 33 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
34 std::vector<u8>& inline_output) override; 34 std::span<u8> inline_output) override;
35 35
36 void OnOpen(DeviceFD fd) override; 36 void OnOpen(DeviceFD fd) override;
37 void OnClose(DeviceFD fd) override; 37 void OnClose(DeviceFD fd) override;
@@ -106,12 +106,12 @@ private:
106 }; 106 };
107 static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); 107 static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
108 108
109 NvResult IocCreate(std::span<const u8> input, std::vector<u8>& output); 109 NvResult IocCreate(std::span<const u8> input, std::span<u8> output);
110 NvResult IocAlloc(std::span<const u8> input, std::vector<u8>& output); 110 NvResult IocAlloc(std::span<const u8> input, std::span<u8> output);
111 NvResult IocGetId(std::span<const u8> input, std::vector<u8>& output); 111 NvResult IocGetId(std::span<const u8> input, std::span<u8> output);
112 NvResult IocFromId(std::span<const u8> input, std::vector<u8>& output); 112 NvResult IocFromId(std::span<const u8> input, std::span<u8> output);
113 NvResult IocParam(std::span<const u8> input, std::vector<u8>& output); 113 NvResult IocParam(std::span<const u8> input, std::span<u8> output);
114 NvResult IocFree(std::span<const u8> input, std::vector<u8>& output); 114 NvResult IocFree(std::span<const u8> input, std::span<u8> output);
115 115
116 NvCore::Container& container; 116 NvCore::Container& container;
117 NvCore::NvMap& file; 117 NvCore::NvMap& file;
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 3d774eec4..9e46ee8dd 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -130,7 +130,7 @@ DeviceFD Module::Open(const std::string& device_name) {
130} 130}
131 131
132NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 132NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
133 std::vector<u8>& output) { 133 std::span<u8> output) {
134 if (fd < 0) { 134 if (fd < 0) {
135 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); 135 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
136 return NvResult::InvalidState; 136 return NvResult::InvalidState;
@@ -147,7 +147,7 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
147} 147}
148 148
149NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 149NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
150 std::span<const u8> inline_input, std::vector<u8>& output) { 150 std::span<const u8> inline_input, std::span<u8> output) {
151 if (fd < 0) { 151 if (fd < 0) {
152 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); 152 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
153 return NvResult::InvalidState; 153 return NvResult::InvalidState;
@@ -163,8 +163,8 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
163 return itr->second->Ioctl2(fd, command, input, inline_input, output); 163 return itr->second->Ioctl2(fd, command, input, inline_input, output);
164} 164}
165 165
166NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 166NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
167 std::vector<u8>& output, std::vector<u8>& inline_output) { 167 std::span<u8> inline_output) {
168 if (fd < 0) { 168 if (fd < 0) {
169 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); 169 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
170 return NvResult::InvalidState; 170 return NvResult::InvalidState;
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 668be742b..d8622b3ca 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -80,13 +80,13 @@ public:
80 DeviceFD Open(const std::string& device_name); 80 DeviceFD Open(const std::string& device_name);
81 81
82 /// Sends an ioctl command to the specified file descriptor. 82 /// Sends an ioctl command to the specified file descriptor.
83 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output); 83 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output);
84 84
85 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 85 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
86 std::span<const u8> inline_input, std::vector<u8>& output); 86 std::span<const u8> inline_input, std::span<u8> output);
87 87
88 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 88 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
89 std::vector<u8>& inline_output); 89 std::span<u8> inline_output);
90 90
91 /// Closes a device file descriptor and returns operation success. 91 /// Closes a device file descriptor and returns operation success.
92 NvResult Close(DeviceFD fd); 92 NvResult Close(DeviceFD fd);
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index d010a1e03..348207e25 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -63,12 +63,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) {
63 } 63 }
64 64
65 // Check device 65 // Check device
66 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); 66 tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
67 const auto input_buffer = ctx.ReadBuffer(0); 67 const auto input_buffer = ctx.ReadBuffer(0);
68 68
69 const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer); 69 const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output);
70 if (command.is_out != 0) { 70 if (command.is_out != 0) {
71 ctx.WriteBuffer(output_buffer); 71 ctx.WriteBuffer(tmp_output);
72 } 72 }
73 73
74 IPC::ResponseBuilder rb{ctx, 3}; 74 IPC::ResponseBuilder rb{ctx, 3};
@@ -90,12 +90,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) {
90 90
91 const auto input_buffer = ctx.ReadBuffer(0); 91 const auto input_buffer = ctx.ReadBuffer(0);
92 const auto input_inlined_buffer = ctx.ReadBuffer(1); 92 const auto input_inlined_buffer = ctx.ReadBuffer(1);
93 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); 93 tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
94 94
95 const auto nv_result = 95 const auto nv_result =
96 nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer); 96 nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output);
97 if (command.is_out != 0) { 97 if (command.is_out != 0) {
98 ctx.WriteBuffer(output_buffer); 98 ctx.WriteBuffer(tmp_output);
99 } 99 }
100 100
101 IPC::ResponseBuilder rb{ctx, 3}; 101 IPC::ResponseBuilder rb{ctx, 3};
@@ -116,14 +116,12 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) {
116 } 116 }
117 117
118 const auto input_buffer = ctx.ReadBuffer(0); 118 const auto input_buffer = ctx.ReadBuffer(0);
119 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); 119 tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
120 std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1)); 120 tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1));
121 121 const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline);
122 const auto nv_result =
123 nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
124 if (command.is_out != 0) { 122 if (command.is_out != 0) {
125 ctx.WriteBuffer(output_buffer, 0); 123 ctx.WriteBuffer(tmp_output, 0);
126 ctx.WriteBuffer(output_buffer_inline, 1); 124 ctx.WriteBuffer(tmp_output_inline, 1);
127 } 125 }
128 126
129 IPC::ResponseBuilder rb{ctx, 3}; 127 IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h
index 881ea1a6b..4b593ff90 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.h
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.h
@@ -4,6 +4,7 @@
4#pragma once 4#pragma once
5 5
6#include <memory> 6#include <memory>
7#include "common/scratch_buffer.h"
7#include "core/hle/service/nvdrv/nvdrv.h" 8#include "core/hle/service/nvdrv/nvdrv.h"
8#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
9 10
@@ -33,6 +34,8 @@ private:
33 34
34 u64 pid{}; 35 u64 pid{};
35 bool is_initialized{}; 36 bool is_initialized{};
37 Common::ScratchBuffer<u8> tmp_output;
38 Common::ScratchBuffer<u8> tmp_output_inline;
36}; 39};
37 40
38} // namespace Service::Nvidia 41} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp
index da2d5890f..b41c6240c 100644
--- a/src/core/hle/service/nvnflinger/nvnflinger.cpp
+++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp
@@ -70,7 +70,8 @@ Nvnflinger::Nvnflinger(Core::System& system_, HosBinderDriverServer& hos_binder_
70 [this](std::uintptr_t, s64 time, 70 [this](std::uintptr_t, s64 time,
71 std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> { 71 std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> {
72 vsync_signal.store(true); 72 vsync_signal.store(true);
73 vsync_signal.notify_all(); 73 { const auto lock_guard = Lock(); }
74 vsync_signal.notify_one();
74 return std::chrono::nanoseconds(GetNextTicks()); 75 return std::chrono::nanoseconds(GetNextTicks());
75 }); 76 });
76 77
diff --git a/src/core/hle/service/nvnflinger/parcel.h b/src/core/hle/service/nvnflinger/parcel.h
index fb56d75d7..23ba315a0 100644
--- a/src/core/hle/service/nvnflinger/parcel.h
+++ b/src/core/hle/service/nvnflinger/parcel.h
@@ -6,6 +6,7 @@
6#include <memory> 6#include <memory>
7#include <span> 7#include <span>
8#include <vector> 8#include <vector>
9#include <boost/container/small_vector.hpp>
9 10
10#include "common/alignment.h" 11#include "common/alignment.h"
11#include "common/assert.h" 12#include "common/assert.h"
@@ -167,7 +168,7 @@ public:
167private: 168private:
168 template <typename T> 169 template <typename T>
169 requires(std::is_trivially_copyable_v<T>) 170 requires(std::is_trivially_copyable_v<T>)
170 void WriteImpl(const T& val, std::vector<u8>& buffer) { 171 void WriteImpl(const T& val, boost::container::small_vector<u8, 0x200>& buffer) {
171 const size_t aligned_size = Common::AlignUp(sizeof(T), 4); 172 const size_t aligned_size = Common::AlignUp(sizeof(T), 4);
172 const size_t old_size = buffer.size(); 173 const size_t old_size = buffer.size();
173 buffer.resize(old_size + aligned_size); 174 buffer.resize(old_size + aligned_size);
@@ -176,8 +177,8 @@ private:
176 } 177 }
177 178
178private: 179private:
179 std::vector<u8> m_data_buffer; 180 boost::container::small_vector<u8, 0x200> m_data_buffer;
180 std::vector<u8> m_object_buffer; 181 boost::container::small_vector<u8, 0x200> m_object_buffer;
181}; 182};
182 183
183} // namespace Service::android 184} // namespace Service::android
diff --git a/src/core/hle/service/time/clock_types.h b/src/core/hle/service/time/clock_types.h
index e6293ffb9..9fc01ea90 100644
--- a/src/core/hle/service/time/clock_types.h
+++ b/src/core/hle/service/time/clock_types.h
@@ -3,6 +3,8 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <ratio>
7
6#include "common/common_funcs.h" 8#include "common/common_funcs.h"
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "common/uuid.h" 10#include "common/uuid.h"
@@ -74,18 +76,19 @@ static_assert(std::is_trivially_copyable_v<ContinuousAdjustmentTimePoint>,
74/// https://switchbrew.org/wiki/Glue_services#TimeSpanType 76/// https://switchbrew.org/wiki/Glue_services#TimeSpanType
75struct TimeSpanType { 77struct TimeSpanType {
76 s64 nanoseconds{}; 78 s64 nanoseconds{};
77 static constexpr s64 ns_per_second{1000000000ULL};
78 79
79 s64 ToSeconds() const { 80 s64 ToSeconds() const {
80 return nanoseconds / ns_per_second; 81 return nanoseconds / std::nano::den;
81 } 82 }
82 83
83 static TimeSpanType FromSeconds(s64 seconds) { 84 static TimeSpanType FromSeconds(s64 seconds) {
84 return {seconds * ns_per_second}; 85 return {seconds * std::nano::den};
85 } 86 }
86 87
87 static TimeSpanType FromTicks(u64 ticks, u64 frequency) { 88 template <u64 Frequency>
88 return FromSeconds(static_cast<s64>(ticks) / static_cast<s64>(frequency)); 89 static TimeSpanType FromTicks(u64 ticks) {
90 using TicksToNSRatio = std::ratio<std::nano::den, Frequency>;
91 return {static_cast<s64>(ticks * TicksToNSRatio::num / TicksToNSRatio::den)};
89 } 92 }
90}; 93};
91static_assert(sizeof(TimeSpanType) == 8, "TimeSpanType is incorrect size"); 94static_assert(sizeof(TimeSpanType) == 8, "TimeSpanType is incorrect size");
diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp
index 3dbbb9850..5627b7003 100644
--- a/src/core/hle/service/time/standard_steady_clock_core.cpp
+++ b/src/core/hle/service/time/standard_steady_clock_core.cpp
@@ -10,7 +10,7 @@ namespace Service::Time::Clock {
10 10
11TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { 11TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) {
12 const TimeSpanType ticks_time_span{ 12 const TimeSpanType ticks_time_span{
13 TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; 13 TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(system.CoreTiming().GetClockTicks())};
14 TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; 14 TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds};
15 15
16 if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { 16 if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) {
diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
index 27600413e..0d9fb3143 100644
--- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp
+++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
@@ -10,7 +10,7 @@ namespace Service::Time::Clock {
10 10
11SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { 11SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) {
12 const TimeSpanType ticks_time_span{ 12 const TimeSpanType ticks_time_span{
13 TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; 13 TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(system.CoreTiming().GetClockTicks())};
14 14
15 return {ticks_time_span.ToSeconds(), GetClockSourceId()}; 15 return {ticks_time_span.ToSeconds(), GetClockSourceId()};
16} 16}
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index 868be60c5..7197ca30f 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -240,8 +240,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(HLERequestCon
240 const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)}; 240 const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)};
241 241
242 if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { 242 if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
243 const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), 243 const auto ticks{Clock::TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(
244 Core::Hardware::CNTFREQ)}; 244 system.CoreTiming().GetClockTicks())};
245 const s64 base_time_point{context.offset + current_time_point.time_point - 245 const s64 base_time_point{context.offset + current_time_point.time_point -
246 ticks.ToSeconds()}; 246 ticks.ToSeconds()};
247 IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; 247 IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2};
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp
index ce1c85bcc..a00676669 100644
--- a/src/core/hle/service/time/time_sharedmemory.cpp
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -21,8 +21,9 @@ SharedMemory::~SharedMemory() = default;
21 21
22void SharedMemory::SetupStandardSteadyClock(const Common::UUID& clock_source_id, 22void SharedMemory::SetupStandardSteadyClock(const Common::UUID& clock_source_id,
23 Clock::TimeSpanType current_time_point) { 23 Clock::TimeSpanType current_time_point) {
24 const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( 24 const Clock::TimeSpanType ticks_time_span{
25 system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; 25 Clock::TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(
26 system.CoreTiming().GetClockTicks())};
26 const Clock::SteadyClockContext context{ 27 const Clock::SteadyClockContext context{
27 static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds), 28 static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds),
28 clock_source_id}; 29 clock_source_id};
diff --git a/src/core/hle/service/time/time_zone_manager.cpp b/src/core/hle/service/time/time_zone_manager.cpp
index e1728c06d..205371a26 100644
--- a/src/core/hle/service/time/time_zone_manager.cpp
+++ b/src/core/hle/service/time/time_zone_manager.cpp
@@ -849,8 +849,9 @@ static Result CreateCalendarTime(s64 time, int gmt_offset, CalendarTimeInternal&
849static Result ToCalendarTimeInternal(const TimeZoneRule& rules, s64 time, 849static Result ToCalendarTimeInternal(const TimeZoneRule& rules, s64 time,
850 CalendarTimeInternal& calendar_time, 850 CalendarTimeInternal& calendar_time,
851 CalendarAdditionalInfo& calendar_additional_info) { 851 CalendarAdditionalInfo& calendar_additional_info) {
852 if ((rules.go_ahead && time < rules.ats[0]) || 852 ASSERT(rules.go_ahead ? rules.time_count > 0 : true);
853 (rules.go_back && time > rules.ats[rules.time_count - 1])) { 853 if ((rules.go_back && time < rules.ats[0]) ||
854 (rules.go_ahead && time > rules.ats[rules.time_count - 1])) {
854 s64 seconds{}; 855 s64 seconds{};
855 if (time < rules.ats[0]) { 856 if (time < rules.ats[0]) {
856 seconds = rules.ats[0] - time; 857 seconds = rules.ats[0] - time;
@@ -910,9 +911,13 @@ static Result ToCalendarTimeInternal(const TimeZoneRule& rules, s64 time,
910 911
911 calendar_additional_info.is_dst = rules.ttis[tti_index].is_dst; 912 calendar_additional_info.is_dst = rules.ttis[tti_index].is_dst;
912 const char* time_zone{&rules.chars[rules.ttis[tti_index].abbreviation_list_index]}; 913 const char* time_zone{&rules.chars[rules.ttis[tti_index].abbreviation_list_index]};
913 for (int index{}; time_zone[index] != '\0'; ++index) { 914 u32 index;
915 for (index = 0; time_zone[index] != '\0' && time_zone[index] != ',' &&
916 index < calendar_additional_info.timezone_name.size() - 1;
917 ++index) {
914 calendar_additional_info.timezone_name[index] = time_zone[index]; 918 calendar_additional_info.timezone_name[index] = time_zone[index];
915 } 919 }
920 calendar_additional_info.timezone_name[index] = '\0';
916 return ResultSuccess; 921 return ResultSuccess;
917} 922}
918 923
diff --git a/src/core/hle/service/time/time_zone_service.cpp b/src/core/hle/service/time/time_zone_service.cpp
index e8273e152..8171c82a5 100644
--- a/src/core/hle/service/time/time_zone_service.cpp
+++ b/src/core/hle/service/time/time_zone_service.cpp
@@ -112,20 +112,14 @@ void ITimeZoneService::LoadTimeZoneRule(HLERequestContext& ctx) {
112 LOG_DEBUG(Service_Time, "called, location_name={}", location_name); 112 LOG_DEBUG(Service_Time, "called, location_name={}", location_name);
113 113
114 TimeZone::TimeZoneRule time_zone_rule{}; 114 TimeZone::TimeZoneRule time_zone_rule{};
115 if (const Result result{ 115 const Result result{time_zone_content_manager.LoadTimeZoneRule(time_zone_rule, location_name)};
116 time_zone_content_manager.LoadTimeZoneRule(time_zone_rule, location_name)};
117 result != ResultSuccess) {
118 IPC::ResponseBuilder rb{ctx, 2};
119 rb.Push(result);
120 return;
121 }
122 116
123 std::vector<u8> time_zone_rule_outbuffer(sizeof(TimeZone::TimeZoneRule)); 117 std::vector<u8> time_zone_rule_outbuffer(sizeof(TimeZone::TimeZoneRule));
124 std::memcpy(time_zone_rule_outbuffer.data(), &time_zone_rule, sizeof(TimeZone::TimeZoneRule)); 118 std::memcpy(time_zone_rule_outbuffer.data(), &time_zone_rule, sizeof(TimeZone::TimeZoneRule));
125 ctx.WriteBuffer(time_zone_rule_outbuffer); 119 ctx.WriteBuffer(time_zone_rule_outbuffer);
126 120
127 IPC::ResponseBuilder rb{ctx, 2}; 121 IPC::ResponseBuilder rb{ctx, 2};
128 rb.Push(ResultSuccess); 122 rb.Push(result);
129} 123}
130 124
131void ITimeZoneService::ToCalendarTime(HLERequestContext& ctx) { 125void ITimeZoneService::ToCalendarTime(HLERequestContext& ctx) {
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index c3c2281bb..9ff4028c2 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -479,7 +479,7 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
479 const u32 remainder{4 - element}; 479 const u32 remainder{4 - element};
480 const TransformFeedbackVarying* xfb_varying{}; 480 const TransformFeedbackVarying* xfb_varying{};
481 const size_t xfb_varying_index{base_index + element}; 481 const size_t xfb_varying_index{base_index + element};
482 if (xfb_varying_index < runtime_info.xfb_varyings.size()) { 482 if (xfb_varying_index < runtime_info.xfb_count) {
483 xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index]; 483 xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index];
484 xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; 484 xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
485 } 485 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 0f86a8004..34592a01f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
387} 387}
388 388
389void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { 389void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
390 if (ctx.runtime_info.xfb_varyings.empty()) { 390 if (ctx.runtime_info.xfb_count == 0) {
391 return; 391 return;
392 } 392 }
393 ctx.AddCapability(spv::Capability::TransformFeedback); 393 ctx.AddCapability(spv::Capability::TransformFeedback);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index fd15f47ea..bec5db173 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -160,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo
160 const u32 remainder{4 - element}; 160 const u32 remainder{4 - element};
161 const TransformFeedbackVarying* xfb_varying{}; 161 const TransformFeedbackVarying* xfb_varying{};
162 const size_t xfb_varying_index{base_attr_index + element}; 162 const size_t xfb_varying_index{base_attr_index + element};
163 if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) { 163 if (xfb_varying_index < ctx.runtime_info.xfb_count) {
164 xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index]; 164 xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index];
165 xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; 165 xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
166 } 166 }
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 3b63c249f..619c0b138 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -84,7 +84,8 @@ struct RuntimeInfo {
84 bool glasm_use_storage_buffers{}; 84 bool glasm_use_storage_buffers{};
85 85
86 /// Transform feedback state for each varying 86 /// Transform feedback state for each varying
87 std::vector<TransformFeedbackVarying> xfb_varyings; 87 std::array<TransformFeedbackVarying, 256> xfb_varyings{};
88 u32 xfb_count{0};
88}; 89};
89 90
90} // namespace Shader 91} // namespace Shader
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9bafd8cc0..58a45ab67 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
207 if (has_new_downloads) { 207 if (has_new_downloads) {
208 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); 208 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
209 } 209 }
210 tmp_buffer.resize(amount); 210 tmp_buffer.resize_destructive(amount);
211 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); 211 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
212 cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); 212 cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
213 return true; 213 return true;
@@ -719,9 +719,15 @@ void BufferCache<P>::BindHostVertexBuffers() {
719 bool any_valid{false}; 719 bool any_valid{false};
720 auto& flags = maxwell3d->dirty.flags; 720 auto& flags = maxwell3d->dirty.flags;
721 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { 721 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
722 const Binding& binding = channel_state->vertex_buffers[index];
723 Buffer& buffer = slot_buffers[binding.buffer_id];
724 TouchBuffer(buffer, binding.buffer_id);
725 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
722 if (!flags[Dirty::VertexBuffer0 + index]) { 726 if (!flags[Dirty::VertexBuffer0 + index]) {
723 continue; 727 continue;
724 } 728 }
729 flags[Dirty::VertexBuffer0 + index] = false;
730
725 host_bindings.min_index = std::min(host_bindings.min_index, index); 731 host_bindings.min_index = std::min(host_bindings.min_index, index);
726 host_bindings.max_index = std::max(host_bindings.max_index, index); 732 host_bindings.max_index = std::max(host_bindings.max_index, index);
727 any_valid = true; 733 any_valid = true;
@@ -735,9 +741,6 @@ void BufferCache<P>::BindHostVertexBuffers() {
735 const Binding& binding = channel_state->vertex_buffers[index]; 741 const Binding& binding = channel_state->vertex_buffers[index];
736 Buffer& buffer = slot_buffers[binding.buffer_id]; 742 Buffer& buffer = slot_buffers[binding.buffer_id];
737 743
738 TouchBuffer(buffer, binding.buffer_id);
739 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
740
741 const u32 stride = maxwell3d->regs.vertex_streams[index].stride; 744 const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
742 const u32 offset = buffer.Offset(binding.cpu_addr); 745 const u32 offset = buffer.Offset(binding.cpu_addr);
743 746
@@ -1276,7 +1279,7 @@ template <class P>
1276typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, 1279typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
1277 u32 wanted_size) { 1280 u32 wanted_size) {
1278 static constexpr int STREAM_LEAP_THRESHOLD = 16; 1281 static constexpr int STREAM_LEAP_THRESHOLD = 16;
1279 std::vector<BufferId> overlap_ids; 1282 boost::container::small_vector<BufferId, 16> overlap_ids;
1280 VAddr begin = cpu_addr; 1283 VAddr begin = cpu_addr;
1281 VAddr end = cpu_addr + wanted_size; 1284 VAddr end = cpu_addr + wanted_size;
1282 int stream_score = 0; 1285 int stream_score = 0;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 63a120f7a..fe6068cfe 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
229 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; 229 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
230 230
231 struct OverlapResult { 231 struct OverlapResult {
232 std::vector<BufferId> ids; 232 boost::container::small_vector<BufferId, 16> ids;
233 VAddr begin; 233 VAddr begin;
234 VAddr end; 234 VAddr end;
235 bool has_stream_leap = false; 235 bool has_stream_leap = false;
@@ -582,7 +582,7 @@ private:
582 BufferId inline_buffer_id; 582 BufferId inline_buffer_id;
583 583
584 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; 584 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
585 std::vector<u8> tmp_buffer; 585 Common::ScratchBuffer<u8> tmp_buffer;
586}; 586};
587 587
588} // namespace VideoCommon 588} // namespace VideoCommon
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 83112dfce..7d660af47 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -63,7 +63,6 @@ struct ChCommand {
63}; 63};
64 64
65using ChCommandHeaderList = std::vector<ChCommandHeader>; 65using ChCommandHeaderList = std::vector<ChCommandHeader>;
66using ChCommandList = std::vector<ChCommand>;
67 66
68struct ThiRegisters { 67struct ThiRegisters {
69 u32_le increment_syncpt{}; 68 u32_le increment_syncpt{};
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1cdb690ed..8a2784cdc 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -6,6 +6,7 @@
6#include <array> 6#include <array>
7#include <span> 7#include <span>
8#include <vector> 8#include <vector>
9#include <boost/container/small_vector.hpp>
9#include <queue> 10#include <queue>
10 11
11#include "common/bit_field.h" 12#include "common/bit_field.h"
@@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
102struct CommandList final { 103struct CommandList final {
103 CommandList() = default; 104 CommandList() = default;
104 explicit CommandList(std::size_t size) : command_lists(size) {} 105 explicit CommandList(std::size_t size) : command_lists(size) {}
105 explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_) 106 explicit CommandList(
107 boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_)
106 : prefetch_command_list{std::move(prefetch_command_list_)} {} 108 : prefetch_command_list{std::move(prefetch_command_list_)} {}
107 109
108 std::vector<CommandListHeader> command_lists; 110 boost::container::small_vector<CommandListHeader, 512> command_lists;
109 std::vector<CommandHeader> prefetch_command_list; 111 boost::container::small_vector<CommandHeader, 512> prefetch_command_list;
110}; 112};
111 113
112/** 114/**
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index 0e94c521a..f34090791 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -1,6 +1,7 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/settings.h"
4#include "video_core/dirty_flags.h" 5#include "video_core/dirty_flags.h"
5#include "video_core/engines/draw_manager.h" 6#include "video_core/engines/draw_manager.h"
6#include "video_core/rasterizer_interface.h" 7#include "video_core/rasterizer_interface.h"
@@ -195,8 +196,12 @@ void DrawManager::DrawTexture() {
195 if (lower_left) { 196 if (lower_left) {
196 draw_texture_state.dst_y0 -= dst_height; 197 draw_texture_state.dst_y0 -= dst_height;
197 } 198 }
198 draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width; 199 draw_texture_state.dst_x1 =
199 draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height; 200 draw_texture_state.dst_x0 +
201 static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_width)));
202 draw_texture_state.dst_y1 =
203 draw_texture_state.dst_y0 +
204 static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_height)));
200 draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f; 205 draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f;
201 draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f; 206 draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f;
202 draw_texture_state.src_x1 = 207 draw_texture_state.src_x1 =
@@ -207,7 +212,6 @@ void DrawManager::DrawTexture() {
207 draw_texture_state.src_y0; 212 draw_texture_state.src_y0;
208 draw_texture_state.src_sampler = regs.draw_texture.src_sampler; 213 draw_texture_state.src_sampler = regs.draw_texture.src_sampler;
209 draw_texture_state.src_texture = regs.draw_texture.src_texture; 214 draw_texture_state.src_texture = regs.draw_texture.src_texture;
210
211 maxwell3d->rasterizer->DrawTexture(); 215 maxwell3d->rasterizer->DrawTexture();
212} 216}
213 217
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ebe5536de..bc1eb41e7 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -108,9 +108,11 @@ void MaxwellDMA::Launch() {
108 if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { 108 if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
109 ASSERT(regs.remap_const.component_size_minus_one == 3); 109 ASSERT(regs.remap_const.component_size_minus_one == 3);
110 accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); 110 accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
111 std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); 111 read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
112 std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
113 std::ranges::fill(span, regs.remap_consta_value);
112 memory_manager.WriteBlockUnsafe(regs.offset_out, 114 memory_manager.WriteBlockUnsafe(regs.offset_out,
113 reinterpret_cast<u8*>(tmp_buffer.data()), 115 reinterpret_cast<u8*>(read_buffer.data()),
114 regs.line_length_in * sizeof(u32)); 116 regs.line_length_in * sizeof(u32));
115 } else { 117 } else {
116 memory_manager.FlushCaching(); 118 memory_manager.FlushCaching();
@@ -126,32 +128,32 @@ void MaxwellDMA::Launch() {
126 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); 128 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
127 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); 129 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
128 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 130 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
129 std::vector<u8> tmp_buffer(16); 131 read_buffer.resize_destructive(16);
130 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 132 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
131 memory_manager.ReadBlockUnsafe( 133 memory_manager.ReadBlockUnsafe(
132 convert_linear_2_blocklinear_addr(regs.offset_in + offset), 134 convert_linear_2_blocklinear_addr(regs.offset_in + offset),
133 tmp_buffer.data(), tmp_buffer.size()); 135 read_buffer.data(), read_buffer.size());
134 memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), 136 memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
135 tmp_buffer.size()); 137 read_buffer.size());
136 } 138 }
137 } else if (is_src_pitch && !is_dst_pitch) { 139 } else if (is_src_pitch && !is_dst_pitch) {
138 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); 140 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
139 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); 141 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
140 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 142 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
141 std::vector<u8> tmp_buffer(16); 143 read_buffer.resize_destructive(16);
142 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 144 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
143 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), 145 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(),
144 tmp_buffer.size()); 146 read_buffer.size());
145 memory_manager.WriteBlockCached( 147 memory_manager.WriteBlockCached(
146 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 148 convert_linear_2_blocklinear_addr(regs.offset_out + offset),
147 tmp_buffer.data(), tmp_buffer.size()); 149 read_buffer.data(), read_buffer.size());
148 } 150 }
149 } else { 151 } else {
150 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { 152 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
151 std::vector<u8> tmp_buffer(regs.line_length_in); 153 read_buffer.resize_destructive(regs.line_length_in);
152 memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), 154 memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(),
153 regs.line_length_in); 155 regs.line_length_in);
154 memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), 156 memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
155 regs.line_length_in); 157 regs.line_length_in);
156 } 158 }
157 } 159 }
@@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
171 src_operand.address = regs.offset_in; 173 src_operand.address = regs.offset_in;
172 174
173 DMA::BufferOperand dst_operand; 175 DMA::BufferOperand dst_operand;
174 dst_operand.pitch = regs.pitch_out; 176 u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
177 dst_operand.pitch = abs_pitch_out;
175 dst_operand.width = regs.line_length_in; 178 dst_operand.width = regs.line_length_in;
176 dst_operand.height = regs.line_count; 179 dst_operand.height = regs.line_count;
177 dst_operand.address = regs.offset_out; 180 dst_operand.address = regs.offset_out;
@@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
218 const size_t src_size = 221 const size_t src_size =
219 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); 222 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
220 223
221 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 224 const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
222 read_buffer.resize_destructive(src_size); 225 read_buffer.resize_destructive(src_size);
223 write_buffer.resize_destructive(dst_size); 226 write_buffer.resize_destructive(dst_size);
224 227
@@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
227 230
228 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, 231 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
229 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 232 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
230 regs.pitch_out); 233 abs_pitch_out);
231 234
232 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); 235 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
233} 236}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 456f733cf..db385076d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -193,18 +193,13 @@ struct GPU::Impl {
193 } 193 }
194 194
195 [[nodiscard]] u64 GetTicks() const { 195 [[nodiscard]] u64 GetTicks() const {
196 // This values were reversed engineered by fincs from NVN 196 u64 gpu_tick = system.CoreTiming().GetGPUTicks();
197 // The gpu clock is reported in units of 385/625 nanoseconds
198 constexpr u64 gpu_ticks_num = 384;
199 constexpr u64 gpu_ticks_den = 625;
200 197
201 u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
202 if (Settings::values.use_fast_gpu_time.GetValue()) { 198 if (Settings::values.use_fast_gpu_time.GetValue()) {
203 nanoseconds /= 256; 199 gpu_tick /= 256;
204 } 200 }
205 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; 201
206 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; 202 return gpu_tick;
207 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
208 } 203 }
209 204
210 [[nodiscard]] bool IsAsync() const { 205 [[nodiscard]] bool IsAsync() const {
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 6ce179167..ce827eb6c 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -4,6 +4,7 @@
4#include <array> 4#include <array>
5#include <bit> 5#include <bit>
6 6
7#include "common/scratch_buffer.h"
7#include "common/settings.h" 8#include "common/settings.h"
8#include "video_core/host1x/codecs/h264.h" 9#include "video_core/host1x/codecs/h264.h"
9#include "video_core/host1x/host1x.h" 10#include "video_core/host1x/host1x.h"
@@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) {
188} 189}
189 190
190void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { 191void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
191 std::vector<u8> scan(count); 192 static Common::ScratchBuffer<u8> scan{};
193 scan.resize_destructive(count);
192 if (count == 16) { 194 if (count == 16) {
193 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); 195 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
194 } else { 196 } else {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7b2cde7a7..45141e488 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -111,7 +111,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
111 [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr); 111 [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
112 SetEntry<false>(current_gpu_addr, entry_type); 112 SetEntry<false>(current_gpu_addr, entry_type);
113 if (current_entry_type != entry_type) { 113 if (current_entry_type != entry_type) {
114 rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); 114 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size);
115 } 115 }
116 if constexpr (entry_type == EntryType::Mapped) { 116 if constexpr (entry_type == EntryType::Mapped) {
117 const VAddr current_cpu_addr = cpu_addr + offset; 117 const VAddr current_cpu_addr = cpu_addr + offset;
@@ -134,7 +134,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
134 [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr); 134 [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
135 SetEntry<true>(current_gpu_addr, entry_type); 135 SetEntry<true>(current_gpu_addr, entry_type);
136 if (current_entry_type != entry_type) { 136 if (current_entry_type != entry_type) {
137 rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size); 137 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size);
138 } 138 }
139 if constexpr (entry_type == EntryType::Mapped) { 139 if constexpr (entry_type == EntryType::Mapped) {
140 const VAddr current_cpu_addr = cpu_addr + offset; 140 const VAddr current_cpu_addr = cpu_addr + offset;
@@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
587 587
588void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, 588void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
589 VideoCommon::CacheType which) { 589 VideoCommon::CacheType which) {
590 std::vector<u8> tmp_buffer(size); 590 tmp_buffer.resize_destructive(size);
591 ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); 591 ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
592 592
593 // The output block must be flushed in case it has data modified from the GPU. 593 // The output block must be flushed in case it has data modified from the GPU.
@@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
670 return result; 670 return result;
671} 671}
672 672
673std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( 673boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32>
674 GPUVAddr gpu_addr, std::size_t size) const { 674MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const {
675 std::vector<std::pair<GPUVAddr, std::size_t>> result{}; 675 boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{};
676 GetSubmappedRangeImpl<true>(gpu_addr, size, result); 676 GetSubmappedRangeImpl<true>(gpu_addr, size, result);
677 return result; 677 return result;
678} 678}
@@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
680template <bool is_gpu_address> 680template <bool is_gpu_address>
681void MemoryManager::GetSubmappedRangeImpl( 681void MemoryManager::GetSubmappedRangeImpl(
682 GPUVAddr gpu_addr, std::size_t size, 682 GPUVAddr gpu_addr, std::size_t size,
683 std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& 683 boost::container::small_vector<
684 result) const { 684 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result)
685 const {
685 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> 686 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
686 last_segment{}; 687 last_segment{};
687 std::optional<VAddr> old_page_addr{}; 688 std::optional<VAddr> old_page_addr{};
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 794535122..4202c26ff 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -8,10 +8,12 @@
8#include <mutex> 8#include <mutex>
9#include <optional> 9#include <optional>
10#include <vector> 10#include <vector>
11#include <boost/container/small_vector.hpp>
11 12
12#include "common/common_types.h" 13#include "common/common_types.h"
13#include "common/multi_level_page_table.h" 14#include "common/multi_level_page_table.h"
14#include "common/range_map.h" 15#include "common/range_map.h"
16#include "common/scratch_buffer.h"
15#include "common/virtual_buffer.h" 17#include "common/virtual_buffer.h"
16#include "video_core/cache_types.h" 18#include "video_core/cache_types.h"
17#include "video_core/pte_kind.h" 19#include "video_core/pte_kind.h"
@@ -107,8 +109,8 @@ public:
107 * if the region is continuous, a single pair will be returned. If it's unmapped, an empty 109 * if the region is continuous, a single pair will be returned. If it's unmapped, an empty
108 * vector will be returned; 110 * vector will be returned;
109 */ 111 */
110 std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, 112 boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange(
111 std::size_t size) const; 113 GPUVAddr gpu_addr, std::size_t size) const;
112 114
113 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, 115 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
114 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); 116 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
@@ -165,7 +167,8 @@ private:
165 template <bool is_gpu_address> 167 template <bool is_gpu_address>
166 void GetSubmappedRangeImpl( 168 void GetSubmappedRangeImpl(
167 GPUVAddr gpu_addr, std::size_t size, 169 GPUVAddr gpu_addr, std::size_t size,
168 std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& 170 boost::container::small_vector<
171 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>&
169 result) const; 172 result) const;
170 173
171 Core::System& system; 174 Core::System& system;
@@ -215,8 +218,8 @@ private:
215 Common::VirtualBuffer<u32> big_page_table_cpu; 218 Common::VirtualBuffer<u32> big_page_table_cpu;
216 219
217 std::vector<u64> big_page_continuous; 220 std::vector<u64> big_page_continuous;
218 std::vector<std::pair<VAddr, std::size_t>> page_stash{}; 221 boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{};
219 std::vector<std::pair<VAddr, std::size_t>> page_stash2{}; 222 boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{};
220 223
221 mutable std::mutex guard; 224 mutable std::mutex guard;
222 225
@@ -226,6 +229,8 @@ private:
226 std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; 229 std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
227 230
228 static std::atomic<size_t> unique_identifier_generator; 231 static std::atomic<size_t> unique_identifier_generator;
232
233 Common::ScratchBuffer<u8> tmp_buffer;
229}; 234};
230 235
231} // namespace Tegra 236} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 1a0cea9b7..3151c0db8 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -87,7 +87,8 @@ void ComputePipeline::Configure() {
87 texture_cache.SynchronizeComputeDescriptors(); 87 texture_cache.SynchronizeComputeDescriptors();
88 88
89 boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; 89 boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
90 std::array<GLuint, MAX_TEXTURES> samplers; 90 boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
91 std::array<GLuint, MAX_TEXTURES> gl_samplers;
91 std::array<GLuint, MAX_TEXTURES> textures; 92 std::array<GLuint, MAX_TEXTURES> textures;
92 std::array<GLuint, MAX_IMAGES> images; 93 std::array<GLuint, MAX_IMAGES> images;
93 GLsizei sampler_binding{}; 94 GLsizei sampler_binding{};
@@ -131,7 +132,6 @@ void ComputePipeline::Configure() {
131 for (u32 index = 0; index < desc.count; ++index) { 132 for (u32 index = 0; index < desc.count; ++index) {
132 const auto handle{read_handle(desc, index)}; 133 const auto handle{read_handle(desc, index)};
133 views.push_back({handle.first}); 134 views.push_back({handle.first});
134 samplers[sampler_binding++] = 0;
135 } 135 }
136 } 136 }
137 for (const auto& desc : info.image_buffer_descriptors) { 137 for (const auto& desc : info.image_buffer_descriptors) {
@@ -142,8 +142,8 @@ void ComputePipeline::Configure() {
142 const auto handle{read_handle(desc, index)}; 142 const auto handle{read_handle(desc, index)};
143 views.push_back({handle.first}); 143 views.push_back({handle.first});
144 144
145 Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); 145 VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
146 samplers[sampler_binding++] = sampler->Handle(); 146 samplers.push_back(sampler);
147 } 147 }
148 } 148 }
149 for (const auto& desc : info.image_descriptors) { 149 for (const auto& desc : info.image_descriptors) {
@@ -186,10 +186,17 @@ void ComputePipeline::Configure() {
186 186
187 const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + 187 const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
188 num_image_buffers}; 188 num_image_buffers};
189 const VideoCommon::SamplerId* samplers_it{samplers.data()};
189 texture_binding += num_texture_buffers; 190 texture_binding += num_texture_buffers;
190 image_binding += num_image_buffers; 191 image_binding += num_image_buffers;
191 192
192 u32 texture_scaling_mask{}; 193 u32 texture_scaling_mask{};
194
195 for (const auto& desc : info.texture_buffer_descriptors) {
196 for (u32 index = 0; index < desc.count; ++index) {
197 gl_samplers[sampler_binding++] = 0;
198 }
199 }
193 for (const auto& desc : info.texture_descriptors) { 200 for (const auto& desc : info.texture_descriptors) {
194 for (u32 index = 0; index < desc.count; ++index) { 201 for (u32 index = 0; index < desc.count; ++index) {
195 ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; 202 ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
@@ -198,6 +205,12 @@ void ComputePipeline::Configure() {
198 texture_scaling_mask |= 1u << texture_binding; 205 texture_scaling_mask |= 1u << texture_binding;
199 } 206 }
200 ++texture_binding; 207 ++texture_binding;
208
209 const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))};
210 const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
211 !image_view.SupportsAnisotropy()};
212 gl_samplers[sampler_binding++] =
213 use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle();
201 } 214 }
202 } 215 }
203 u32 image_scaling_mask{}; 216 u32 image_scaling_mask{};
@@ -228,7 +241,7 @@ void ComputePipeline::Configure() {
228 if (texture_binding != 0) { 241 if (texture_binding != 0) {
229 ASSERT(texture_binding == sampler_binding); 242 ASSERT(texture_binding == sampler_binding);
230 glBindTextures(0, texture_binding, textures.data()); 243 glBindTextures(0, texture_binding, textures.data());
231 glBindSamplers(0, sampler_binding, samplers.data()); 244 glBindSamplers(0, sampler_binding, gl_samplers.data());
232 } 245 }
233 if (image_binding != 0) { 246 if (image_binding != 0) {
234 glBindImageTextures(0, image_binding, images.data()); 247 glBindImageTextures(0, image_binding, images.data());
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 89000d6e0..c58f760b8 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -275,9 +275,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
275template <typename Spec> 275template <typename Spec>
276void GraphicsPipeline::ConfigureImpl(bool is_indexed) { 276void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
277 std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; 277 std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
278 std::array<GLuint, MAX_TEXTURES> samplers; 278 std::array<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
279 size_t views_index{}; 279 size_t views_index{};
280 GLsizei sampler_binding{}; 280 size_t samplers_index{};
281 281
282 texture_cache.SynchronizeGraphicsDescriptors(); 282 texture_cache.SynchronizeGraphicsDescriptors();
283 283
@@ -337,7 +337,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
337 for (u32 index = 0; index < desc.count; ++index) { 337 for (u32 index = 0; index < desc.count; ++index) {
338 const auto handle{read_handle(desc, index)}; 338 const auto handle{read_handle(desc, index)};
339 views[views_index++] = {handle.first}; 339 views[views_index++] = {handle.first};
340 samplers[sampler_binding++] = 0;
341 } 340 }
342 } 341 }
343 } 342 }
@@ -351,8 +350,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
351 const auto handle{read_handle(desc, index)}; 350 const auto handle{read_handle(desc, index)};
352 views[views_index++] = {handle.first}; 351 views[views_index++] = {handle.first};
353 352
354 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; 353 VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
355 samplers[sampler_binding++] = sampler->Handle(); 354 samplers[samplers_index++] = sampler;
356 } 355 }
357 } 356 }
358 if constexpr (Spec::has_images) { 357 if constexpr (Spec::has_images) {
@@ -445,10 +444,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
445 program_manager.BindSourcePrograms(source_programs); 444 program_manager.BindSourcePrograms(source_programs);
446 } 445 }
447 const VideoCommon::ImageViewInOut* views_it{views.data()}; 446 const VideoCommon::ImageViewInOut* views_it{views.data()};
447 const VideoCommon::SamplerId* samplers_it{samplers.data()};
448 GLsizei texture_binding = 0; 448 GLsizei texture_binding = 0;
449 GLsizei image_binding = 0; 449 GLsizei image_binding = 0;
450 GLsizei sampler_binding{};
450 std::array<GLuint, MAX_TEXTURES> textures; 451 std::array<GLuint, MAX_TEXTURES> textures;
451 std::array<GLuint, MAX_IMAGES> images; 452 std::array<GLuint, MAX_IMAGES> images;
453 std::array<GLuint, MAX_TEXTURES> gl_samplers;
452 const auto prepare_stage{[&](size_t stage) { 454 const auto prepare_stage{[&](size_t stage) {
453 buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); 455 buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
454 buffer_cache.BindHostStageBuffers(stage); 456 buffer_cache.BindHostStageBuffers(stage);
@@ -465,6 +467,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
465 u32 stage_image_binding{}; 467 u32 stage_image_binding{};
466 468
467 const auto& info{stage_infos[stage]}; 469 const auto& info{stage_infos[stage]};
470 if constexpr (Spec::has_texture_buffers) {
471 for (const auto& desc : info.texture_buffer_descriptors) {
472 for (u32 index = 0; index < desc.count; ++index) {
473 gl_samplers[sampler_binding++] = 0;
474 }
475 }
476 }
468 for (const auto& desc : info.texture_descriptors) { 477 for (const auto& desc : info.texture_descriptors) {
469 for (u32 index = 0; index < desc.count; ++index) { 478 for (u32 index = 0; index < desc.count; ++index) {
470 ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; 479 ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
@@ -474,6 +483,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
474 } 483 }
475 ++texture_binding; 484 ++texture_binding;
476 ++stage_texture_binding; 485 ++stage_texture_binding;
486
487 const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))};
488 const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
489 !image_view.SupportsAnisotropy()};
490 gl_samplers[sampler_binding++] =
491 use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle();
477 } 492 }
478 } 493 }
479 for (const auto& desc : info.image_descriptors) { 494 for (const auto& desc : info.image_descriptors) {
@@ -534,7 +549,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
534 if (texture_binding != 0) { 549 if (texture_binding != 0) {
535 ASSERT(texture_binding == sampler_binding); 550 ASSERT(texture_binding == sampler_binding);
536 glBindTextures(0, texture_binding, textures.data()); 551 glBindTextures(0, texture_binding, textures.data());
537 glBindSamplers(0, sampler_binding, samplers.data()); 552 glBindSamplers(0, sampler_binding, gl_samplers.data());
538 } 553 }
539 if (image_binding != 0) { 554 if (image_binding != 0) {
540 glBindImageTextures(0, image_binding, images.data()); 555 glBindImageTextures(0, image_binding, images.data());
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3f077311e..0329ed820 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
85 case Shader::Stage::VertexB: 85 case Shader::Stage::VertexB:
86 case Shader::Stage::Geometry: 86 case Shader::Stage::Geometry:
87 if (!use_assembly_shaders && key.xfb_enabled != 0) { 87 if (!use_assembly_shaders && key.xfb_enabled != 0) {
88 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); 88 auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
89 info.xfb_varyings = varyings;
90 info.xfb_count = count;
89 } 91 }
90 break; 92 break;
91 case Shader::Stage::TessellationEval: 93 case Shader::Stage::TessellationEval:
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 1c5dbcdd8..3b446be07 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -1268,36 +1268,48 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
1268 1268
1269 UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); 1269 UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
1270 1270
1271 sampler.Create(); 1271 const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
1272 const GLuint handle = sampler.handle; 1272
1273 glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); 1273 const auto create_sampler = [&](const f32 anisotropy) {
1274 glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); 1274 OGLSampler new_sampler;
1275 glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); 1275 new_sampler.Create();
1276 glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); 1276 const GLuint handle = new_sampler.handle;
1277 glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); 1277 glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
1278 glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); 1278 glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
1279 glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); 1279 glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
1280 glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); 1280 glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
1281 glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); 1281 glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
1282 glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); 1282 glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
1283 glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); 1283 glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
1284 1284 glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
1285 if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { 1285 glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
1286 const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); 1286 glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
1287 glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); 1287 glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
1288 } else { 1288
1289 LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); 1289 if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
1290 } 1290 glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, anisotropy);
1291 if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { 1291 } else {
1292 glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); 1292 LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
1293 } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { 1293 }
1294 LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); 1294 if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
1295 } 1295 glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
1296 if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { 1296 } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
1297 glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); 1297 LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
1298 } else if (seamless == GL_FALSE) { 1298 }
1299 // We default to false because it's more common 1299 if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
1300 LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); 1300 glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
1301 } else if (seamless == GL_FALSE) {
1302 // We default to false because it's more common
1303 LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
1304 }
1305 return new_sampler;
1306 };
1307
1308 sampler = create_sampler(max_anisotropy);
1309
1310 const f32 max_anisotropy_default = static_cast<f32>(1U << config.max_anisotropy);
1311 if (max_anisotropy > max_anisotropy_default) {
1312 sampler_default_anisotropy = create_sampler(max_anisotropy_default);
1301 } 1313 }
1302} 1314}
1303 1315
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 1148b73d7..3676eaaa9 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -309,12 +309,21 @@ class Sampler {
309public: 309public:
310 explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); 310 explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
311 311
312 GLuint Handle() const noexcept { 312 [[nodiscard]] GLuint Handle() const noexcept {
313 return sampler.handle; 313 return sampler.handle;
314 } 314 }
315 315
316 [[nodiscard]] GLuint HandleWithDefaultAnisotropy() const noexcept {
317 return sampler_default_anisotropy.handle;
318 }
319
320 [[nodiscard]] bool HasAddedAnisotropy() const noexcept {
321 return static_cast<bool>(sampler_default_anisotropy.handle);
322 }
323
316private: 324private:
317 OGLSampler sampler; 325 OGLSampler sampler;
326 OGLSampler sampler_default_anisotropy;
318}; 327};
319 328
320class Framebuffer { 329class Framebuffer {
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 983e1c2e1..71c783709 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -178,7 +178,7 @@ public:
178inline void PushImageDescriptors(TextureCache& texture_cache, 178inline void PushImageDescriptors(TextureCache& texture_cache,
179 GuestDescriptorQueue& guest_descriptor_queue, 179 GuestDescriptorQueue& guest_descriptor_queue,
180 const Shader::Info& info, RescalingPushConstant& rescaling, 180 const Shader::Info& info, RescalingPushConstant& rescaling,
181 const VkSampler*& samplers, 181 const VideoCommon::SamplerId*& samplers,
182 const VideoCommon::ImageViewInOut*& views) { 182 const VideoCommon::ImageViewInOut*& views) {
183 const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); 183 const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
184 const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); 184 const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
@@ -187,10 +187,15 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
187 for (const auto& desc : info.texture_descriptors) { 187 for (const auto& desc : info.texture_descriptors) {
188 for (u32 index = 0; index < desc.count; ++index) { 188 for (u32 index = 0; index < desc.count; ++index) {
189 const VideoCommon::ImageViewId image_view_id{(views++)->id}; 189 const VideoCommon::ImageViewId image_view_id{(views++)->id};
190 const VkSampler sampler{*(samplers++)}; 190 const VideoCommon::SamplerId sampler_id{*(samplers++)};
191 ImageView& image_view{texture_cache.GetImageView(image_view_id)}; 191 ImageView& image_view{texture_cache.GetImageView(image_view_id)};
192 const VkImageView vk_image_view{image_view.Handle(desc.type)}; 192 const VkImageView vk_image_view{image_view.Handle(desc.type)};
193 guest_descriptor_queue.AddSampledImage(vk_image_view, sampler); 193 const Sampler& sampler{texture_cache.GetSampler(sampler_id)};
194 const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
195 !image_view.SupportsAnisotropy()};
196 const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
197 : sampler.Handle()};
198 guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
194 rescaling.PushTexture(texture_cache.IsRescaling(image_view)); 199 rescaling.PushTexture(texture_cache.IsRescaling(image_view));
195 } 200 }
196 } 201 }
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8c33722d3..f47301ad5 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
361 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, 361 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
362 }; 362 };
363 // Measuring a popular game, this number never exceeds the specified size once data is warmed up 363 // Measuring a popular game, this number never exceeds the specified size once data is warmed up
364 boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); 364 boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
365 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); 365 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
366 scheduler.RequestOutsideRenderPassOperationContext(); 366 scheduler.RequestOutsideRenderPassOperationContext();
367 scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { 367 scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
@@ -516,15 +516,15 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
516 buffer_handles.push_back(handle); 516 buffer_handles.push_back(handle);
517 } 517 }
518 if (device.IsExtExtendedDynamicStateSupported()) { 518 if (device.IsExtExtendedDynamicStateSupported()) {
519 scheduler.Record([bindings = bindings, 519 scheduler.Record([bindings = std::move(bindings),
520 buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { 520 buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
521 cmdbuf.BindVertexBuffers2EXT( 521 cmdbuf.BindVertexBuffers2EXT(
522 bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), 522 bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(),
523 bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data()); 523 bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data());
524 }); 524 });
525 } else { 525 } else {
526 scheduler.Record([bindings = bindings, 526 scheduler.Record([bindings = std::move(bindings),
527 buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { 527 buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
528 cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index, 528 cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index,
529 buffer_handles.data(), bindings.offsets.data()); 529 buffer_handles.data(), bindings.offsets.data());
530 }); 530 });
@@ -561,12 +561,12 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
561 for (u32 index = 0; index < bindings.buffers.size(); ++index) { 561 for (u32 index = 0; index < bindings.buffers.size(); ++index) {
562 buffer_handles.push_back(bindings.buffers[index]->Handle()); 562 buffer_handles.push_back(bindings.buffers[index]->Handle());
563 } 563 }
564 scheduler.Record( 564 scheduler.Record([bindings = std::move(bindings),
565 [bindings = bindings, buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { 565 buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
566 cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()), 566 cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()),
567 buffer_handles.data(), bindings.offsets.data(), 567 buffer_handles.data(), bindings.offsets.data(),
568 bindings.sizes.data()); 568 bindings.sizes.data());
569 }); 569 });
570} 570}
571 571
572void BufferCacheRuntime::ReserveNullBuffer() { 572void BufferCacheRuntime::ReserveNullBuffer() {
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 733e70d9d..73e585c2b 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -115,7 +115,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
115 115
116 static constexpr size_t max_elements = 64; 116 static constexpr size_t max_elements = 64;
117 boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views; 117 boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
118 boost::container::static_vector<VkSampler, max_elements> samplers; 118 boost::container::static_vector<VideoCommon::SamplerId, max_elements> samplers;
119 119
120 const auto& qmd{kepler_compute.launch_description}; 120 const auto& qmd{kepler_compute.launch_description};
121 const auto& cbufs{qmd.const_buffer_config}; 121 const auto& cbufs{qmd.const_buffer_config};
@@ -160,8 +160,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
160 const auto handle{read_handle(desc, index)}; 160 const auto handle{read_handle(desc, index)};
161 views.push_back({handle.first}); 161 views.push_back({handle.first});
162 162
163 Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); 163 VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
164 samplers.push_back(sampler->Handle()); 164 samplers.push_back(sampler);
165 } 165 }
166 } 166 }
167 for (const auto& desc : info.image_descriptors) { 167 for (const auto& desc : info.image_descriptors) {
@@ -192,7 +192,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
192 buffer_cache.BindHostComputeBuffers(); 192 buffer_cache.BindHostComputeBuffers();
193 193
194 RescalingPushConstant rescaling; 194 RescalingPushConstant rescaling;
195 const VkSampler* samplers_it{samplers.data()}; 195 const VideoCommon::SamplerId* samplers_it{samplers.data()};
196 const VideoCommon::ImageViewInOut* views_it{views.data()}; 196 const VideoCommon::ImageViewInOut* views_it{views.data()};
197 PushImageDescriptors(texture_cache, guest_descriptor_queue, info, rescaling, samplers_it, 197 PushImageDescriptors(texture_cache, guest_descriptor_queue, info, rescaling, samplers_it,
198 views_it); 198 views_it);
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 506b78f08..c1595642e 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -298,7 +298,7 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
298template <typename Spec> 298template <typename Spec>
299void GraphicsPipeline::ConfigureImpl(bool is_indexed) { 299void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
300 std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views; 300 std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
301 std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; 301 std::array<VideoCommon::SamplerId, MAX_IMAGE_ELEMENTS> samplers;
302 size_t sampler_index{}; 302 size_t sampler_index{};
303 size_t view_index{}; 303 size_t view_index{};
304 304
@@ -367,8 +367,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
367 const auto handle{read_handle(desc, index)}; 367 const auto handle{read_handle(desc, index)};
368 views[view_index++] = {handle.first}; 368 views[view_index++] = {handle.first};
369 369
370 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; 370 VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
371 samplers[sampler_index++] = sampler->Handle(); 371 samplers[sampler_index++] = sampler;
372 } 372 }
373 } 373 }
374 if constexpr (Spec::has_images) { 374 if constexpr (Spec::has_images) {
@@ -453,7 +453,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
453 453
454 RescalingPushConstant rescaling; 454 RescalingPushConstant rescaling;
455 RenderAreaPushConstant render_area; 455 RenderAreaPushConstant render_area;
456 const VkSampler* samplers_it{samplers.data()}; 456 const VideoCommon::SamplerId* samplers_it{samplers.data()};
457 const VideoCommon::ImageViewInOut* views_it{views.data()}; 457 const VideoCommon::ImageViewInOut* views_it{views.data()};
458 const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { 458 const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
459 buffer_cache.BindHostStageBuffers(stage); 459 buffer_cache.BindHostStageBuffers(stage);
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index b128c4f6e..5eeda08d2 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -3,6 +3,7 @@
3 3
4#include <thread> 4#include <thread>
5 5
6#include "common/polyfill_ranges.h"
6#include "common/settings.h" 7#include "common/settings.h"
7#include "video_core/renderer_vulkan/vk_master_semaphore.h" 8#include "video_core/renderer_vulkan/vk_master_semaphore.h"
8#include "video_core/vulkan_common/vulkan_device.h" 9#include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 18e040a1b..9f316113c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
167 info.fixed_state_point_size = point_size; 167 info.fixed_state_point_size = point_size;
168 } 168 }
169 if (key.state.xfb_enabled) { 169 if (key.state.xfb_enabled) {
170 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); 170 auto [varyings, count] =
171 VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
172 info.xfb_varyings = varyings;
173 info.xfb_count = count;
171 } 174 }
172 info.convert_depth_mode = gl_ndc; 175 info.convert_depth_mode = gl_ndc;
173 } 176 }
@@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
214 info.fixed_state_point_size = point_size; 217 info.fixed_state_point_size = point_size;
215 } 218 }
216 if (key.state.xfb_enabled != 0) { 219 if (key.state.xfb_enabled != 0) {
217 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); 220 auto [varyings, count] =
221 VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
222 info.xfb_varyings = varyings;
223 info.xfb_count = count;
218 } 224 }
219 info.convert_depth_mode = gl_ndc; 225 info.convert_depth_mode = gl_ndc;
220 break; 226 break;
@@ -705,10 +711,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
705std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( 711std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
706 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, 712 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
707 PipelineStatistics* statistics, bool build_in_parallel) try { 713 PipelineStatistics* statistics, bool build_in_parallel) try {
708 // TODO: Remove this when Intel fixes their shader compiler. 714 if (device.HasBrokenCompute()) {
709 // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159
710 if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS &&
711 !Settings::values.enable_compute_pipelines.GetValue()) {
712 LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); 715 LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
713 return nullptr; 716 return nullptr;
714 } 717 }
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 8711e2a87..f3cef09dd 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
330 }; 330 };
331} 331}
332 332
333[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( 333[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16>
334 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { 334TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
335 std::vector<VkBufferCopy> result(copies.size()); 335 boost::container::small_vector<VkBufferCopy, 16> result(copies.size());
336 std::ranges::transform( 336 std::ranges::transform(
337 copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { 337 copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
338 return VkBufferCopy{ 338 return VkBufferCopy{
@@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
344 return result; 344 return result;
345} 345}
346 346
347[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( 347[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies(
348 std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { 348 std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
349 struct Maker { 349 struct Maker {
350 VkBufferImageCopy operator()(const BufferImageCopy& copy) const { 350 VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
@@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
377 VkImageAspectFlags aspect_mask; 377 VkImageAspectFlags aspect_mask;
378 }; 378 };
379 if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { 379 if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
380 std::vector<VkBufferImageCopy> result(copies.size() * 2); 380 boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2);
381 std::ranges::transform(copies, result.begin(), 381 std::ranges::transform(copies, result.begin(),
382 Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); 382 Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
383 std::ranges::transform(copies, result.begin() + copies.size(), 383 std::ranges::transform(copies, result.begin() + copies.size(),
384 Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); 384 Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
385 return result; 385 return result;
386 } else { 386 } else {
387 std::vector<VkBufferImageCopy> result(copies.size()); 387 boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size());
388 std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); 388 std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
389 return result; 389 return result;
390 } 390 }
@@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
867 867
868void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, 868void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
869 std::span<const VideoCommon::ImageCopy> copies) { 869 std::span<const VideoCommon::ImageCopy> copies) {
870 std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); 870 boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
871 std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); 871 boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size());
872 const VkImageAspectFlags src_aspect_mask = src.AspectMask(); 872 const VkImageAspectFlags src_aspect_mask = src.AspectMask();
873 const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); 873 const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
874 874
@@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
1157 1157
1158void TextureCacheRuntime::CopyImage(Image& dst, Image& src, 1158void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
1159 std::span<const VideoCommon::ImageCopy> copies) { 1159 std::span<const VideoCommon::ImageCopy> copies) {
1160 std::vector<VkImageCopy> vk_copies(copies.size()); 1160 boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
1161 const VkImageAspectFlags aspect_mask = dst.AspectMask(); 1161 const VkImageAspectFlags aspect_mask = dst.AspectMask();
1162 ASSERT(aspect_mask == src.AspectMask()); 1162 ASSERT(aspect_mask == src.AspectMask());
1163 1163
@@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
1332 ScaleDown(true); 1332 ScaleDown(true);
1333 } 1333 }
1334 scheduler->RequestOutsideRenderPassOperationContext(); 1334 scheduler->RequestOutsideRenderPassOperationContext();
1335 std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); 1335 auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
1336 const VkBuffer src_buffer = buffer; 1336 const VkBuffer src_buffer = buffer;
1337 const VkImage vk_image = *original_image; 1337 const VkImage vk_image = *original_image;
1338 const VkImageAspectFlags vk_aspect_mask = aspect_mask; 1338 const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS
1367 if (is_rescaled) { 1367 if (is_rescaled) {
1368 ScaleDown(); 1368 ScaleDown();
1369 } 1369 }
1370 boost::container::small_vector<VkBuffer, 1> buffers_vector{}; 1370 boost::container::small_vector<VkBuffer, 8> buffers_vector{};
1371 boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; 1371 boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
1372 vk_copies;
1372 for (size_t index = 0; index < buffers_span.size(); index++) { 1373 for (size_t index = 0; index < buffers_span.size(); index++) {
1373 buffers_vector.emplace_back(buffers_span[index]); 1374 buffers_vector.emplace_back(buffers_span[index]);
1374 vk_copies.emplace_back( 1375 vk_copies.emplace_back(
@@ -1802,27 +1803,36 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
1802 // Some games have samplers with garbage. Sanitize them here. 1803 // Some games have samplers with garbage. Sanitize them here.
1803 const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); 1804 const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
1804 1805
1805 sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ 1806 const auto create_sampler = [&](const f32 anisotropy) {
1806 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, 1807 return device.GetLogical().CreateSampler(VkSamplerCreateInfo{
1807 .pNext = pnext, 1808 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
1808 .flags = 0, 1809 .pNext = pnext,
1809 .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), 1810 .flags = 0,
1810 .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), 1811 .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
1811 .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), 1812 .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
1812 .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), 1813 .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
1813 .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), 1814 .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
1814 .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), 1815 .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
1815 .mipLodBias = tsc.LodBias(), 1816 .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
1816 .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE), 1817 .mipLodBias = tsc.LodBias(),
1817 .maxAnisotropy = max_anisotropy, 1818 .anisotropyEnable = static_cast<VkBool32>(anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
1818 .compareEnable = tsc.depth_compare_enabled, 1819 .maxAnisotropy = anisotropy,
1819 .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), 1820 .compareEnable = tsc.depth_compare_enabled,
1820 .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), 1821 .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
1821 .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), 1822 .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
1822 .borderColor = 1823 .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
1823 arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color), 1824 .borderColor =
1824 .unnormalizedCoordinates = VK_FALSE, 1825 arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
1825 }); 1826 .unnormalizedCoordinates = VK_FALSE,
1827 });
1828 };
1829
1830 sampler = create_sampler(max_anisotropy);
1831
1832 const f32 max_anisotropy_default = static_cast<f32>(1U << tsc.max_anisotropy);
1833 if (max_anisotropy > max_anisotropy_default) {
1834 sampler_default_anisotropy = create_sampler(max_anisotropy_default);
1835 }
1826} 1836}
1827 1837
1828Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, 1838Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
@@ -1849,7 +1859,7 @@ Framebuffer::~Framebuffer() = default;
1849void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, 1859void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
1850 std::span<ImageView*, NUM_RT> color_buffers, 1860 std::span<ImageView*, NUM_RT> color_buffers,
1851 ImageView* depth_buffer, bool is_rescaled) { 1861 ImageView* depth_buffer, bool is_rescaled) {
1852 std::vector<VkImageView> attachments; 1862 boost::container::small_vector<VkImageView, NUM_RT + 1> attachments;
1853 RenderPassKey renderpass_key{}; 1863 RenderPassKey renderpass_key{};
1854 s32 num_layers = 1; 1864 s32 num_layers = 1;
1855 1865
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 0f7a5ffd4..f14525dcb 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -279,8 +279,17 @@ public:
279 return *sampler; 279 return *sampler;
280 } 280 }
281 281
282 [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept {
283 return *sampler_default_anisotropy;
284 }
285
286 [[nodiscard]] bool HasAddedAnisotropy() const noexcept {
287 return static_cast<bool>(sampler_default_anisotropy);
288 }
289
282private: 290private:
283 vk::Sampler sampler; 291 vk::Sampler sampler;
292 vk::Sampler sampler_default_anisotropy;
284}; 293};
285 294
286class Framebuffer { 295class Framebuffer {
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index c5213875b..4db948b6d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() {
151 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), 151 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
152 marked_for_removal.end()); 152 marked_for_removal.end());
153 153
154 std::vector<ShaderInfo*> removed_shaders; 154 boost::container::small_vector<ShaderInfo*, 16> removed_shaders;
155 removed_shaders.reserve(marked_for_removal.size());
156 155
157 std::scoped_lock lock{lookup_mutex}; 156 std::scoped_lock lock{lookup_mutex};
158
159 for (Entry* const entry : marked_for_removal) { 157 for (Entry* const entry : marked_for_removal) {
160 removed_shaders.push_back(entry->data); 158 removed_shaders.push_back(entry->data);
161 159
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1b8a17ee8..55d49d017 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -6,6 +6,7 @@
6#include <array> 6#include <array>
7#include <optional> 7#include <optional>
8#include <vector> 8#include <vector>
9#include <boost/container/small_vector.hpp>
9 10
10#include "common/common_funcs.h" 11#include "common/common_funcs.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
@@ -108,8 +109,8 @@ struct ImageBase {
108 std::vector<ImageViewInfo> image_view_infos; 109 std::vector<ImageViewInfo> image_view_infos;
109 std::vector<ImageViewId> image_view_ids; 110 std::vector<ImageViewId> image_view_ids;
110 111
111 std::vector<u32> slice_offsets; 112 boost::container::small_vector<u32, 16> slice_offsets;
112 std::vector<SubresourceBase> slice_subresources; 113 boost::container::small_vector<SubresourceBase, 16> slice_subresources;
113 114
114 std::vector<AliasedImage> aliased_images; 115 std::vector<AliasedImage> aliased_images;
115 std::vector<ImageId> overlapping_images; 116 std::vector<ImageId> overlapping_images;
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index d134b6738..0c5f4450d 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -45,4 +45,56 @@ ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_in
45 45
46ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {} 46ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {}
47 47
48bool ImageViewBase::SupportsAnisotropy() const noexcept {
49 const bool has_mips = range.extent.levels > 1;
50 const bool is_2d = type == ImageViewType::e2D || type == ImageViewType::e2DArray;
51 if (!has_mips || !is_2d) {
52 return false;
53 }
54
55 switch (format) {
56 case PixelFormat::R8_UNORM:
57 case PixelFormat::R8_SNORM:
58 case PixelFormat::R8_SINT:
59 case PixelFormat::R8_UINT:
60 case PixelFormat::BC4_UNORM:
61 case PixelFormat::BC4_SNORM:
62 case PixelFormat::BC5_UNORM:
63 case PixelFormat::BC5_SNORM:
64 case PixelFormat::R32G32_FLOAT:
65 case PixelFormat::R32G32_SINT:
66 case PixelFormat::R32_FLOAT:
67 case PixelFormat::R16_FLOAT:
68 case PixelFormat::R16_UNORM:
69 case PixelFormat::R16_SNORM:
70 case PixelFormat::R16_UINT:
71 case PixelFormat::R16_SINT:
72 case PixelFormat::R16G16_UNORM:
73 case PixelFormat::R16G16_FLOAT:
74 case PixelFormat::R16G16_UINT:
75 case PixelFormat::R16G16_SINT:
76 case PixelFormat::R16G16_SNORM:
77 case PixelFormat::R8G8_UNORM:
78 case PixelFormat::R8G8_SNORM:
79 case PixelFormat::R8G8_SINT:
80 case PixelFormat::R8G8_UINT:
81 case PixelFormat::R32G32_UINT:
82 case PixelFormat::R32_UINT:
83 case PixelFormat::R32_SINT:
84 case PixelFormat::G4R4_UNORM:
85 // Depth formats
86 case PixelFormat::D32_FLOAT:
87 case PixelFormat::D16_UNORM:
88 // Stencil formats
89 case PixelFormat::S8_UINT:
90 // DepthStencil formats
91 case PixelFormat::D24_UNORM_S8_UINT:
92 case PixelFormat::S8_UINT_D24_UNORM:
93 case PixelFormat::D32_FLOAT_S8_UINT:
94 return false;
95 default:
96 return true;
97 }
98}
99
48} // namespace VideoCommon 100} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index a25ae1d4a..87549ffff 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -33,6 +33,8 @@ struct ImageViewBase {
33 return type == ImageViewType::Buffer; 33 return type == ImageViewType::Buffer;
34 } 34 }
35 35
36 [[nodiscard]] bool SupportsAnisotropy() const noexcept;
37
36 ImageId image_id{}; 38 ImageId image_id{};
37 GPUVAddr gpu_addr = 0; 39 GPUVAddr gpu_addr = 0;
38 PixelFormat format{}; 40 PixelFormat format{};
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c7f7448e9..d3f03a995 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -186,6 +186,10 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
186 186
187template <class P> 187template <class P>
188void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { 188void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
189 if (!Settings::values.barrier_feedback_loops.GetValue()) {
190 return;
191 }
192
189 const bool requires_barrier = [&] { 193 const bool requires_barrier = [&] {
190 for (const auto& view : views) { 194 for (const auto& view : views) {
191 if (!view.id) { 195 if (!view.id) {
@@ -222,30 +226,50 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
222 226
223template <class P> 227template <class P>
224typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { 228typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
229 return &slot_samplers[GetGraphicsSamplerId(index)];
230}
231
232template <class P>
233typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
234 return &slot_samplers[GetComputeSamplerId(index)];
235}
236
237template <class P>
238SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) {
225 if (index > channel_state->graphics_sampler_table.Limit()) { 239 if (index > channel_state->graphics_sampler_table.Limit()) {
226 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); 240 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
227 return &slot_samplers[NULL_SAMPLER_ID]; 241 return NULL_SAMPLER_ID;
228 } 242 }
229 const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); 243 const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
230 SamplerId& id = channel_state->graphics_sampler_ids[index]; 244 SamplerId& id = channel_state->graphics_sampler_ids[index];
231 if (is_new) { 245 if (is_new) {
232 id = FindSampler(descriptor); 246 id = FindSampler(descriptor);
233 } 247 }
234 return &slot_samplers[id]; 248 return id;
235} 249}
236 250
237template <class P> 251template <class P>
238typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { 252SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) {
239 if (index > channel_state->compute_sampler_table.Limit()) { 253 if (index > channel_state->compute_sampler_table.Limit()) {
240 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); 254 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
241 return &slot_samplers[NULL_SAMPLER_ID]; 255 return NULL_SAMPLER_ID;
242 } 256 }
243 const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); 257 const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
244 SamplerId& id = channel_state->compute_sampler_ids[index]; 258 SamplerId& id = channel_state->compute_sampler_ids[index];
245 if (is_new) { 259 if (is_new) {
246 id = FindSampler(descriptor); 260 id = FindSampler(descriptor);
247 } 261 }
248 return &slot_samplers[id]; 262 return id;
263}
264
265template <class P>
266const typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) const noexcept {
267 return slot_samplers[id];
268}
269
270template <class P>
271typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept {
272 return slot_samplers[id];
249} 273}
250 274
251template <class P> 275template <class P>
@@ -280,7 +304,7 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
280} 304}
281 305
282template <class P> 306template <class P>
283bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { 307bool TextureCache<P>::RescaleRenderTargets() {
284 auto& flags = maxwell3d->dirty.flags; 308 auto& flags = maxwell3d->dirty.flags;
285 u32 scale_rating = 0; 309 u32 scale_rating = 0;
286 bool rescaled = false; 310 bool rescaled = false;
@@ -318,13 +342,13 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
318 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; 342 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
319 if (flags[Dirty::ColorBuffer0 + index] || force) { 343 if (flags[Dirty::ColorBuffer0 + index] || force) {
320 flags[Dirty::ColorBuffer0 + index] = false; 344 flags[Dirty::ColorBuffer0 + index] = false;
321 BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); 345 BindRenderTarget(&color_buffer_id, FindColorBuffer(index));
322 } 346 }
323 check_rescale(color_buffer_id, tmp_color_images[index]); 347 check_rescale(color_buffer_id, tmp_color_images[index]);
324 } 348 }
325 if (flags[Dirty::ZetaBuffer] || force) { 349 if (flags[Dirty::ZetaBuffer] || force) {
326 flags[Dirty::ZetaBuffer] = false; 350 flags[Dirty::ZetaBuffer] = false;
327 BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); 351 BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer());
328 } 352 }
329 check_rescale(render_targets.depth_buffer_id, tmp_depth_image); 353 check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
330 354
@@ -389,7 +413,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
389 return; 413 return;
390 } 414 }
391 415
392 const bool rescaled = RescaleRenderTargets(is_clear); 416 const bool rescaled = RescaleRenderTargets();
393 if (is_rescaling != rescaled) { 417 if (is_rescaling != rescaled) {
394 flags[Dirty::RescaleViewports] = true; 418 flags[Dirty::RescaleViewports] = true;
395 flags[Dirty::RescaleScissors] = true; 419 flags[Dirty::RescaleScissors] = true;
@@ -502,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
502 526
503template <class P> 527template <class P>
504void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { 528void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
505 std::vector<ImageId> images; 529 boost::container::small_vector<ImageId, 16> images;
506 ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { 530 ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
507 if (!image.IsSafeDownload()) { 531 if (!image.IsSafeDownload()) {
508 return; 532 return;
@@ -555,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
555 579
556template <class P> 580template <class P>
557void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { 581void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
558 std::vector<ImageId> deleted_images; 582 boost::container::small_vector<ImageId, 16> deleted_images;
559 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); 583 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
560 for (const ImageId id : deleted_images) { 584 for (const ImageId id : deleted_images) {
561 Image& image = slot_images[id]; 585 Image& image = slot_images[id];
@@ -569,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
569 593
570template <class P> 594template <class P>
571void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { 595void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
572 std::vector<ImageId> deleted_images; 596 boost::container::small_vector<ImageId, 16> deleted_images;
573 ForEachImageInRegionGPU(as_id, gpu_addr, size, 597 ForEachImageInRegionGPU(as_id, gpu_addr, size,
574 [&](ImageId id, Image&) { deleted_images.push_back(id); }); 598 [&](ImageId id, Image&) { deleted_images.push_back(id); });
575 for (const ImageId id : deleted_images) { 599 for (const ImageId id : deleted_images) {
@@ -1077,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
1077 const bool native_bgr = runtime.HasNativeBgr(); 1101 const bool native_bgr = runtime.HasNativeBgr();
1078 const bool flexible_formats = True(options & RelaxedOptions::Format); 1102 const bool flexible_formats = True(options & RelaxedOptions::Format);
1079 ImageId image_id{}; 1103 ImageId image_id{};
1080 boost::container::small_vector<ImageId, 1> image_ids; 1104 boost::container::small_vector<ImageId, 8> image_ids;
1081 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { 1105 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
1082 if (True(existing_image.flags & ImageFlagBits::Remapped)) { 1106 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
1083 return false; 1107 return false;
@@ -1598,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
1598 } 1622 }
1599 } 1623 }
1600 ImageId image_id{}; 1624 ImageId image_id{};
1601 boost::container::small_vector<ImageId, 1> image_ids; 1625 boost::container::small_vector<ImageId, 8> image_ids;
1602 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { 1626 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
1603 if (True(existing_image.flags & ImageFlagBits::Remapped)) { 1627 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
1604 return false; 1628 return false;
@@ -1658,7 +1682,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
1658} 1682}
1659 1683
1660template <class P> 1684template <class P>
1661ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { 1685ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
1662 const auto& regs = maxwell3d->regs; 1686 const auto& regs = maxwell3d->regs;
1663 if (index >= regs.rt_control.count) { 1687 if (index >= regs.rt_control.count) {
1664 return ImageViewId{}; 1688 return ImageViewId{};
@@ -1672,11 +1696,11 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
1672 return ImageViewId{}; 1696 return ImageViewId{};
1673 } 1697 }
1674 const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode); 1698 const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode);
1675 return FindRenderTargetView(info, gpu_addr, is_clear); 1699 return FindRenderTargetView(info, gpu_addr);
1676} 1700}
1677 1701
1678template <class P> 1702template <class P>
1679ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { 1703ImageViewId TextureCache<P>::FindDepthBuffer() {
1680 const auto& regs = maxwell3d->regs; 1704 const auto& regs = maxwell3d->regs;
1681 if (!regs.zeta_enable) { 1705 if (!regs.zeta_enable) {
1682 return ImageViewId{}; 1706 return ImageViewId{};
@@ -1686,18 +1710,16 @@ ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
1686 return ImageViewId{}; 1710 return ImageViewId{};
1687 } 1711 }
1688 const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode); 1712 const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode);
1689 return FindRenderTargetView(info, gpu_addr, is_clear); 1713 return FindRenderTargetView(info, gpu_addr);
1690} 1714}
1691 1715
1692template <class P> 1716template <class P>
1693ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, 1717ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) {
1694 bool is_clear) {
1695 const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
1696 ImageId image_id{}; 1718 ImageId image_id{};
1697 bool delete_state = has_deleted_images; 1719 bool delete_state = has_deleted_images;
1698 do { 1720 do {
1699 has_deleted_images = false; 1721 has_deleted_images = false;
1700 image_id = FindOrInsertImage(info, gpu_addr, options); 1722 image_id = FindOrInsertImage(info, gpu_addr);
1701 delete_state |= has_deleted_images; 1723 delete_state |= has_deleted_images;
1702 } while (has_deleted_images); 1724 } while (has_deleted_images);
1703 has_deleted_images = delete_state; 1725 has_deleted_images = delete_state;
@@ -1920,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1920 image.map_view_id = map_id; 1942 image.map_view_id = map_id;
1921 return; 1943 return;
1922 } 1944 }
1923 std::vector<ImageViewId> sparse_maps{}; 1945 boost::container::small_vector<ImageViewId, 16> sparse_maps;
1924 ForEachSparseSegment( 1946 ForEachSparseSegment(
1925 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { 1947 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1926 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); 1948 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
@@ -2195,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
2195 2217
2196template <class P> 2218template <class P>
2197void TextureCache<P>::SynchronizeAliases(ImageId image_id) { 2219void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
2198 boost::container::small_vector<const AliasedImage*, 1> aliased_images; 2220 boost::container::small_vector<const AliasedImage*, 8> aliased_images;
2199 Image& image = slot_images[image_id]; 2221 Image& image = slot_images[image_id];
2200 bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); 2222 bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
2201 bool any_modified = True(image.flags & ImageFlagBits::GpuModified); 2223 bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 3bfa92154..e9ec91265 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -56,7 +56,7 @@ struct ImageViewInOut {
56struct AsyncDecodeContext { 56struct AsyncDecodeContext {
57 ImageId image_id; 57 ImageId image_id;
58 Common::ScratchBuffer<u8> decoded_data; 58 Common::ScratchBuffer<u8> decoded_data;
59 std::vector<BufferImageCopy> copies; 59 boost::container::small_vector<BufferImageCopy, 16> copies;
60 std::mutex mutex; 60 std::mutex mutex;
61 std::atomic_bool complete; 61 std::atomic_bool complete;
62}; 62};
@@ -159,6 +159,18 @@ public:
159 /// Get the sampler from the compute descriptor table in the specified index 159 /// Get the sampler from the compute descriptor table in the specified index
160 Sampler* GetComputeSampler(u32 index); 160 Sampler* GetComputeSampler(u32 index);
161 161
162 /// Get the sampler id from the graphics descriptor table in the specified index
163 SamplerId GetGraphicsSamplerId(u32 index);
164
165 /// Get the sampler id from the compute descriptor table in the specified index
166 SamplerId GetComputeSamplerId(u32 index);
167
168 /// Return a constant reference to the given sampler id
169 [[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept;
170
171 /// Return a reference to the given sampler id
172 [[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
173
162 /// Refresh the state for graphics image view and sampler descriptors 174 /// Refresh the state for graphics image view and sampler descriptors
163 void SynchronizeGraphicsDescriptors(); 175 void SynchronizeGraphicsDescriptors();
164 176
@@ -166,9 +178,8 @@ public:
166 void SynchronizeComputeDescriptors(); 178 void SynchronizeComputeDescriptors();
167 179
168 /// Updates the Render Targets if they can be rescaled 180 /// Updates the Render Targets if they can be rescaled
169 /// @param is_clear True when the render targets are being used for clears
170 /// @retval True if the Render Targets have been rescaled. 181 /// @retval True if the Render Targets have been rescaled.
171 bool RescaleRenderTargets(bool is_clear); 182 bool RescaleRenderTargets();
172 183
173 /// Update bound render targets and upload memory if necessary 184 /// Update bound render targets and upload memory if necessary
174 /// @param is_clear True when the render targets are being used for clears 185 /// @param is_clear True when the render targets are being used for clears
@@ -324,14 +335,13 @@ private:
324 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); 335 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
325 336
326 /// Find or create an image view for the given color buffer index 337 /// Find or create an image view for the given color buffer index
327 [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); 338 [[nodiscard]] ImageViewId FindColorBuffer(size_t index);
328 339
329 /// Find or create an image view for the depth buffer 340 /// Find or create an image view for the depth buffer
330 [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); 341 [[nodiscard]] ImageViewId FindDepthBuffer();
331 342
332 /// Find or create a view for a render target with the given image parameters 343 /// Find or create a view for a render target with the given image parameters
333 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, 344 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr);
334 bool is_clear);
335 345
336 /// Iterates over all the images in a region calling func 346 /// Iterates over all the images in a region calling func
337 template <typename Func> 347 template <typename Func>
@@ -419,7 +429,7 @@ private:
419 429
420 std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; 430 std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
421 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; 431 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
422 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; 432 std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
423 433
424 VAddr virtual_invalid_space{}; 434 VAddr virtual_invalid_space{};
425 435
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 95a5b47d8..f781cb7a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -329,13 +329,13 @@ template <u32 GOB_EXTENT>
329 329
330[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( 330[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
331 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { 331 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
332 const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); 332 const auto slice_offsets = CalculateSliceOffsets(new_info);
333 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); 333 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
334 const auto it = std::ranges::find(slice_offsets, diff); 334 const auto it = std::ranges::find(slice_offsets, diff);
335 if (it == slice_offsets.end()) { 335 if (it == slice_offsets.end()) {
336 return std::nullopt; 336 return std::nullopt;
337 } 337 }
338 const std::vector subresources = CalculateSliceSubresources(new_info); 338 const auto subresources = CalculateSliceSubresources(new_info);
339 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; 339 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
340 const ImageInfo& info = overlap.info; 340 const ImageInfo& info = overlap.info;
341 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { 341 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
@@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
655 return sizes; 655 return sizes;
656} 656}
657 657
658std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { 658boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) {
659 ASSERT(info.type == ImageType::e3D); 659 ASSERT(info.type == ImageType::e3D);
660 std::vector<u32> offsets; 660 boost::container::small_vector<u32, 16> offsets;
661 offsets.reserve(NumSlices(info)); 661 offsets.reserve(NumSlices(info));
662 662
663 const LevelInfo level_info = MakeLevelInfo(info); 663 const LevelInfo level_info = MakeLevelInfo(info);
@@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
679 return offsets; 679 return offsets;
680} 680}
681 681
682std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { 682boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
683 const ImageInfo& info) {
683 ASSERT(info.type == ImageType::e3D); 684 ASSERT(info.type == ImageType::e3D);
684 std::vector<SubresourceBase> subresources; 685 boost::container::small_vector<SubresourceBase, 16> subresources;
685 subresources.reserve(NumSlices(info)); 686 subresources.reserve(NumSlices(info));
686 for (s32 level = 0; level < info.resources.levels; ++level) { 687 for (s32 level = 0; level < info.resources.levels; ++level) {
687 const s32 depth = AdjustMipSize(info.size.depth, level); 688 const s32 depth = AdjustMipSize(info.size.depth, level);
@@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
723 } 724 }
724} 725}
725 726
726std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, 727boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst,
727 SubresourceBase base, u32 up_scale, u32 down_shift) { 728 const ImageInfo& src,
729 SubresourceBase base,
730 u32 up_scale, u32 down_shift) {
728 ASSERT(dst.resources.levels >= src.resources.levels); 731 ASSERT(dst.resources.levels >= src.resources.levels);
729 732
730 const bool is_dst_3d = dst.type == ImageType::e3D; 733 const bool is_dst_3d = dst.type == ImageType::e3D;
@@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
733 ASSERT(src.resources.levels == 1); 736 ASSERT(src.resources.levels == 1);
734 } 737 }
735 const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; 738 const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
736 std::vector<ImageCopy> copies; 739 boost::container::small_vector<ImageCopy, 16> copies;
737 copies.reserve(src.resources.levels); 740 copies.reserve(src.resources.levels);
738 for (s32 level = 0; level < src.resources.levels; ++level) { 741 for (s32 level = 0; level < src.resources.levels; ++level) {
739 ImageCopy& copy = copies.emplace_back(); 742 ImageCopy& copy = copies.emplace_back();
@@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
770 return copies; 773 return copies;
771} 774}
772 775
773std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, 776boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src,
774 u32 down_shift) { 777 u32 up_scale,
775 std::vector<ImageCopy> copies; 778 u32 down_shift) {
779 boost::container::small_vector<ImageCopy, 16> copies;
776 copies.reserve(src.resources.levels); 780 copies.reserve(src.resources.levels);
777 const bool is_3d = src.type == ImageType::e3D; 781 const bool is_3d = src.type == ImageType::e3D;
778 for (s32 level = 0; level < src.resources.levels; ++level) { 782 for (s32 level = 0; level < src.resources.levels; ++level) {
@@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
824 return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); 828 return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
825} 829}
826 830
827std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 831boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
828 const ImageInfo& info, std::span<const u8> input, 832 GPUVAddr gpu_addr,
829 std::span<u8> output) { 833 const ImageInfo& info,
834 std::span<const u8> input,
835 std::span<u8> output) {
830 const size_t guest_size_bytes = input.size_bytes(); 836 const size_t guest_size_bytes = input.size_bytes();
831 const u32 bpp_log2 = BytesPerBlockLog2(info.format); 837 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
832 const Extent3D size = info.size; 838 const Extent3D size = info.size;
@@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
861 info.tile_width_spacing); 867 info.tile_width_spacing);
862 size_t guest_offset = 0; 868 size_t guest_offset = 0;
863 u32 host_offset = 0; 869 u32 host_offset = 0;
864 std::vector<BufferImageCopy> copies(num_levels); 870 boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
865 871
866 for (s32 level = 0; level < num_levels; ++level) { 872 for (s32 level = 0; level < num_levels; ++level) {
867 const Extent3D level_size = AdjustMipSize(size, level); 873 const Extent3D level_size = AdjustMipSize(size, level);
@@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
978 } 984 }
979} 985}
980 986
981std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { 987boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
982 const Extent3D size = info.size; 988 const Extent3D size = info.size;
983 const u32 bytes_per_block = BytesPerBlock(info.format); 989 const u32 bytes_per_block = BytesPerBlock(info.format);
984 if (info.type == ImageType::Linear) { 990 if (info.type == ImageType::Linear) {
@@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
1006 1012
1007 u32 host_offset = 0; 1013 u32 host_offset = 0;
1008 1014
1009 std::vector<BufferImageCopy> copies(num_levels); 1015 boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
1010 for (s32 level = 0; level < num_levels; ++level) { 1016 for (s32 level = 0; level < num_levels; ++level) {
1011 const Extent3D level_size = AdjustMipSize(size, level); 1017 const Extent3D level_size = AdjustMipSize(size, level);
1012 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); 1018 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
@@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
1042 return AdjustMipBlockSize(num_tiles, level_info.block, level); 1048 return AdjustMipBlockSize(num_tiles, level_info.block, level);
1043} 1049}
1044 1050
1045std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { 1051boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
1046 const Extent2D tile_size = DefaultBlockSize(info.format); 1052 const Extent2D tile_size = DefaultBlockSize(info.format);
1047 if (info.type == ImageType::Linear) { 1053 if (info.type == ImageType::Linear) {
1048 return std::vector{SwizzleParameters{ 1054 return {SwizzleParameters{
1049 .num_tiles = AdjustTileSize(info.size, tile_size), 1055 .num_tiles = AdjustTileSize(info.size, tile_size),
1050 .block = {}, 1056 .block = {},
1051 .buffer_offset = 0, 1057 .buffer_offset = 0,
@@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
1057 const s32 num_levels = info.resources.levels; 1063 const s32 num_levels = info.resources.levels;
1058 1064
1059 u32 guest_offset = 0; 1065 u32 guest_offset = 0;
1060 std::vector<SwizzleParameters> params(num_levels); 1066 boost::container::small_vector<SwizzleParameters, 16> params(num_levels);
1061 for (s32 level = 0; level < num_levels; ++level) { 1067 for (s32 level = 0; level < num_levels; ++level) {
1062 const Extent3D level_size = AdjustMipSize(size, level); 1068 const Extent3D level_size = AdjustMipSize(size, level);
1063 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); 1069 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 84aa6880d..ab45a43c4 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -5,6 +5,7 @@
5 5
6#include <optional> 6#include <optional>
7#include <span> 7#include <span>
8#include <boost/container/small_vector.hpp>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "common/scratch_buffer.h" 11#include "common/scratch_buffer.h"
@@ -40,9 +41,10 @@ struct OverlapResult {
40 41
41[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; 42[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
42 43
43[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); 44[[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info);
44 45
45[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); 46[[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
47 const ImageInfo& info);
46 48
47[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); 49[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
48 50
@@ -51,21 +53,18 @@ struct OverlapResult {
51 53
52[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; 54[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
53 55
54[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, 56[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(
55 const ImageInfo& src, 57 const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1,
56 SubresourceBase base, u32 up_scale = 1, 58 u32 down_shift = 0);
57 u32 down_shift = 0);
58 59
59[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, 60[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(
60 u32 up_scale = 1, 61 const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0);
61 u32 down_shift = 0);
62 62
63[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); 63[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
64 64
65[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, 65[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(
66 GPUVAddr gpu_addr, const ImageInfo& info, 66 Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
67 std::span<const u8> input, 67 std::span<const u8> input, std::span<u8> output);
68 std::span<u8> output);
69 68
70[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 69[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
71 const ImageBase& image, std::span<u8> output); 70 const ImageBase& image, std::span<u8> output);
@@ -73,13 +72,15 @@ struct OverlapResult {
73void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 72void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
74 std::span<BufferImageCopy> copies); 73 std::span<BufferImageCopy> copies);
75 74
76[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); 75[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
76 const ImageInfo& info);
77 77
78[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); 78[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
79 79
80[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); 80[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
81 81
82[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); 82[[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(
83 const ImageInfo& info);
83 84
84void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, 85void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
85 std::span<const BufferImageCopy> copies, std::span<const u8> memory, 86 std::span<const BufferImageCopy> copies, std::span<const u8> memory,
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4a80a59f9..d8b88d9bc 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -62,7 +62,12 @@ std::array<float, 4> TSCEntry::BorderColor() const noexcept {
62} 62}
63 63
64float TSCEntry::MaxAnisotropy() const noexcept { 64float TSCEntry::MaxAnisotropy() const noexcept {
65 if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) { 65 const bool is_suitable_mipmap_filter = mipmap_filter != TextureMipmapFilter::None;
66 const bool has_regular_lods = min_lod_clamp == 0 && max_lod_clamp >= 256;
67 const bool is_bilinear_filter = min_filter == TextureFilter::Linear &&
68 reduction_filter == SamplerReduction::WeightedAverage;
69 if (max_anisotropy == 0 && (!is_suitable_mipmap_filter || !has_regular_lods ||
70 !is_bilinear_filter || depth_compare_enabled)) {
66 return 1.0f; 71 return 1.0f;
67 } 72 }
68 const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue(); 73 const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue();
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
index 155599316..1f353d2df 100644
--- a/src/video_core/transform_feedback.cpp
+++ b/src/video_core/transform_feedback.cpp
@@ -13,7 +13,7 @@
13 13
14namespace VideoCommon { 14namespace VideoCommon {
15 15
16std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( 16std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
17 const TransformFeedbackState& state) { 17 const TransformFeedbackState& state) {
18 static constexpr std::array VECTORS{ 18 static constexpr std::array VECTORS{
19 28U, // gl_Position 19 28U, // gl_Position
@@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
62 216U, // gl_TexCoord[6] 62 216U, // gl_TexCoord[6]
63 220U, // gl_TexCoord[7] 63 220U, // gl_TexCoord[7]
64 }; 64 };
65 std::vector<Shader::TransformFeedbackVarying> xfb(256); 65 std::array<Shader::TransformFeedbackVarying, 256> xfb{};
66 u32 count{0};
66 for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { 67 for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
67 const auto& locations = state.varyings[buffer]; 68 const auto& locations = state.varyings[buffer];
68 const auto& layout = state.layouts[buffer]; 69 const auto& layout = state.layouts[buffer];
@@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
103 } 104 }
104 } 105 }
105 xfb[attribute] = varying; 106 xfb[attribute] = varying;
107 count = std::max(count, attribute);
106 highest = std::max(highest, (base_offset + varying.components) * 4); 108 highest = std::max(highest, (base_offset + varying.components) * 4);
107 } 109 }
108 UNIMPLEMENTED_IF(highest != layout.stride); 110 UNIMPLEMENTED_IF(highest != layout.stride);
109 } 111 }
110 return xfb; 112 return {xfb, count + 1};
111} 113}
112 114
113} // namespace VideoCommon 115} // namespace VideoCommon
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
index d13eb16c3..401b1352a 100644
--- a/src/video_core/transform_feedback.h
+++ b/src/video_core/transform_feedback.h
@@ -24,7 +24,7 @@ struct TransformFeedbackState {
24 varyings; 24 varyings;
25}; 25};
26 26
27std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( 27std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
28 const TransformFeedbackState& state); 28 const TransformFeedbackState& state);
29 29
30} // namespace VideoCommon 30} // namespace VideoCommon
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 3d2e9a16a..b11abe311 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
316std::vector<const char*> ExtensionListForVulkan( 316std::vector<const char*> ExtensionListForVulkan(
317 const std::set<std::string, std::less<>>& extensions) { 317 const std::set<std::string, std::less<>>& extensions) {
318 std::vector<const char*> output; 318 std::vector<const char*> output;
319 output.reserve(extensions.size());
319 for (const auto& extension : extensions) { 320 for (const auto& extension : extensions) {
320 output.push_back(extension.c_str()); 321 output.push_back(extension.c_str());
321 } 322 }
@@ -562,6 +563,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
562 LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); 563 LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
563 cant_blit_msaa = true; 564 cant_blit_msaa = true;
564 } 565 }
566 has_broken_compute =
567 CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
568 !Settings::values.enable_compute_pipelines.GetValue();
565 if (is_intel_anv || (is_qualcomm && !is_s8gen2)) { 569 if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
566 LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format"); 570 LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
567 must_emulate_bgr565 = true; 571 must_emulate_bgr565 = true;
@@ -783,9 +787,6 @@ bool Device::GetSuitability(bool requires_swapchain) {
783 787
784 FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION); 788 FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION);
785 FOR_EACH_VK_EXTENSION(EXTENSION); 789 FOR_EACH_VK_EXTENSION(EXTENSION);
786#ifdef _WIN32
787 FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
788#endif
789 790
790#undef FEATURE_EXTENSION 791#undef FEATURE_EXTENSION
791#undef EXTENSION 792#undef EXTENSION
@@ -804,11 +805,6 @@ bool Device::GetSuitability(bool requires_swapchain) {
804 805
805 FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION); 806 FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION);
806 FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION); 807 FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION);
807#ifdef _WIN32
808 FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION);
809#else
810 FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION);
811#endif
812 808
813 if (requires_swapchain) { 809 if (requires_swapchain) {
814 CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME); 810 CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index f314d0ffe..0b634a876 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -10,6 +10,7 @@
10#include <vector> 10#include <vector>
11 11
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h"
13#include "common/settings.h" 14#include "common/settings.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 15#include "video_core/vulkan_common/vulkan_wrapper.h"
15 16
@@ -68,7 +69,6 @@
68 EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \ 69 EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \
69 EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \ 70 EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \
70 EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \ 71 EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \
71 EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \
72 EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \ 72 EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \
73 EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \ 73 EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \
74 EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \ 74 EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \
@@ -80,9 +80,6 @@
80 EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \ 80 EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
81 EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) 81 EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle)
82 82
83#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \
84 EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32)
85
86// Define extensions which must be supported. 83// Define extensions which must be supported.
87#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ 84#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
88 EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \ 85 EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \
@@ -90,12 +87,6 @@
90 EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \ 87 EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \
91 EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME) 88 EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME)
92 89
93#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \
94 EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME)
95
96#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \
97 EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME)
98
99// Define extensions where the absence of the extension may result in a degraded experience. 90// Define extensions where the absence of the extension may result in a degraded experience.
100#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \ 91#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \
101 EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \ 92 EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \
@@ -528,6 +519,11 @@ public:
528 return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue(); 519 return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue();
529 } 520 }
530 521
522 /// @returns True if compute pipelines can cause crashing.
523 bool HasBrokenCompute() const {
524 return has_broken_compute;
525 }
526
531 /// Returns true when the device does not properly support cube compatibility. 527 /// Returns true when the device does not properly support cube compatibility.
532 bool HasBrokenCubeImageCompability() const { 528 bool HasBrokenCubeImageCompability() const {
533 return has_broken_cube_compatibility; 529 return has_broken_cube_compatibility;
@@ -589,6 +585,22 @@ public:
589 return supports_conditional_barriers; 585 return supports_conditional_barriers;
590 } 586 }
591 587
588 [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
589 u32 driver_version) {
590 if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
591 const u32 major = VK_API_VERSION_MAJOR(driver_version);
592 const u32 minor = VK_API_VERSION_MINOR(driver_version);
593 const u32 patch = VK_API_VERSION_PATCH(driver_version);
594 if (major == 0 && minor == 405 && patch < 286) {
595 LOG_WARNING(
596 Render_Vulkan,
597 "Intel proprietary drivers 0.405.0 until 0.405.286 have broken compute");
598 return true;
599 }
600 }
601 return false;
602 }
603
592private: 604private:
593 /// Checks if the physical device is suitable and configures the object state 605 /// Checks if the physical device is suitable and configures the object state
594 /// with all necessary info about its properties. 606 /// with all necessary info about its properties.
@@ -636,7 +648,6 @@ private:
636 FOR_EACH_VK_FEATURE_1_3(FEATURE); 648 FOR_EACH_VK_FEATURE_1_3(FEATURE);
637 FOR_EACH_VK_FEATURE_EXT(FEATURE); 649 FOR_EACH_VK_FEATURE_EXT(FEATURE);
638 FOR_EACH_VK_EXTENSION(EXTENSION); 650 FOR_EACH_VK_EXTENSION(EXTENSION);
639 FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
640 651
641#undef EXTENSION 652#undef EXTENSION
642#undef FEATURE 653#undef FEATURE
@@ -683,6 +694,7 @@ private:
683 bool is_integrated{}; ///< Is GPU an iGPU. 694 bool is_integrated{}; ///< Is GPU an iGPU.
684 bool is_virtual{}; ///< Is GPU a virtual GPU. 695 bool is_virtual{}; ///< Is GPU a virtual GPU.
685 bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. 696 bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
697 bool has_broken_compute{}; ///< Compute shaders can cause crashes
686 bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit 698 bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
687 bool has_renderdoc{}; ///< Has RenderDoc attached 699 bool has_renderdoc{}; ///< Has RenderDoc attached
688 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached 700 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 84d9ca796..733c296e4 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -210,6 +210,8 @@ add_executable(yuzu
210 util/url_request_interceptor.h 210 util/url_request_interceptor.h
211 util/util.cpp 211 util/util.cpp
212 util/util.h 212 util/util.h
213 vk_device_info.cpp
214 vk_device_info.h
213 compatdb.cpp 215 compatdb.cpp
214 compatdb.h 216 compatdb.h
215 yuzu.qrc 217 yuzu.qrc
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index bac9dff90..edc206a25 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -761,6 +761,7 @@ void Config::ReadRendererValues() {
761 ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); 761 ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
762 ReadGlobalSetting(Settings::values.enable_compute_pipelines); 762 ReadGlobalSetting(Settings::values.enable_compute_pipelines);
763 ReadGlobalSetting(Settings::values.use_video_framerate); 763 ReadGlobalSetting(Settings::values.use_video_framerate);
764 ReadGlobalSetting(Settings::values.barrier_feedback_loops);
764 ReadGlobalSetting(Settings::values.bg_red); 765 ReadGlobalSetting(Settings::values.bg_red);
765 ReadGlobalSetting(Settings::values.bg_green); 766 ReadGlobalSetting(Settings::values.bg_green);
766 ReadGlobalSetting(Settings::values.bg_blue); 767 ReadGlobalSetting(Settings::values.bg_blue);
@@ -1417,6 +1418,7 @@ void Config::SaveRendererValues() {
1417 WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); 1418 WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
1418 WriteGlobalSetting(Settings::values.enable_compute_pipelines); 1419 WriteGlobalSetting(Settings::values.enable_compute_pipelines);
1419 WriteGlobalSetting(Settings::values.use_video_framerate); 1420 WriteGlobalSetting(Settings::values.use_video_framerate);
1421 WriteGlobalSetting(Settings::values.barrier_feedback_loops);
1420 WriteGlobalSetting(Settings::values.bg_red); 1422 WriteGlobalSetting(Settings::values.bg_red);
1421 WriteGlobalSetting(Settings::values.bg_green); 1423 WriteGlobalSetting(Settings::values.bg_green);
1422 WriteGlobalSetting(Settings::values.bg_blue); 1424 WriteGlobalSetting(Settings::values.bg_blue);
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index 8e76a819a..bdf83ebfe 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -6,6 +6,7 @@
6#include "common/settings.h" 6#include "common/settings.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "ui_configure.h" 8#include "ui_configure.h"
9#include "vk_device_info.h"
9#include "yuzu/configuration/config.h" 10#include "yuzu/configuration/config.h"
10#include "yuzu/configuration/configure_audio.h" 11#include "yuzu/configuration/configure_audio.h"
11#include "yuzu/configuration/configure_cpu.h" 12#include "yuzu/configuration/configure_cpu.h"
@@ -28,6 +29,7 @@
28 29
29ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_, 30ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_,
30 InputCommon::InputSubsystem* input_subsystem, 31 InputCommon::InputSubsystem* input_subsystem,
32 std::vector<VkDeviceInfo::Record>& vk_device_records,
31 Core::System& system_, bool enable_web_config) 33 Core::System& system_, bool enable_web_config)
32 : QDialog(parent), ui{std::make_unique<Ui::ConfigureDialog>()}, 34 : QDialog(parent), ui{std::make_unique<Ui::ConfigureDialog>()},
33 registry(registry_), system{system_}, audio_tab{std::make_unique<ConfigureAudio>(system_, 35 registry(registry_), system{system_}, audio_tab{std::make_unique<ConfigureAudio>(system_,
@@ -38,7 +40,8 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_,
38 general_tab{std::make_unique<ConfigureGeneral>(system_, this)}, 40 general_tab{std::make_unique<ConfigureGeneral>(system_, this)},
39 graphics_advanced_tab{std::make_unique<ConfigureGraphicsAdvanced>(system_, this)}, 41 graphics_advanced_tab{std::make_unique<ConfigureGraphicsAdvanced>(system_, this)},
40 graphics_tab{std::make_unique<ConfigureGraphics>( 42 graphics_tab{std::make_unique<ConfigureGraphics>(
41 system_, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, this)}, 43 system_, vk_device_records, [&]() { graphics_advanced_tab->ExposeComputeOption(); },
44 this)},
42 hotkeys_tab{std::make_unique<ConfigureHotkeys>(system_.HIDCore(), this)}, 45 hotkeys_tab{std::make_unique<ConfigureHotkeys>(system_.HIDCore(), this)},
43 input_tab{std::make_unique<ConfigureInput>(system_, this)}, 46 input_tab{std::make_unique<ConfigureInput>(system_, this)},
44 network_tab{std::make_unique<ConfigureNetwork>(system_, this)}, 47 network_tab{std::make_unique<ConfigureNetwork>(system_, this)},
diff --git a/src/yuzu/configuration/configure_dialog.h b/src/yuzu/configuration/configure_dialog.h
index a086a07c4..2a08b7fee 100644
--- a/src/yuzu/configuration/configure_dialog.h
+++ b/src/yuzu/configuration/configure_dialog.h
@@ -4,7 +4,9 @@
4#pragma once 4#pragma once
5 5
6#include <memory> 6#include <memory>
7#include <vector>
7#include <QDialog> 8#include <QDialog>
9#include "yuzu/vk_device_info.h"
8 10
9namespace Core { 11namespace Core {
10class System; 12class System;
@@ -40,8 +42,9 @@ class ConfigureDialog : public QDialog {
40 42
41public: 43public:
42 explicit ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_, 44 explicit ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_,
43 InputCommon::InputSubsystem* input_subsystem, Core::System& system_, 45 InputCommon::InputSubsystem* input_subsystem,
44 bool enable_web_config = true); 46 std::vector<VkDeviceInfo::Record>& vk_device_records,
47 Core::System& system_, bool enable_web_config = true);
45 ~ConfigureDialog() override; 48 ~ConfigureDialog() override;
46 49
47 void ApplyConfiguration(); 50 void ApplyConfiguration();
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 431585216..a4965524a 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -1,10 +1,6 @@
1// SPDX-FileCopyrightText: 2016 Citra Emulator Project 1// SPDX-FileCopyrightText: 2016 Citra Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4// Include this early to include Vulkan headers how we want to
5#include "video_core/vulkan_common/vulkan_device.h"
6#include "video_core/vulkan_common/vulkan_wrapper.h"
7
8#include <algorithm> 4#include <algorithm>
9#include <functional> 5#include <functional>
10#include <iosfwd> 6#include <iosfwd>
@@ -34,13 +30,11 @@
34#include "common/settings.h" 30#include "common/settings.h"
35#include "core/core.h" 31#include "core/core.h"
36#include "ui_configure_graphics.h" 32#include "ui_configure_graphics.h"
37#include "video_core/vulkan_common/vulkan_instance.h"
38#include "video_core/vulkan_common/vulkan_library.h"
39#include "video_core/vulkan_common/vulkan_surface.h"
40#include "yuzu/configuration/configuration_shared.h" 33#include "yuzu/configuration/configuration_shared.h"
41#include "yuzu/configuration/configure_graphics.h" 34#include "yuzu/configuration/configure_graphics.h"
42#include "yuzu/qt_common.h" 35#include "yuzu/qt_common.h"
43#include "yuzu/uisettings.h" 36#include "yuzu/uisettings.h"
37#include "yuzu/vk_device_info.h"
44 38
45static const std::vector<VkPresentModeKHR> default_present_modes{VK_PRESENT_MODE_IMMEDIATE_KHR, 39static const std::vector<VkPresentModeKHR> default_present_modes{VK_PRESENT_MODE_IMMEDIATE_KHR,
46 VK_PRESENT_MODE_FIFO_KHR}; 40 VK_PRESENT_MODE_FIFO_KHR};
@@ -77,9 +71,10 @@ static constexpr Settings::VSyncMode PresentModeToSetting(VkPresentModeKHR mode)
77} 71}
78 72
79ConfigureGraphics::ConfigureGraphics(const Core::System& system_, 73ConfigureGraphics::ConfigureGraphics(const Core::System& system_,
74 std::vector<VkDeviceInfo::Record>& records_,
80 const std::function<void()>& expose_compute_option_, 75 const std::function<void()>& expose_compute_option_,
81 QWidget* parent) 76 QWidget* parent)
82 : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()}, 77 : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()}, records{records_},
83 expose_compute_option{expose_compute_option_}, system{system_} { 78 expose_compute_option{expose_compute_option_}, system{system_} {
84 vulkan_device = Settings::values.vulkan_device.GetValue(); 79 vulkan_device = Settings::values.vulkan_device.GetValue();
85 RetrieveVulkanDevices(); 80 RetrieveVulkanDevices();
@@ -504,47 +499,19 @@ void ConfigureGraphics::UpdateAPILayout() {
504 } 499 }
505} 500}
506 501
507void ConfigureGraphics::RetrieveVulkanDevices() try { 502void ConfigureGraphics::RetrieveVulkanDevices() {
508 if (UISettings::values.has_broken_vulkan) {
509 return;
510 }
511
512 using namespace Vulkan;
513
514 auto* window = this->window()->windowHandle();
515 auto wsi = QtCommon::GetWindowSystemInfo(window);
516
517 vk::InstanceDispatch dld;
518 const auto library = OpenLibrary();
519 const vk::Instance instance = CreateInstance(*library, dld, VK_API_VERSION_1_1, wsi.type);
520 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
521 vk::SurfaceKHR surface = CreateSurface(instance, wsi);
522
523 vulkan_devices.clear(); 503 vulkan_devices.clear();
524 vulkan_devices.reserve(physical_devices.size()); 504 vulkan_devices.reserve(records.size());
525 device_present_modes.clear(); 505 device_present_modes.clear();
526 device_present_modes.reserve(physical_devices.size()); 506 device_present_modes.reserve(records.size());
527 for (const VkPhysicalDevice device : physical_devices) { 507 for (const auto& record : records) {
528 const auto physical_device = vk::PhysicalDevice(device, dld); 508 vulkan_devices.push_back(QString::fromStdString(record.name));
529 const std::string name = physical_device.GetProperties().deviceName; 509 device_present_modes.push_back(record.vsync_support);
530 const std::vector<VkPresentModeKHR> present_modes = 510
531 physical_device.GetSurfacePresentModesKHR(*surface); 511 if (record.has_broken_compute) {
532 vulkan_devices.push_back(QString::fromStdString(name));
533 device_present_modes.push_back(present_modes);
534
535 VkPhysicalDeviceDriverProperties driver_properties{};
536 driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
537 driver_properties.pNext = nullptr;
538 VkPhysicalDeviceProperties2 properties{};
539 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
540 properties.pNext = &driver_properties;
541 dld.vkGetPhysicalDeviceProperties2(physical_device, &properties);
542 if (driver_properties.driverID == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
543 expose_compute_option(); 512 expose_compute_option();
544 } 513 }
545 } 514 }
546} catch (const Vulkan::vk::Exception& exception) {
547 LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
548} 515}
549 516
550Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { 517Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index 364b1cac2..be9310b74 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -12,6 +12,7 @@
12#include <qobjectdefs.h> 12#include <qobjectdefs.h>
13#include <vulkan/vulkan_core.h> 13#include <vulkan/vulkan_core.h>
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "vk_device_info.h"
15 16
16class QEvent; 17class QEvent;
17class QObject; 18class QObject;
@@ -39,6 +40,7 @@ class ConfigureGraphics : public QWidget {
39 40
40public: 41public:
41 explicit ConfigureGraphics(const Core::System& system_, 42 explicit ConfigureGraphics(const Core::System& system_,
43 std::vector<VkDeviceInfo::Record>& records,
42 const std::function<void()>& expose_compute_option_, 44 const std::function<void()>& expose_compute_option_,
43 QWidget* parent = nullptr); 45 QWidget* parent = nullptr);
44 ~ConfigureGraphics() override; 46 ~ConfigureGraphics() override;
@@ -77,6 +79,7 @@ private:
77 ConfigurationShared::CheckState use_disk_shader_cache; 79 ConfigurationShared::CheckState use_disk_shader_cache;
78 ConfigurationShared::CheckState use_asynchronous_gpu_emulation; 80 ConfigurationShared::CheckState use_asynchronous_gpu_emulation;
79 81
82 std::vector<VkDeviceInfo::Record>& records;
80 std::vector<QString> vulkan_devices; 83 std::vector<QString> vulkan_devices;
81 std::vector<std::vector<VkPresentModeKHR>> device_present_modes; 84 std::vector<std::vector<VkPresentModeKHR>> device_present_modes;
82 std::vector<VkPresentModeKHR> 85 std::vector<VkPresentModeKHR>
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 0463ac8b9..c0a044767 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -43,6 +43,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
43 ui->enable_compute_pipelines_checkbox->setChecked( 43 ui->enable_compute_pipelines_checkbox->setChecked(
44 Settings::values.enable_compute_pipelines.GetValue()); 44 Settings::values.enable_compute_pipelines.GetValue());
45 ui->use_video_framerate_checkbox->setChecked(Settings::values.use_video_framerate.GetValue()); 45 ui->use_video_framerate_checkbox->setChecked(Settings::values.use_video_framerate.GetValue());
46 ui->barrier_feedback_loops_checkbox->setChecked(
47 Settings::values.barrier_feedback_loops.GetValue());
46 48
47 if (Settings::IsConfiguringGlobal()) { 49 if (Settings::IsConfiguringGlobal()) {
48 ui->gpu_accuracy->setCurrentIndex( 50 ui->gpu_accuracy->setCurrentIndex(
@@ -94,6 +96,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
94 enable_compute_pipelines); 96 enable_compute_pipelines);
95 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_video_framerate, 97 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_video_framerate,
96 ui->use_video_framerate_checkbox, use_video_framerate); 98 ui->use_video_framerate_checkbox, use_video_framerate);
99 ConfigurationShared::ApplyPerGameSetting(&Settings::values.barrier_feedback_loops,
100 ui->barrier_feedback_loops_checkbox,
101 barrier_feedback_loops);
97} 102}
98 103
99void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { 104void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) {
@@ -130,6 +135,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
130 Settings::values.enable_compute_pipelines.UsingGlobal()); 135 Settings::values.enable_compute_pipelines.UsingGlobal());
131 ui->use_video_framerate_checkbox->setEnabled( 136 ui->use_video_framerate_checkbox->setEnabled(
132 Settings::values.use_video_framerate.UsingGlobal()); 137 Settings::values.use_video_framerate.UsingGlobal());
138 ui->barrier_feedback_loops_checkbox->setEnabled(
139 Settings::values.barrier_feedback_loops.UsingGlobal());
133 140
134 return; 141 return;
135 } 142 }
@@ -157,6 +164,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
157 ConfigurationShared::SetColoredTristate(ui->use_video_framerate_checkbox, 164 ConfigurationShared::SetColoredTristate(ui->use_video_framerate_checkbox,
158 Settings::values.use_video_framerate, 165 Settings::values.use_video_framerate,
159 use_video_framerate); 166 use_video_framerate);
167 ConfigurationShared::SetColoredTristate(ui->barrier_feedback_loops_checkbox,
168 Settings::values.barrier_feedback_loops,
169 barrier_feedback_loops);
160 ConfigurationShared::SetColoredComboBox( 170 ConfigurationShared::SetColoredComboBox(
161 ui->gpu_accuracy, ui->label_gpu_accuracy, 171 ui->gpu_accuracy, ui->label_gpu_accuracy,
162 static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); 172 static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index a4dc8ceb0..369a7c83e 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -48,6 +48,7 @@ private:
48 ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache; 48 ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache;
49 ConfigurationShared::CheckState enable_compute_pipelines; 49 ConfigurationShared::CheckState enable_compute_pipelines;
50 ConfigurationShared::CheckState use_video_framerate; 50 ConfigurationShared::CheckState use_video_framerate;
51 ConfigurationShared::CheckState barrier_feedback_loops;
51 52
52 const Core::System& system; 53 const Core::System& system;
53}; 54};
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index e7f0ef6be..d527a6f38 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -202,6 +202,16 @@ Compute pipelines are always enabled on all other drivers.</string>
202 </widget> 202 </widget>
203 </item> 203 </item>
204 <item> 204 <item>
205 <widget class="QCheckBox" name="barrier_feedback_loops_checkbox">
206 <property name="toolTip">
207 <string>Improves rendering of transparency effects in specific games.</string>
208 </property>
209 <property name="text">
210 <string>Barrier feedback loops</string>
211 </property>
212 </widget>
213 </item>
214 <item>
205 <widget class="QWidget" name="af_layout" native="true"> 215 <widget class="QWidget" name="af_layout" native="true">
206 <layout class="QHBoxLayout" name="horizontalLayout_1"> 216 <layout class="QHBoxLayout" name="horizontalLayout_1">
207 <property name="leftMargin"> 217 <property name="leftMargin">
diff --git a/src/yuzu/configuration/configure_per_game.cpp b/src/yuzu/configuration/configure_per_game.cpp
index 7ac162586..eb96e6068 100644
--- a/src/yuzu/configuration/configure_per_game.cpp
+++ b/src/yuzu/configuration/configure_per_game.cpp
@@ -6,6 +6,7 @@
6#include <memory> 6#include <memory>
7#include <string> 7#include <string>
8#include <utility> 8#include <utility>
9#include <vector>
9 10
10#include <fmt/format.h> 11#include <fmt/format.h>
11 12
@@ -34,8 +35,10 @@
34#include "yuzu/configuration/configure_system.h" 35#include "yuzu/configuration/configure_system.h"
35#include "yuzu/uisettings.h" 36#include "yuzu/uisettings.h"
36#include "yuzu/util/util.h" 37#include "yuzu/util/util.h"
38#include "yuzu/vk_device_info.h"
37 39
38ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id_, const std::string& file_name, 40ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id_, const std::string& file_name,
41 std::vector<VkDeviceInfo::Record>& vk_device_records,
39 Core::System& system_) 42 Core::System& system_)
40 : QDialog(parent), 43 : QDialog(parent),
41 ui(std::make_unique<Ui::ConfigurePerGame>()), title_id{title_id_}, system{system_} { 44 ui(std::make_unique<Ui::ConfigurePerGame>()), title_id{title_id_}, system{system_} {
@@ -50,7 +53,7 @@ ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id_, const std::st
50 general_tab = std::make_unique<ConfigureGeneral>(system_, this); 53 general_tab = std::make_unique<ConfigureGeneral>(system_, this);
51 graphics_advanced_tab = std::make_unique<ConfigureGraphicsAdvanced>(system_, this); 54 graphics_advanced_tab = std::make_unique<ConfigureGraphicsAdvanced>(system_, this);
52 graphics_tab = std::make_unique<ConfigureGraphics>( 55 graphics_tab = std::make_unique<ConfigureGraphics>(
53 system_, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, this); 56 system_, vk_device_records, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, this);
54 input_tab = std::make_unique<ConfigureInputPerGame>(system_, game_config.get(), this); 57 input_tab = std::make_unique<ConfigureInputPerGame>(system_, game_config.get(), this);
55 system_tab = std::make_unique<ConfigureSystem>(system_, this); 58 system_tab = std::make_unique<ConfigureSystem>(system_, this);
56 59
diff --git a/src/yuzu/configuration/configure_per_game.h b/src/yuzu/configuration/configure_per_game.h
index 85752f1fa..7ec1ded06 100644
--- a/src/yuzu/configuration/configure_per_game.h
+++ b/src/yuzu/configuration/configure_per_game.h
@@ -5,11 +5,13 @@
5 5
6#include <memory> 6#include <memory>
7#include <string> 7#include <string>
8#include <vector>
8 9
9#include <QDialog> 10#include <QDialog>
10#include <QList> 11#include <QList>
11 12
12#include "core/file_sys/vfs_types.h" 13#include "core/file_sys/vfs_types.h"
14#include "vk_device_info.h"
13#include "yuzu/configuration/config.h" 15#include "yuzu/configuration/config.h"
14 16
15namespace Core { 17namespace Core {
@@ -45,6 +47,7 @@ class ConfigurePerGame : public QDialog {
45public: 47public:
46 // Cannot use std::filesystem::path due to https://bugreports.qt.io/browse/QTBUG-73263 48 // Cannot use std::filesystem::path due to https://bugreports.qt.io/browse/QTBUG-73263
47 explicit ConfigurePerGame(QWidget* parent, u64 title_id_, const std::string& file_name, 49 explicit ConfigurePerGame(QWidget* parent, u64 title_id_, const std::string& file_name,
50 std::vector<VkDeviceInfo::Record>& vk_device_records,
48 Core::System& system_); 51 Core::System& system_);
49 ~ConfigurePerGame() override; 52 ~ConfigurePerGame() override;
50 53
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index cba7c3cce..45a39451d 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -147,6 +147,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
147#include "yuzu/startup_checks.h" 147#include "yuzu/startup_checks.h"
148#include "yuzu/uisettings.h" 148#include "yuzu/uisettings.h"
149#include "yuzu/util/clickable_label.h" 149#include "yuzu/util/clickable_label.h"
150#include "yuzu/vk_device_info.h"
150 151
151#ifdef YUZU_DBGHELP 152#ifdef YUZU_DBGHELP
152#include "yuzu/mini_dump.h" 153#include "yuzu/mini_dump.h"
@@ -440,6 +441,8 @@ GMainWindow::GMainWindow(std::unique_ptr<Config> config_, bool has_broken_vulkan
440 441
441 renderer_status_button->setDisabled(true); 442 renderer_status_button->setDisabled(true);
442 renderer_status_button->setChecked(false); 443 renderer_status_button->setChecked(false);
444 } else {
445 VkDeviceInfo::PopulateRecords(vk_device_records, this->window()->windowHandle());
443 } 446 }
444 447
445#if defined(HAVE_SDL2) && !defined(_WIN32) 448#if defined(HAVE_SDL2) && !defined(_WIN32)
@@ -3494,7 +3497,8 @@ void GMainWindow::OnConfigure() {
3494 const auto old_language_index = Settings::values.language_index.GetValue(); 3497 const auto old_language_index = Settings::values.language_index.GetValue();
3495 3498
3496 Settings::SetConfiguringGlobal(true); 3499 Settings::SetConfiguringGlobal(true);
3497 ConfigureDialog configure_dialog(this, hotkey_registry, input_subsystem.get(), *system, 3500 ConfigureDialog configure_dialog(this, hotkey_registry, input_subsystem.get(),
3501 vk_device_records, *system,
3498 !multiplayer_state->IsHostingPublicRoom()); 3502 !multiplayer_state->IsHostingPublicRoom());
3499 connect(&configure_dialog, &ConfigureDialog::LanguageChanged, this, 3503 connect(&configure_dialog, &ConfigureDialog::LanguageChanged, this,
3500 &GMainWindow::OnLanguageChanged); 3504 &GMainWindow::OnLanguageChanged);
@@ -3765,7 +3769,7 @@ void GMainWindow::OpenPerGameConfiguration(u64 title_id, const std::string& file
3765 const auto v_file = Core::GetGameFileFromPath(vfs, file_name); 3769 const auto v_file = Core::GetGameFileFromPath(vfs, file_name);
3766 3770
3767 Settings::SetConfiguringGlobal(false); 3771 Settings::SetConfiguringGlobal(false);
3768 ConfigurePerGame dialog(this, title_id, file_name, *system); 3772 ConfigurePerGame dialog(this, title_id, file_name, vk_device_records, *system);
3769 dialog.LoadFromFile(v_file); 3773 dialog.LoadFromFile(v_file);
3770 const auto result = dialog.exec(); 3774 const auto result = dialog.exec();
3771 3775
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 6bb70972f..e0e775d87 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -118,6 +118,10 @@ enum class ReinitializeKeyBehavior {
118 Warning, 118 Warning,
119}; 119};
120 120
121namespace VkDeviceInfo {
122class Record;
123}
124
121class GMainWindow : public QMainWindow { 125class GMainWindow : public QMainWindow {
122 Q_OBJECT 126 Q_OBJECT
123 127
@@ -418,6 +422,8 @@ private:
418 422
419 GameListPlaceholder* game_list_placeholder; 423 GameListPlaceholder* game_list_placeholder;
420 424
425 std::vector<VkDeviceInfo::Record> vk_device_records;
426
421 // Status bar elements 427 // Status bar elements
422 QLabel* message_label = nullptr; 428 QLabel* message_label = nullptr;
423 QLabel* shader_building_label = nullptr; 429 QLabel* shader_building_label = nullptr;
diff --git a/src/yuzu/vk_device_info.cpp b/src/yuzu/vk_device_info.cpp
new file mode 100644
index 000000000..7c26a3dc7
--- /dev/null
+++ b/src/yuzu/vk_device_info.cpp
@@ -0,0 +1,61 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <utility>
5#include <vector>
6#include "common/dynamic_library.h"
7#include "common/logging/log.h"
8#include "video_core/vulkan_common/vulkan_device.h"
9#include "video_core/vulkan_common/vulkan_instance.h"
10#include "video_core/vulkan_common/vulkan_library.h"
11#include "video_core/vulkan_common/vulkan_surface.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h"
13#include "vulkan/vulkan_core.h"
14#include "yuzu/qt_common.h"
15#include "yuzu/vk_device_info.h"
16
17class QWindow;
18
19namespace VkDeviceInfo {
20Record::Record(std::string_view name_, const std::vector<VkPresentModeKHR>& vsync_modes_,
21 bool has_broken_compute_)
22 : name{name_}, vsync_support{vsync_modes_}, has_broken_compute{has_broken_compute_} {}
23
24Record::~Record() = default;
25
26void PopulateRecords(std::vector<Record>& records, QWindow* window) try {
27 using namespace Vulkan;
28
29 auto wsi = QtCommon::GetWindowSystemInfo(window);
30
31 vk::InstanceDispatch dld;
32 const auto library = OpenLibrary();
33 const vk::Instance instance = CreateInstance(*library, dld, VK_API_VERSION_1_1, wsi.type);
34 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
35 vk::SurfaceKHR surface = CreateSurface(instance, wsi);
36
37 records.clear();
38 records.reserve(physical_devices.size());
39 for (const VkPhysicalDevice device : physical_devices) {
40 const auto physical_device = vk::PhysicalDevice(device, dld);
41 const std::string name = physical_device.GetProperties().deviceName;
42 const std::vector<VkPresentModeKHR> present_modes =
43 physical_device.GetSurfacePresentModesKHR(*surface);
44
45 VkPhysicalDeviceDriverProperties driver_properties{};
46 driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
47 driver_properties.pNext = nullptr;
48 VkPhysicalDeviceProperties2 properties{};
49 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
50 properties.pNext = &driver_properties;
51 dld.vkGetPhysicalDeviceProperties2(physical_device, &properties);
52
53 bool has_broken_compute{Vulkan::Device::CheckBrokenCompute(
54 driver_properties.driverID, properties.properties.driverVersion)};
55
56 records.push_back(VkDeviceInfo::Record(name, present_modes, has_broken_compute));
57 }
58} catch (const Vulkan::vk::Exception& exception) {
59 LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
60}
61} // namespace VkDeviceInfo
diff --git a/src/yuzu/vk_device_info.h b/src/yuzu/vk_device_info.h
new file mode 100644
index 000000000..bda8262f4
--- /dev/null
+++ b/src/yuzu/vk_device_info.h
@@ -0,0 +1,36 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <iterator>
8#include <memory>
9#include <string>
10#include <string_view>
11#include <vector>
12#include "common/common_types.h"
13#include "vulkan/vulkan_core.h"
14
15class QWindow;
16
17namespace Settings {
18enum class VSyncMode : u32;
19}
20// #include "common/settings.h"
21
22namespace VkDeviceInfo {
23// Short class to record Vulkan driver information for configuration purposes
24class Record {
25public:
26 explicit Record(std::string_view name, const std::vector<VkPresentModeKHR>& vsync_modes,
27 bool has_broken_compute);
28 ~Record();
29
30 const std::string name;
31 const std::vector<VkPresentModeKHR> vsync_support;
32 const bool has_broken_compute;
33};
34
35void PopulateRecords(std::vector<Record>& records, QWindow* window);
36} // namespace VkDeviceInfo