Diffstat (limited to 'src')
-rw-r--r-- src/CMakeLists.txt | 1
-rw-r--r-- src/audio_core/audio_renderer.cpp | 13
-rw-r--r-- src/audio_core/audio_renderer.h | 8
-rw-r--r-- src/audio_core/cubeb_sink.cpp | 1
-rw-r--r-- src/audio_core/stream.cpp | 12
-rw-r--r-- src/common/CMakeLists.txt | 5
-rw-r--r-- src/common/bit_set.h | 99
-rw-r--r-- src/common/concepts.h | 4
-rw-r--r-- src/common/multi_level_queue.h | 345
-rw-r--r-- src/common/scope_exit.h | 2
-rw-r--r-- src/common/telemetry.h | 4
-rw-r--r-- src/common/thread_worker.cpp | 58
-rw-r--r-- src/common/thread_worker.h | 30
-rw-r--r-- src/common/x64/xbyak_abi.h | 20
-rw-r--r-- src/core/CMakeLists.txt | 17
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 3
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 5
-rw-r--r-- src/core/core.cpp | 34
-rw-r--r-- src/core/core.h | 20
-rw-r--r-- src/core/cpu_manager.cpp | 98
-rw-r--r-- src/core/file_sys/common_funcs.h | 56
-rw-r--r-- src/core/file_sys/content_archive.cpp | 31
-rw-r--r-- src/core/file_sys/content_archive.h | 8
-rw-r--r-- src/core/file_sys/nca_patch.cpp | 2
-rw-r--r-- src/core/file_sys/nca_patch.h | 2
-rw-r--r-- src/core/file_sys/patch_manager.cpp | 4
-rw-r--r-- src/core/file_sys/romfs_factory.cpp | 22
-rw-r--r-- src/core/file_sys/romfs_factory.h | 4
-rw-r--r-- src/core/file_sys/system_archive/data/font_nintendo_extended.cpp | 555
-rw-r--r-- src/core/file_sys/system_archive/data/font_nintendo_extended.h | 2
-rw-r--r-- src/core/file_sys/system_archive/system_version.cpp | 12
-rw-r--r-- src/core/file_sys/vfs.cpp | 32
-rw-r--r-- src/core/file_sys/vfs.h | 44
-rw-r--r-- src/core/file_sys/vfs_concat.cpp | 18
-rw-r--r-- src/core/file_sys/vfs_concat.h | 2
-rw-r--r-- src/core/file_sys/vfs_layered.cpp | 24
-rw-r--r-- src/core/file_sys/vfs_layered.h | 18
-rw-r--r-- src/core/file_sys/vfs_offset.cpp | 4
-rw-r--r-- src/core/file_sys/vfs_offset.h | 6
-rw-r--r-- src/core/file_sys/vfs_real.cpp | 24
-rw-r--r-- src/core/file_sys/vfs_real.h | 24
-rw-r--r-- src/core/file_sys/vfs_static.h | 2
-rw-r--r-- src/core/file_sys/vfs_vector.cpp | 12
-rw-r--r-- src/core/file_sys/vfs_vector.h | 26
-rw-r--r-- src/core/file_sys/xts_archive.cpp | 6
-rw-r--r-- src/core/file_sys/xts_archive.h | 6
-rw-r--r-- src/core/frontend/applets/error.cpp | 7
-rw-r--r-- src/core/frontend/applets/general_frontend.cpp | 68
-rw-r--r-- src/core/frontend/applets/general_frontend.h | 51
-rw-r--r-- src/core/frontend/applets/web_browser.cpp | 24
-rw-r--r-- src/core/frontend/applets/web_browser.h | 20
-rw-r--r-- src/core/frontend/input_interpreter.cpp | 45
-rw-r--r-- src/core/frontend/input_interpreter.h | 120
-rw-r--r-- src/core/hle/ipc_helpers.h | 67
-rw-r--r-- src/core/hle/kernel/address_arbiter.cpp | 21
-rw-r--r-- src/core/hle/kernel/global_scheduler_context.cpp | 52
-rw-r--r-- src/core/hle/kernel/global_scheduler_context.h | 81
-rw-r--r-- src/core/hle/kernel/handle_table.cpp | 4
-rw-r--r-- src/core/hle/kernel/hle_ipc.cpp | 41
-rw-r--r-- src/core/hle/kernel/hle_ipc.h | 17
-rw-r--r-- src/core/hle/kernel/k_affinity_mask.h | 58
-rw-r--r-- src/core/hle/kernel/k_priority_queue.h | 451
-rw-r--r-- src/core/hle/kernel/k_scheduler.cpp | 784
-rw-r--r-- src/core/hle/kernel/k_scheduler.h | 201
-rw-r--r-- src/core/hle/kernel/k_scheduler_lock.h | 75
-rw-r--r-- src/core/hle/kernel/k_scoped_lock.h | 41
-rw-r--r-- src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h | 50
-rw-r--r-- src/core/hle/kernel/kernel.cpp | 174
-rw-r--r-- src/core/hle/kernel/kernel.h | 34
-rw-r--r-- src/core/hle/kernel/memory/memory_block.h | 6
-rw-r--r-- src/core/hle/kernel/memory/memory_block_manager.h | 4
-rw-r--r-- src/core/hle/kernel/mutex.cpp | 12
-rw-r--r-- src/core/hle/kernel/physical_core.cpp | 8
-rw-r--r-- src/core/hle/kernel/physical_core.h | 15
-rw-r--r-- src/core/hle/kernel/process.cpp | 14
-rw-r--r-- src/core/hle/kernel/process.h | 13
-rw-r--r-- src/core/hle/kernel/process_capability.cpp | 2
-rw-r--r-- src/core/hle/kernel/readable_event.cpp | 4
-rw-r--r-- src/core/hle/kernel/resource_limit.cpp | 4
-rw-r--r-- src/core/hle/kernel/scheduler.cpp | 819
-rw-r--r-- src/core/hle/kernel/scheduler.h | 320
-rw-r--r-- src/core/hle/kernel/server_session.cpp | 36
-rw-r--r-- src/core/hle/kernel/server_session.h | 12
-rw-r--r-- src/core/hle/kernel/service_thread.cpp | 110
-rw-r--r-- src/core/hle/kernel/service_thread.h | 28
-rw-r--r-- src/core/hle/kernel/svc.cpp | 140
-rw-r--r-- src/core/hle/kernel/synchronization.cpp | 11
-rw-r--r-- src/core/hle/kernel/thread.cpp | 79
-rw-r--r-- src/core/hle/kernel/thread.h | 114
-rw-r--r-- src/core/hle/kernel/time_manager.cpp | 17
-rw-r--r-- src/core/hle/service/am/am.cpp | 12
-rw-r--r-- src/core/hle/service/am/applets/applets.cpp | 35
-rw-r--r-- src/core/hle/service/am/applets/applets.h | 20
-rw-r--r-- src/core/hle/service/am/applets/controller.cpp | 23
-rw-r--r-- src/core/hle/service/am/applets/error.cpp | 4
-rw-r--r-- src/core/hle/service/am/applets/general_backend.cpp | 6
-rw-r--r-- src/core/hle/service/am/applets/web_browser.cpp | 792
-rw-r--r-- src/core/hle/service/am/applets/web_browser.h | 80
-rw-r--r-- src/core/hle/service/am/applets/web_types.h | 178
-rw-r--r-- src/core/hle/service/aoc/aoc_u.cpp | 88
-rw-r--r-- src/core/hle/service/aoc/aoc_u.h | 2
-rw-r--r-- src/core/hle/service/apm/controller.cpp | 3
-rw-r--r-- src/core/hle/service/apm/interface.cpp | 7
-rw-r--r-- src/core/hle/service/audio/audout_u.cpp | 6
-rw-r--r-- src/core/hle/service/audio/audren_u.cpp | 14
-rw-r--r-- src/core/hle/service/bcat/backend/boxcat.cpp | 2
-rw-r--r-- src/core/hle/service/fatal/fatal.cpp | 5
-rw-r--r-- src/core/hle/service/filesystem/filesystem.cpp | 41
-rw-r--r-- src/core/hle/service/filesystem/filesystem.h | 4
-rw-r--r-- src/core/hle/service/filesystem/fsp_srv.cpp | 57
-rw-r--r-- src/core/hle/service/filesystem/fsp_srv.h | 1
-rw-r--r-- src/core/hle/service/friend/friend.cpp | 3
-rw-r--r-- src/core/hle/service/hid/controllers/npad.cpp | 43
-rw-r--r-- src/core/hle/service/hid/controllers/npad.h | 7
-rw-r--r-- src/core/hle/service/hid/hid.cpp | 2
-rw-r--r-- src/core/hle/service/lm/lm.cpp | 2
-rw-r--r-- src/core/hle/service/ncm/ncm.cpp | 2
-rw-r--r-- src/core/hle/service/nim/nim.cpp | 14
-rw-r--r-- src/core/hle/service/ns/ns.cpp | 11
-rw-r--r-- src/core/hle/service/ns/pl_u.cpp | 39
-rw-r--r-- src/core/hle/service/ns/pl_u.h | 19
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvdevice.h | 9
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 9
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 9
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 20
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 11
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 9
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | 33
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp | 9
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_vic.h | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvmap.cpp | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvmap.h | 8
-rw-r--r-- src/core/hle/service/nvdrv/interface.cpp | 92
-rw-r--r-- src/core/hle/service/nvdrv/nvdata.h | 11
-rw-r--r-- src/core/hle/service/nvdrv/nvdrv.cpp | 13
-rw-r--r-- src/core/hle/service/nvdrv/nvdrv.h | 6
-rw-r--r-- src/core/hle/service/nvflinger/buffer_queue.cpp | 152
-rw-r--r-- src/core/hle/service/nvflinger/buffer_queue.h | 20
-rw-r--r-- src/core/hle/service/nvflinger/nvflinger.cpp | 34
-rw-r--r-- src/core/hle/service/nvflinger/nvflinger.h | 9
-rw-r--r-- src/core/hle/service/pcie/pcie.cpp | 2
-rw-r--r-- src/core/hle/service/prepo/prepo.cpp | 4
-rw-r--r-- src/core/hle/service/service.cpp | 30
-rw-r--r-- src/core/hle/service/service.h | 16
-rw-r--r-- src/core/hle/service/set/set_sys.cpp | 4
-rw-r--r-- src/core/hle/service/sockets/blocking_worker.h | 161
-rw-r--r-- src/core/hle/service/sockets/bsd.cpp | 138
-rw-r--r-- src/core/hle/service/sockets/bsd.h | 9
-rw-r--r-- src/core/hle/service/sockets/sockets.h | 23
-rw-r--r-- src/core/hle/service/sockets/sockets_translate.cpp | 62
-rw-r--r-- src/core/hle/service/sockets/sockets_translate.h | 4
-rw-r--r-- src/core/hle/service/time/time.cpp | 2
-rw-r--r-- src/core/hle/service/vi/vi.cpp | 75
-rw-r--r-- src/core/loader/deconstructed_rom_directory.h | 2
-rw-r--r-- src/core/loader/elf.h | 2
-rw-r--r-- src/core/loader/kip.h | 2
-rw-r--r-- src/core/loader/nax.h | 2
-rw-r--r-- src/core/loader/nca.h | 2
-rw-r--r-- src/core/loader/nro.h | 2
-rw-r--r-- src/core/loader/nso.h | 2
-rw-r--r-- src/core/loader/nsp.h | 2
-rw-r--r-- src/core/loader/xci.h | 2
-rw-r--r-- src/core/memory.cpp | 54
-rw-r--r-- src/core/network/network.cpp | 62
-rw-r--r-- src/core/network/network.h | 24
-rw-r--r-- src/core/network/sockets.h | 4
-rw-r--r-- src/core/settings.cpp | 7
-rw-r--r-- src/core/settings.h | 5
-rwxr-xr-x src/input_common/analog_from_button.cpp | 22
-rw-r--r-- src/input_common/gcadapter/gc_poller.cpp | 28
-rw-r--r-- src/input_common/mouse/mouse_poller.cpp | 25
-rw-r--r-- src/input_common/sdl/sdl_impl.cpp | 26
-rw-r--r-- src/input_common/udp/client.cpp | 5
-rw-r--r-- src/tests/CMakeLists.txt | 1
-rw-r--r-- src/tests/common/multi_level_queue.cpp | 55
-rw-r--r-- src/video_core/CMakeLists.txt | 192
-rw-r--r-- src/video_core/buffer_cache/buffer_block.h | 19
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 21
-rw-r--r-- src/video_core/buffer_cache/map_interval.h | 3
-rw-r--r-- src/video_core/cdma_pusher.cpp | 30
-rw-r--r-- src/video_core/cdma_pusher.h | 16
-rw-r--r-- src/video_core/command_classes/codecs/codec.cpp | 4
-rw-r--r-- src/video_core/command_classes/codecs/vp9.cpp | 42
-rw-r--r-- src/video_core/command_classes/codecs/vp9.h | 2
-rw-r--r-- src/video_core/command_classes/vic.cpp | 10
-rw-r--r-- src/video_core/compatible_formats.cpp | 142
-rw-r--r-- src/video_core/compatible_formats.h | 23
-rw-r--r-- src/video_core/delayed_destruction_ring.h | 32
-rw-r--r-- src/video_core/dirty_flags.cpp | 9
-rw-r--r-- src/video_core/dirty_flags.h | 3
-rw-r--r-- src/video_core/dma_pusher.cpp | 9
-rw-r--r-- src/video_core/dma_pusher.h | 14
-rw-r--r-- src/video_core/engines/engine_upload.cpp | 8
-rw-r--r-- src/video_core/engines/engine_upload.h | 4
-rw-r--r-- src/video_core/engines/fermi_2d.cpp | 90
-rw-r--r-- src/video_core/engines/fermi_2d.h | 331
-rw-r--r-- src/video_core/engines/kepler_compute.cpp | 26
-rw-r--r-- src/video_core/engines/kepler_compute.h | 5
-rw-r--r-- src/video_core/engines/kepler_memory.cpp | 4
-rw-r--r-- src/video_core/engines/kepler_memory.h | 2
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 56
-rw-r--r-- src/video_core/engines/maxwell_3d.h | 194
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp | 9
-rw-r--r-- src/video_core/engines/maxwell_dma.h | 16
-rw-r--r-- src/video_core/engines/shader_bytecode.h | 6
-rw-r--r-- src/video_core/fence_manager.h | 25
-rw-r--r-- src/video_core/framebuffer_config.h | 31
-rw-r--r-- src/video_core/gpu.cpp | 95
-rw-r--r-- src/video_core/gpu.h | 67
-rw-r--r-- src/video_core/gpu_asynch.cpp | 86
-rw-r--r-- src/video_core/gpu_asynch.h | 47
-rw-r--r-- src/video_core/gpu_synch.cpp | 61
-rw-r--r-- src/video_core/gpu_synch.h | 41
-rw-r--r-- src/video_core/gpu_thread.cpp | 50
-rw-r--r-- src/video_core/gpu_thread.h | 33
-rw-r--r-- src/video_core/guest_driver.h | 4
-rw-r--r-- src/video_core/host_shaders/CMakeLists.txt | 67
-rw-r--r-- src/video_core/host_shaders/block_linear_unswizzle_2d.comp | 122
-rw-r--r-- src/video_core/host_shaders/block_linear_unswizzle_3d.comp | 125
-rw-r--r-- src/video_core/host_shaders/convert_depth_to_float.frag | 13
-rw-r--r-- src/video_core/host_shaders/convert_float_to_depth.frag | 13
-rw-r--r-- src/video_core/host_shaders/full_screen_triangle.vert | 29
-rw-r--r-- src/video_core/host_shaders/opengl_copy_bc4.comp | 70
-rw-r--r-- src/video_core/host_shaders/opengl_present.frag | 4
-rw-r--r-- src/video_core/host_shaders/opengl_present.vert | 4
-rw-r--r-- src/video_core/host_shaders/pitch_unswizzle.comp | 86
-rw-r--r-- src/video_core/host_shaders/vulkan_blit_color_float.frag | 14
-rw-r--r-- src/video_core/host_shaders/vulkan_blit_depth_stencil.frag | 16
-rw-r--r-- src/video_core/host_shaders/vulkan_present.frag (renamed from src/video_core/renderer_vulkan/shaders/blit.frag) | 9
-rw-r--r-- src/video_core/host_shaders/vulkan_present.vert (renamed from src/video_core/renderer_vulkan/shaders/blit.vert) | 9
-rw-r--r-- src/video_core/host_shaders/vulkan_quad_array.comp (renamed from src/video_core/renderer_vulkan/shaders/quad_array.comp) | 9
-rw-r--r-- src/video_core/host_shaders/vulkan_quad_indexed.comp (renamed from src/video_core/renderer_vulkan/shaders/quad_indexed.comp) | 9
-rw-r--r-- src/video_core/host_shaders/vulkan_uint8.comp (renamed from src/video_core/renderer_vulkan/shaders/uint8.comp) | 9
-rw-r--r-- src/video_core/macro/macro_hle.cpp | 6
-rw-r--r-- src/video_core/macro/macro_hle.h | 2
-rw-r--r-- src/video_core/macro/macro_interpreter.cpp | 27
-rw-r--r-- src/video_core/macro/macro_interpreter.h | 10
-rw-r--r-- src/video_core/macro/macro_jit_x64.cpp | 21
-rw-r--r-- src/video_core/macro/macro_jit_x64.h | 4
-rw-r--r-- src/video_core/memory_manager.cpp | 5
-rw-r--r-- src/video_core/memory_manager.h | 4
-rw-r--r-- src/video_core/morton.cpp | 250
-rw-r--r-- src/video_core/morton.h | 18
-rw-r--r-- src/video_core/query_cache.h | 8
-rw-r--r-- src/video_core/rasterizer_interface.h | 12
-rw-r--r-- src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 60
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 33
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h | 12
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp | 64
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h | 13
-rw-r--r-- src/video_core/renderer_opengl/gl_fence_manager.cpp | 14
-rw-r--r-- src/video_core/renderer_opengl/gl_fence_manager.h | 12
-rw-r--r-- src/video_core/renderer_opengl/gl_framebuffer_cache.cpp | 85
-rw-r--r-- src/video_core/renderer_opengl/gl_framebuffer_cache.h | 68
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.cpp | 24
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.h | 12
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 514
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 71
-rw-r--r-- src/video_core/renderer_opengl/gl_resource_manager.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_sampler_cache.cpp | 52
-rw-r--r-- src/video_core/renderer_opengl/gl_sampler_cache.h | 25
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 16
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h | 11
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 55
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.h | 16
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.cpp | 15
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h | 6
-rw-r--r-- src/video_core/renderer_opengl/gl_state_tracker.cpp | 9
-rw-r--r-- src/video_core/renderer_opengl/gl_state_tracker.h | 15
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.cpp | 32
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.h | 19
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 1455
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.h | 285
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h | 43
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp | 61
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h | 9
-rw-r--r-- src/video_core/renderer_opengl/util_shaders.cpp | 224
-rw-r--r-- src/video_core/renderer_opengl/util_shaders.h | 51
-rw-r--r-- src/video_core/renderer_opengl/utils.cpp | 42
-rw-r--r-- src/video_core/renderer_opengl/utils.h | 16
-rw-r--r-- src/video_core/renderer_vulkan/blit_image.cpp | 624
-rw-r--r-- src/video_core/renderer_vulkan/blit_image.h | 97
-rw-r--r-- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 1
-rw-r--r-- src/video_core/renderer_vulkan/fixed_pipeline_state.h | 1
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 69
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.h | 2
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 17
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.h | 9
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 301
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 117
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 12
-rw-r--r-- src/video_core/renderer_vulkan/vk_command_pool.cpp | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_command_pool.h | 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 359
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.h | 27
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 16
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 8
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.cpp | 126
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.h | 34
-rw-r--r-- src/video_core/renderer_vulkan/vk_fence_manager.cpp | 19
-rw-r--r-- src/video_core/renderer_vulkan/vk_fence_manager.h | 20
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 78
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 24
-rw-r--r-- src/video_core/renderer_vulkan/vk_image.cpp | 135
-rw-r--r-- src/video_core/renderer_vulkan/vk_image.h | 84
-rw-r--r-- src/video_core/renderer_vulkan/vk_memory_manager.cpp | 20
-rw-r--r-- src/video_core/renderer_vulkan/vk_memory_manager.h | 26
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 39
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 12
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.cpp | 36
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.h | 14
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 707
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h | 139
-rw-r--r-- src/video_core/renderer_vulkan/vk_renderpass_cache.cpp | 158
-rw-r--r-- src/video_core/renderer_vulkan/vk_renderpass_cache.h | 70
-rw-r--r-- src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 83
-rw-r--r-- src/video_core/renderer_vulkan/vk_sampler_cache.h | 29
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.cpp | 79
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.h | 16
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 46
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.h | 16
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_util.cpp | 11
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_util.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_state_tracker.cpp | 25
-rw-r--r-- src/video_core/renderer_vulkan/vk_state_tracker.h | 8
-rw-r--r-- src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 20
-rw-r--r-- src/video_core/renderer_vulkan/vk_stream_buffer.h | 12
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 1472
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.h | 327
-rw-r--r-- src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_update_descriptor.h | 32
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.cpp | 85
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.h | 142
-rw-r--r-- src/video_core/sampler_cache.cpp | 21
-rw-r--r-- src/video_core/sampler_cache.h | 60
-rw-r--r-- src/video_core/shader/ast.cpp | 13
-rw-r--r-- src/video_core/shader/ast.h | 31
-rw-r--r-- src/video_core/shader/async_shaders.cpp | 11
-rw-r--r-- src/video_core/shader/async_shaders.h | 8
-rw-r--r-- src/video_core/shader/control_flow.cpp | 20
-rw-r--r-- src/video_core/shader/control_flow.h | 14
-rw-r--r-- src/video_core/shader/decode.cpp | 12
-rw-r--r-- src/video_core/shader/decode/arithmetic.cpp | 3
-rw-r--r-- src/video_core/shader/decode/arithmetic_integer.cpp | 9
-rw-r--r-- src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 5
-rw-r--r-- src/video_core/shader/decode/conversion.cpp | 4
-rw-r--r-- src/video_core/shader/decode/half_set.cpp | 14
-rw-r--r-- src/video_core/shader/decode/image.cpp | 21
-rw-r--r-- src/video_core/shader/decode/memory.cpp | 25
-rw-r--r-- src/video_core/shader/decode/other.cpp | 44
-rw-r--r-- src/video_core/shader/decode/shift.cpp | 2
-rw-r--r-- src/video_core/shader/decode/texture.cpp | 65
-rw-r--r-- src/video_core/shader/decode/warp.cpp | 2
-rw-r--r-- src/video_core/shader/expr.h | 6
-rw-r--r-- src/video_core/shader/node.h | 31
-rw-r--r-- src/video_core/shader/node_helper.cpp | 2
-rw-r--r-- src/video_core/shader/shader_ir.cpp | 21
-rw-r--r-- src/video_core/shader/shader_ir.h | 26
-rw-r--r-- src/video_core/surface.cpp | 14
-rw-r--r-- src/video_core/surface.h | 152
-rw-r--r-- src/video_core/texture_cache/accelerated_swizzle.cpp | 70
-rw-r--r-- src/video_core/texture_cache/accelerated_swizzle.h | 45
-rw-r--r-- src/video_core/texture_cache/copy_params.h | 36
-rw-r--r-- src/video_core/texture_cache/decode_bc4.cpp | 97
-rw-r--r-- src/video_core/texture_cache/decode_bc4.h | 16
-rw-r--r-- src/video_core/texture_cache/descriptor_table.h | 82
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.cpp | 380
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.h | 42
-rw-r--r-- src/video_core/texture_cache/formatter.cpp | 95
-rw-r--r-- src/video_core/texture_cache/formatter.h | 263
-rw-r--r-- src/video_core/texture_cache/image_base.cpp | 216
-rw-r--r-- src/video_core/texture_cache/image_base.h | 83
-rw-r--r-- src/video_core/texture_cache/image_info.cpp | 189
-rw-r--r-- src/video_core/texture_cache/image_info.h | 38
-rw-r--r-- src/video_core/texture_cache/image_view_base.cpp | 41
-rw-r--r-- src/video_core/texture_cache/image_view_base.h | 47
-rw-r--r-- src/video_core/texture_cache/image_view_info.cpp | 88
-rw-r--r-- src/video_core/texture_cache/image_view_info.h | 50
-rw-r--r-- src/video_core/texture_cache/render_targets.h | 51
-rw-r--r-- src/video_core/texture_cache/samples_helper.h | 55
-rw-r--r-- src/video_core/texture_cache/slot_vector.h | 156
-rw-r--r-- src/video_core/texture_cache/surface_base.cpp | 298
-rw-r--r-- src/video_core/texture_cache/surface_base.h | 333
-rw-r--r-- src/video_core/texture_cache/surface_params.cpp | 445
-rw-r--r-- src/video_core/texture_cache/surface_params.h | 294
-rw-r--r-- src/video_core/texture_cache/surface_view.cpp | 27
-rw-r--r-- src/video_core/texture_cache/surface_view.h | 68
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 2398
-rw-r--r-- src/video_core/texture_cache/types.h | 140
-rw-r--r-- src/video_core/texture_cache/util.cpp | 1232
-rw-r--r-- src/video_core/texture_cache/util.h | 107
-rw-r--r-- src/video_core/textures/astc.cpp | 58
-rw-r--r-- src/video_core/textures/astc.h | 5
-rw-r--r-- src/video_core/textures/convert.cpp | 93
-rw-r--r-- src/video_core/textures/convert.h | 22
-rw-r--r-- src/video_core/textures/decoders.cpp | 249
-rw-r--r-- src/video_core/textures/decoders.h | 44
-rw-r--r-- src/video_core/textures/texture.cpp | 16
-rw-r--r-- src/video_core/textures/texture.h | 239
-rw-r--r-- src/video_core/video_core.cpp | 14
-rw-r--r-- src/yuzu/CMakeLists.txt | 10
-rw-r--r-- src/yuzu/applets/controller.cpp | 4
-rw-r--r-- src/yuzu/applets/controller.h | 2
-rw-r--r-- src/yuzu/applets/error.cpp | 6
-rw-r--r-- src/yuzu/applets/profile_select.cpp | 4
-rw-r--r-- src/yuzu/applets/profile_select.h | 2
-rw-r--r-- src/yuzu/applets/software_keyboard.cpp | 4
-rw-r--r-- src/yuzu/applets/software_keyboard.h | 2
-rw-r--r-- src/yuzu/applets/web_browser.cpp | 443
-rw-r--r-- src/yuzu/applets/web_browser.h | 191
-rw-r--r-- src/yuzu/applets/web_browser_scripts.h | 193
-rw-r--r-- src/yuzu/bootmanager.cpp | 16
-rw-r--r-- src/yuzu/bootmanager.h | 2
-rw-r--r-- src/yuzu/configuration/config.cpp | 11
-rw-r--r-- src/yuzu/configuration/configure_debug.cpp | 5
-rw-r--r-- src/yuzu/configuration/configure_debug.ui | 74
-rw-r--r-- src/yuzu/configuration/configure_graphics.cpp | 11
-rw-r--r-- src/yuzu/configuration/configure_input_advanced.cpp | 2
-rw-r--r-- src/yuzu/configuration/configure_input_advanced.ui | 31
-rw-r--r-- src/yuzu/configuration/configure_input_player.cpp | 85
-rw-r--r-- src/yuzu/configuration/configure_motion_touch.cpp | 4
-rw-r--r-- src/yuzu/debugger/profiler.cpp | 2
-rw-r--r-- src/yuzu/debugger/wait_tree.cpp | 12
-rw-r--r-- src/yuzu/game_list_p.h | 3
-rw-r--r-- src/yuzu/main.cpp | 333
-rw-r--r-- src/yuzu/main.h | 18
-rw-r--r-- src/yuzu/main.ui | 78
-rw-r--r-- src/yuzu/util/url_request_interceptor.cpp | 32
-rw-r--r-- src/yuzu/util/url_request_interceptor.h | 30
-rw-r--r-- src/yuzu_cmd/CMakeLists.txt | 15
-rw-r--r-- src/yuzu_cmd/config.cpp | 4
-rw-r--r-- src/yuzu_cmd/default_ini.h | 3
-rw-r--r-- src/yuzu_cmd/yuzu.cpp | 31
-rw-r--r-- src/yuzu_tester/config.cpp | 1
-rw-r--r-- src/yuzu_tester/yuzu.cpp | 1
446 files changed, 17970 insertions, 13564 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a22b564d6..8777df751 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -62,6 +62,7 @@ else()
         -Werror=implicit-fallthrough
         -Werror=missing-declarations
         -Werror=reorder
+        -Werror=uninitialized
         -Werror=unused-result
         -Wextra
         -Wmissing-declarations
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 179560cd7..d2ce8c814 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -11,7 +11,6 @@
 #include "audio_core/info_updater.h"
 #include "audio_core/voice_context.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/writable_event.h"
 #include "core/memory.h"
 #include "core/settings.h"
 
@@ -71,10 +70,9 @@ namespace {
 namespace AudioCore {
 AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
                              AudioCommon::AudioRendererParameter params,
-                             std::shared_ptr<Kernel::WritableEvent> buffer_event_,
+                             Stream::ReleaseCallback&& release_callback,
                              std::size_t instance_number)
-    : worker_params{params}, buffer_event{buffer_event_},
-      memory_pool_info(params.effect_count + params.voice_count * 4),
+    : worker_params{params}, memory_pool_info(params.effect_count + params.voice_count * 4),
       voice_context(params.voice_count), effect_context(params.effect_count), mix_context(),
       sink_context(params.sink_count), splitter_context(),
       voices(params.voice_count), memory{memory_},
@@ -85,10 +83,9 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
                                  params.num_splitter_send_channels);
     mix_context.Initialize(behavior_info, params.submix_count + 1, params.effect_count);
     audio_out = std::make_unique<AudioCore::AudioOut>();
-    stream =
-        audio_out->OpenStream(core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
-                              fmt::format("AudioRenderer-Instance{}", instance_number),
-                              [=]() { buffer_event_->Signal(); });
+    stream = audio_out->OpenStream(
+        core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
+        fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback));
     audio_out->StartStream(stream);
 
     QueueMixedBuffer(0);
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 90f7eafa4..18567f618 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -27,10 +27,6 @@ namespace Core::Timing {
 class CoreTiming;
 }
 
-namespace Kernel {
-class WritableEvent;
-}
-
 namespace Core::Memory {
 class Memory;
 }
@@ -44,8 +40,7 @@ class AudioRenderer {
 public:
     AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
                   AudioCommon::AudioRendererParameter params,
-                  std::shared_ptr<Kernel::WritableEvent> buffer_event_,
-                  std::size_t instance_number);
+                  Stream::ReleaseCallback&& release_callback, std::size_t instance_number);
     ~AudioRenderer();
 
     [[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params,
@@ -61,7 +56,6 @@ private:
     BehaviorInfo behavior_info{};
 
     AudioCommon::AudioRendererParameter worker_params;
-    std::shared_ptr<Kernel::WritableEvent> buffer_event;
    std::vector<ServerMemoryPoolInfo> memory_pool_info;
     VoiceContext voice_context;
     EffectContext effect_context;
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index cf7b186e4..043447eaa 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -30,6 +30,7 @@ public:
         params.rate = sample_rate;
         params.channels = num_channels;
         params.format = CUBEB_SAMPLE_S16NE;
+        params.prefs = CUBEB_STREAM_PREF_PERSIST;
         switch (num_channels) {
         case 1:
             params.layout = CUBEB_LAYOUT_MONO;
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index eca296589..afe68c9ed 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -130,7 +130,11 @@ bool Stream::ContainsBuffer([[maybe_unused]] Buffer::Tag tag) const {
 std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers(std::size_t max_count) {
     std::vector<Buffer::Tag> tags;
     for (std::size_t count = 0; count < max_count && !released_buffers.empty(); ++count) {
-        tags.push_back(released_buffers.front()->GetTag());
+        if (released_buffers.front()) {
+            tags.push_back(released_buffers.front()->GetTag());
+        } else {
+            ASSERT_MSG(false, "Invalid tag in released_buffers!");
+        }
         released_buffers.pop();
     }
     return tags;
@@ -140,7 +144,11 @@ std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers() {
     std::vector<Buffer::Tag> tags;
     tags.reserve(released_buffers.size());
     while (!released_buffers.empty()) {
-        tags.push_back(released_buffers.front()->GetTag());
+        if (released_buffers.front()) {
+            tags.push_back(released_buffers.front()->GetTag());
+        } else {
+            ASSERT_MSG(false, "Invalid tag in released_buffers!");
+        }
         released_buffers.pop();
     }
     return tags;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 56c7e21f5..5c8003eb1 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -104,6 +104,7 @@ add_library(common STATIC
     detached_tasks.h
     bit_cast.h
     bit_field.h
+    bit_set.h
     bit_util.h
     cityhash.cpp
     cityhash.h
@@ -140,7 +141,6 @@ add_library(common STATIC
     microprofile.h
     microprofileui.h
     misc.cpp
-    multi_level_queue.h
     page_table.cpp
     page_table.h
     param_package.cpp
@@ -162,6 +162,8 @@ add_library(common STATIC
     thread.cpp
     thread.h
     thread_queue_list.h
+    thread_worker.cpp
+    thread_worker.h
     threadsafe_queue.h
     time_zone.cpp
     time_zone.h
@@ -209,7 +211,6 @@ else()
 endif()
 
 create_target_directory_groups(common)
-find_package(Boost 1.71 COMPONENTS context headers REQUIRED)
 
 target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile)
 target_link_libraries(common PRIVATE lz4::lz4 xbyak)
diff --git a/src/common/bit_set.h b/src/common/bit_set.h
new file mode 100644
index 000000000..9235ad412
--- /dev/null
+++ b/src/common/bit_set.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018-2020 Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <array>
+#include <bit>
+
+#include "common/alignment.h"
+#include "common/bit_util.h"
+#include "common/common_types.h"
+
+namespace Common {
+
+namespace impl {
+
+template <typename Storage, size_t N>
+class BitSet {
+
+public:
+    constexpr BitSet() = default;
+
+    constexpr void SetBit(size_t i) {
+        this->words[i / FlagsPerWord] |= GetBitMask(i % FlagsPerWord);
+    }
+
+    constexpr void ClearBit(size_t i) {
+        this->words[i / FlagsPerWord] &= ~GetBitMask(i % FlagsPerWord);
+    }
+
+    constexpr size_t CountLeadingZero() const {
+        for (size_t i = 0; i < NumWords; i++) {
+            if (this->words[i]) {
+                return FlagsPerWord * i + CountLeadingZeroImpl(this->words[i]);
+            }
+        }
+        return FlagsPerWord * NumWords;
+    }
+
+    constexpr size_t GetNextSet(size_t n) const {
+        for (size_t i = (n + 1) / FlagsPerWord; i < NumWords; i++) {
+            Storage word = this->words[i];
+            if (!IsAligned(n + 1, FlagsPerWord)) {
+                word &= GetBitMask(n % FlagsPerWord) - 1;
+            }
+            if (word) {
+                return FlagsPerWord * i + CountLeadingZeroImpl(word);
+            }
+        }
+        return FlagsPerWord * NumWords;
+    }
+
+private:
+    static_assert(std::is_unsigned_v<Storage>);
+    static_assert(sizeof(Storage) <= sizeof(u64));
+
+    static constexpr size_t FlagsPerWord = BitSize<Storage>();
+    static constexpr size_t NumWords = AlignUp(N, FlagsPerWord) / FlagsPerWord;
+
+    static constexpr auto CountLeadingZeroImpl(Storage word) {
+        return std::countl_zero(static_cast<unsigned long long>(word)) -
+               (BitSize<unsigned long long>() - FlagsPerWord);
+    }
+
+    static constexpr Storage GetBitMask(size_t bit) {
+        return Storage(1) << (FlagsPerWord - 1 - bit);
+    }
+
+    std::array<Storage, NumWords> words{};
+};
+
+} // namespace impl
+
+template <size_t N>
+using BitSet8 = impl::BitSet<u8, N>;
+
+template <size_t N>
+using BitSet16 = impl::BitSet<u16, N>;
+
+template <size_t N>
+using BitSet32 = impl::BitSet<u32, N>;
+
+template <size_t N>
+using BitSet64 = impl::BitSet<u64, N>;
+
+} // namespace Common
diff --git a/src/common/concepts.h b/src/common/concepts.h
index 5bef3ad67..aa08065a7 100644
--- a/src/common/concepts.h
+++ b/src/common/concepts.h
@@ -31,4 +31,8 @@ concept DerivedFrom = requires {
     std::is_convertible_v<const volatile Derived*, const volatile Base*>;
 };
 
+// TODO: Replace with std::convertible_to when libc++ implements it.
+template <typename From, typename To>
+concept ConvertibleTo = std::is_convertible_v<From, To>;
+
 } // namespace Common
diff --git a/src/common/multi_level_queue.h b/src/common/multi_level_queue.h
deleted file mode 100644
index 4b305bf40..000000000
--- a/src/common/multi_level_queue.h
+++ /dev/null
@@ -1,345 +0,0 @@
-// Copyright 2019 TuxSH
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <iterator>
-#include <list>
-#include <utility>
-
-#include "common/bit_util.h"
-#include "common/common_types.h"
-
-namespace Common {
-
-/**
- * A MultiLevelQueue is a type of priority queue which has the following characteristics:
- * - iteratable through each of its elements.
- * - back can be obtained.
- * - O(1) add, lookup (both front and back)
- * - discrete priorities and a max of 64 priorities (limited domain)
- * This type of priority queue is normaly used for managing threads within an scheduler
- */
-template <typename T, std::size_t Depth>
-class MultiLevelQueue {
-public:
-    using value_type = T;
-    using reference = value_type&;
-    using const_reference = const value_type&;
-    using pointer = value_type*;
-    using const_pointer = const value_type*;
-
-    using difference_type = typename std::pointer_traits<pointer>::difference_type;
-    using size_type = std::size_t;
-
-    template <bool is_constant>
-    class iterator_impl {
-    public:
-        using iterator_category = std::bidirectional_iterator_tag;
-        using value_type = T;
-        using pointer = std::conditional_t<is_constant, T*, const T*>;
-        using reference = std::conditional_t<is_constant, const T&, T&>;
-        using difference_type = typename std::pointer_traits<pointer>::difference_type;
-
-        friend bool operator==(const iterator_impl& lhs, const iterator_impl& rhs) {
-            if (lhs.IsEnd() && rhs.IsEnd())
-                return true;
-            return std::tie(lhs.current_priority, lhs.it) == std::tie(rhs.current_priority, rhs.it);
-        }
-
-        friend bool operator!=(const iterator_impl& lhs, const iterator_impl& rhs) {
-            return !operator==(lhs, rhs);
-        }
-
-        reference operator*() const {
-            return *it;
-        }
-
-        pointer operator->() const {
-            return it.operator->();
-        }
-
-        iterator_impl& operator++() {
-            if (IsEnd()) {
-                return *this;
-            }
-
-            ++it;
-
-            if (it == GetEndItForPrio()) {
-                u64 prios = mlq.used_priorities;
-                prios &= ~((1ULL << (current_priority + 1)) - 1);
-                if (prios == 0) {
-                    current_priority = static_cast<u32>(mlq.depth());
-                } else {
-                    current_priority = CountTrailingZeroes64(prios);
-                    it = GetBeginItForPrio();
-                }
-            }
-            return *this;
-        }
-
-        iterator_impl& operator--() {
-            if (IsEnd()) {
-                if (mlq.used_priorities != 0) {
-                    current_priority = 63 - CountLeadingZeroes64(mlq.used_priorities);
-                    it = GetEndItForPrio();
-                    --it;
-                }
-            } else if (it == GetBeginItForPrio()) {
-                u64 prios = mlq.used_priorities;
-                prios &= (1ULL << current_priority) - 1;
-                if (prios != 0) {
-                    current_priority = CountTrailingZeroes64(prios);
-                    it = GetEndItForPrio();
-                    --it;
-                }
-            } else {
-                --it;
-            }
-            return *this;
-        }
-
-        iterator_impl operator++(int) {
-            const iterator_impl v{*this};
-            ++(*this);
-            return v;
-        }
-
-        iterator_impl operator--(int) {
-            const iterator_impl v{*this};
-            --(*this);
-            return v;
-        }
-
-        // allow implicit const->non-const
-        iterator_impl(const iterator_impl<false>& other)
-            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
-
-        iterator_impl(const iterator_impl<true>& other)
-            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
-
-        iterator_impl& operator=(const iterator_impl<false>& other) {
-            mlq = other.mlq;
-            it = other.it;
-            current_priority = other.current_priority;
-            return *this;
-        }
-
-        friend class iterator_impl<true>;
-        iterator_impl() = default;
-
-    private:
-        friend class MultiLevelQueue;
-        using container_ref =
-            std::conditional_t<is_constant, const MultiLevelQueue&, MultiLevelQueue&>;
-        using list_iterator = std::conditional_t<is_constant, typename std::list<T>::const_iterator,
-                                                 typename std::list<T>::iterator>;
-
-        explicit iterator_impl(container_ref mlq, list_iterator it, u32 current_priority)
-            : mlq(mlq), it(it), current_priority(current_priority) {}
-        explicit iterator_impl(container_ref mlq, u32 current_priority)
-            : mlq(mlq), it(), current_priority(current_priority) {}
-
-        bool IsEnd() const {
-            return current_priority == mlq.depth();
-        }
-
-        list_iterator GetBeginItForPrio() const {
-            return mlq.levels[current_priority].begin();
-        }
-
-        list_iterator GetEndItForPrio() const {
-            return mlq.levels[current_priority].end();
-        }
-
-        container_ref mlq;
-        list_iterator it;
-        u32 current_priority;
-    };
-
-    using iterator = iterator_impl<false>;
-    using const_iterator = iterator_impl<true>;
-
-    void add(const T& element, u32 priority, bool send_back = true) {
-        if (send_back)
-            levels[priority].push_back(element);
-        else
-            levels[priority].push_front(element);
-        used_priorities |= 1ULL << priority;
-    }
-
-    void remove(const T& element, u32 priority) {
-        auto it = ListIterateTo(levels[priority], element);
-        if (it == levels[priority].end())
-            return;
-        levels[priority].erase(it);
-        if (levels[priority].empty()) {
-            used_priorities &= ~(1ULL << priority);
-        }
-    }
-
-    void adjust(const T& element, u32 old_priority, u32 new_priority, bool adjust_front = false) {
-        remove(element, old_priority);
-        add(element, new_priority, !adjust_front);
-    }
-    void adjust(const_iterator it, u32 old_priority, u32 new_priority, bool adjust_front = false) {
-        adjust(*it, old_priority, new_priority, adjust_front);
-    }
-
-    void transfer_to_front(const T& element, u32 priority, MultiLevelQueue& other) {
-        ListSplice(other.levels[priority], other.levels[priority].begin(), levels[priority],
-                   ListIterateTo(levels[priority], element));
-
-        other.used_priorities |= 1ULL << priority;
-
-        if (levels[priority].empty()) {
-            used_priorities &= ~(1ULL << priority);
-        }
-    }
-
-    void transfer_to_front(const_iterator it, u32 priority, MultiLevelQueue& other) {
-        transfer_to_front(*it, priority, other);
-    }
-
-    void transfer_to_back(const T& element, u32 priority, MultiLevelQueue& other) {
-        ListSplice(other.levels[priority], other.levels[priority].end(), levels[priority],
-                   ListIterateTo(levels[priority], element));
-
-        other.used_priorities |= 1ULL << priority;
-
-        if (levels[priority].empty()) {
-            used_priorities &= ~(1ULL << priority);
-        }
-    }
-
-    void transfer_to_back(const_iterator it, u32 priority, MultiLevelQueue& other) {
-        transfer_to_back(*it, priority, other);
-    }
-
-    void yield(u32 priority, std::size_t n = 1) {
-        ListShiftForward(levels[priority], n);
-    }
-
-    [[nodiscard]] std::size_t depth() const {
-        return Depth;
-    }
-
-    [[nodiscard]] std::size_t size(u32 priority) const {
-        return levels[priority].size();
-    }
-
-    [[nodiscard]] std::size_t size() const {
-        u64 priorities = used_priorities;
-        std::size_t size = 0;
-        while (priorities != 0) {
-            const u64 current_priority = CountTrailingZeroes64(priorities);
-            size += levels[current_priority].size();
-            priorities &= ~(1ULL << current_priority);
-        }
-        return size;
-    }
-
-    [[nodiscard]] bool empty() const {
-        return used_priorities == 0;
-    }
-
-    [[nodiscard]] bool empty(u32 priority) const {
-        return (used_priorities & (1ULL << priority)) == 0;
-    }
-
-    [[nodiscard]] u32 highest_priority_set(u32 max_priority = 0) const {
-        const u64 priorities =
-            max_priority == 0 ? used_priorities : (used_priorities & ~((1ULL << max_priority) - 1));
-        return priorities == 0 ? Depth : static_cast<u32>(CountTrailingZeroes64(priorities));
-    }
-
-    [[nodiscard]] u32 lowest_priority_set(u32 min_priority = Depth - 1) const {
-        const u64 priorities = min_priority >= Depth - 1
-                                   ? used_priorities
-                                   : (used_priorities & ((1ULL << (min_priority + 1)) - 1));
-        return priorities == 0 ? Depth : 63 - CountLeadingZeroes64(priorities);
-    }
-
-    [[nodiscard]] const_iterator cbegin(u32 max_prio = 0) const {
-        const u32 priority = highest_priority_set(max_prio);
-        return priority == Depth ? cend()
-                                 : const_iterator{*this, levels[priority].cbegin(), priority};
-    }
-    [[nodiscard]] const_iterator begin(u32 max_prio = 0) const {
-        return cbegin(max_prio);
-    }
-    [[nodiscard]] iterator begin(u32 max_prio = 0) {
-        const u32 priority = highest_priority_set(max_prio);
-        return priority == Depth ? end() : iterator{*this, levels[priority].begin(), priority};
-    }
-
-    [[nodiscard]] const_iterator cend(u32 min_prio = Depth - 1) const {
-        return min_prio == Depth - 1 ? const_iterator{*this, Depth} : cbegin(min_prio + 1);
-    }
-    [[nodiscard]] const_iterator end(u32 min_prio = Depth - 1) const {
-        return cend(min_prio);
-    }
-    [[nodiscard]] iterator end(u32 min_prio = Depth - 1) {
-        return min_prio == Depth - 1 ? iterator{*this, Depth} : begin(min_prio + 1);
-    }
-
-    [[nodiscard]] T& front(u32 max_priority = 0) {
-        const u32 priority = highest_priority_set(max_priority);
-        return levels[priority == Depth ? 0 : priority].front();
-    }
-    [[nodiscard]] const T& front(u32 max_priority = 0) const {
-        const u32 priority = highest_priority_set(max_priority);
-        return levels[priority == Depth ? 0 : priority].front();
-    }
-
-    [[nodiscard]] T& back(u32 min_priority = Depth - 1) {
-        const u32 priority = lowest_priority_set(min_priority); // intended
-        return levels[priority == Depth ? 63 : priority].back();
-    }
-    [[nodiscard]] const T& back(u32 min_priority = Depth - 1) const {
-        const u32 priority = lowest_priority_set(min_priority); // intended
-        return levels[priority == Depth ? 63 : priority].back();
-    }
-
-    void clear() {
-        used_priorities = 0;
-        for (std::size_t i = 0; i < Depth; i++) {
-            levels[i].clear();
-        }
-    }
-
-private:
-    using const_list_iterator = typename std::list<T>::const_iterator;
-
-    static void ListShiftForward(std::list<T>& list, const std::size_t shift = 1) {
-        if (shift >= list.size()) {
-            return;
-        }
-
-        const auto begin_range = list.begin();
-        const auto end_range = std::next(begin_range, shift);
-        list.splice(list.end(), list, begin_range, end_range);
-    }
-
-    static void ListSplice(std::list<T>& in_list, const_list_iterator position,
-                           std::list<T>& out_list, const_list_iterator element) {
-        in_list.splice(position, out_list, element);
-    }
-
-    [[nodiscard]] static const_list_iterator ListIterateTo(const std::list<T>& list,
-                                                           const T& element) {
-        auto it = list.cbegin();
-        while (it != list.cend() && *it != element) {
-            ++it;
-        }
-        return it;
-    }
-
-    std::array<std::list<T>, Depth> levels;
-    u64 used_priorities = 0;
-};
-
-} // namespace Common
diff --git a/src/common/scope_exit.h b/src/common/scope_exit.h
index 68ef5f197..fa46cb394 100644
--- a/src/common/scope_exit.h
+++ b/src/common/scope_exit.h
@@ -10,7 +10,7 @@
 namespace detail {
 template <typename Func>
 struct ScopeExitHelper {
-    explicit ScopeExitHelper(Func&& func) : func(std::move(func)) {}
+    explicit ScopeExitHelper(Func&& func_) : func(std::move(func_)) {}
     ~ScopeExitHelper() {
         if (active) {
             func();
diff --git a/src/common/telemetry.h b/src/common/telemetry.h
index a50c5d1de..49186e848 100644
--- a/src/common/telemetry.h
+++ b/src/common/telemetry.h
@@ -52,8 +52,8 @@ public:
 template <typename T>
 class Field : public FieldInterface {
 public:
-    Field(FieldType type, std::string name, T value)
-        : name(std::move(name)), type(type), value(std::move(value)) {}
+    Field(FieldType type_, std::string name_, T value_)
+        : name(std::move(name_)), type(type_), value(std::move(value_)) {}
 
     Field(const Field&) = default;
     Field& operator=(const Field&) = default;
diff --git a/src/common/thread_worker.cpp b/src/common/thread_worker.cpp
new file mode 100644
index 000000000..8f9bf447a
--- /dev/null
+++ b/src/common/thread_worker.cpp
@@ -0,0 +1,58 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/thread.h"
+#include "common/thread_worker.h"
+
+namespace Common {
+
+ThreadWorker::ThreadWorker(std::size_t num_workers, const std::string& name) {
+    for (std::size_t i = 0; i < num_workers; ++i)
+        threads.emplace_back([this, thread_name{std::string{name}}] {
+            Common::SetCurrentThreadName(thread_name.c_str());
+
+            // Wait for first request
+            {
+                std::unique_lock lock{queue_mutex};
+                condition.wait(lock, [this] { return stop || !requests.empty(); });
+            }
+
+            while (true) {
+                std::function<void()> task;
+
+                {
+                    std::unique_lock lock{queue_mutex};
+                    condition.wait(lock, [this] { return stop || !requests.empty(); });
+                    if (stop || requests.empty()) {
+                        return;
+                    }
+                    task = std::move(requests.front());
+                    requests.pop();
+                }
+
+                task();
+            }
+        });
+}
+
+ThreadWorker::~ThreadWorker() {
+    {
+        std::unique_lock lock{queue_mutex};
+        stop = true;
+    }
+    condition.notify_all();
+    for (std::thread& thread : threads) {
+        thread.join();
+    }
+}
+
+void ThreadWorker::QueueWork(std::function<void()>&& work) {
+    {
+        std::unique_lock lock{queue_mutex};
+        requests.emplace(work);
+    }
+    condition.notify_one();
+}
+
+} // namespace Common
diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h
new file mode 100644
index 000000000..f1859971f
--- /dev/null
+++ b/src/common/thread_worker.h
@@ -0,0 +1,30 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <functional>
+#include <mutex>
+#include <string>
+#include <vector>
+#include <queue>
+
+namespace Common {
+
+class ThreadWorker final {
+public:
+    explicit ThreadWorker(std::size_t num_workers, const std::string& name);
+    ~ThreadWorker();
+    void QueueWork(std::function<void()>&& work);
+
+private:
+    std::vector<std::thread> threads;
+    std::queue<std::function<void()>> requests;
+    std::mutex queue_mutex;
+    std::condition_variable condition;
+    std::atomic_bool stop{};
+};
+
+} // namespace Common
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 26e4bfda5..c2c9b6134 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -11,25 +11,25 @@
 
 namespace Common::X64 {
 
-constexpr std::size_t RegToIndex(const Xbyak::Reg& reg) {
+constexpr size_t RegToIndex(const Xbyak::Reg& reg) {
     using Kind = Xbyak::Reg::Kind;
     ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
                "RegSet only support GPRs and XMM registers.");
     ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
-    return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
+    return static_cast<size_t>(reg.getIdx()) + (reg.getKind() == Kind::REG ? 0 : 16);
 }
 
-constexpr Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
+constexpr Xbyak::Reg64 IndexToReg64(size_t reg_index) {
     ASSERT(reg_index < 16);
     return Xbyak::Reg64(static_cast<int>(reg_index));
 }
 
-constexpr Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
+constexpr Xbyak::Xmm IndexToXmm(size_t reg_index) {
     ASSERT(reg_index >= 16 && reg_index < 32);
     return Xbyak::Xmm(static_cast<int>(reg_index - 16));
 }
 
-constexpr Xbyak::Reg IndexToReg(std::size_t reg_index) {
+constexpr Xbyak::Reg IndexToReg(size_t reg_index) {
     if (reg_index < 16) {
         return IndexToReg64(reg_index);
     } else {
@@ -182,7 +182,7 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
                                               size_t rsp_alignment, size_t needed_frame_size = 0) {
     auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
 
-    for (std::size_t i = 0; i < regs.size(); ++i) {
+    for (size_t i = 0; i < regs.size(); ++i) {
         if (regs[i] && ABI_ALL_GPRS[i]) {
             code.push(IndexToReg64(i));
         }
@@ -192,7 +192,7 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
         code.sub(code.rsp, frame_info.subtraction);
     }
 
-    for (std::size_t i = 0; i < regs.size(); ++i) {
+    for (size_t i = 0; i < regs.size(); ++i) {
         if (regs[i] && ABI_ALL_XMMS[i]) {
             code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
             frame_info.xmm_offset += 0x10;
@@ -206,7 +206,7 @@ inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bits
                                            size_t rsp_alignment, size_t needed_frame_size = 0) {
     auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
 
-    for (std::size_t i = 0; i < regs.size(); ++i) {
+    for (size_t i = 0; i < regs.size(); ++i) {
         if (regs[i] && ABI_ALL_XMMS[i]) {
            code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
            frame_info.xmm_offset += 0x10;
@@ -218,8 +218,8 @@ inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bits
     }
 
     // GPRs need to be popped in reverse order
-    for (std::size_t j = 0; j < regs.size(); ++j) {
-        const std::size_t i = regs.size() - j - 1;
+    for (size_t j = 0; j < regs.size(); ++j) {
+        const size_t i = regs.size() - j - 1;
         if (regs[i] && ABI_ALL_GPRS[i]) {
             code.pop(IndexToReg64(i));
         }
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 66de33799..01f3e9419 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -41,6 +41,7 @@ add_library(core STATIC
     file_sys/bis_factory.h
     file_sys/card_image.cpp
    file_sys/card_image.h
+    file_sys/common_funcs.h
     file_sys/content_archive.cpp
     file_sys/content_archive.h
     file_sys/control_metadata.cpp
@@ -134,6 +135,8 @@ add_library(core STATIC
     frontend/emu_window.h
     frontend/framebuffer_layout.cpp
     frontend/framebuffer_layout.h
+    frontend/input_interpreter.cpp
+    frontend/input_interpreter.h
     frontend/input.h
     hardware_interrupt_manager.cpp
     hardware_interrupt_manager.h
@@ -148,10 +151,19 @@ add_library(core STATIC
     hle/kernel/code_set.cpp
     hle/kernel/code_set.h
     hle/kernel/errors.h
+    hle/kernel/global_scheduler_context.cpp
+    hle/kernel/global_scheduler_context.h
     hle/kernel/handle_table.cpp
     hle/kernel/handle_table.h
     hle/kernel/hle_ipc.cpp
     hle/kernel/hle_ipc.h
+    hle/kernel/k_affinity_mask.h
+    hle/kernel/k_priority_queue.h
+    hle/kernel/k_scheduler.cpp
+    hle/kernel/k_scheduler.h
+    hle/kernel/k_scheduler_lock.h
+    hle/kernel/k_scoped_lock.h
+    hle/kernel/k_scoped_scheduler_lock_and_sleep.h
     hle/kernel/kernel.cpp
     hle/kernel/kernel.h
     hle/kernel/memory/address_space_info.cpp
@@ -186,12 +198,12 @@ add_library(core STATIC
     hle/kernel/readable_event.h
     hle/kernel/resource_limit.cpp
     hle/kernel/resource_limit.h
-    hle/kernel/scheduler.cpp
-    hle/kernel/scheduler.h
     hle/kernel/server_port.cpp
     hle/kernel/server_port.h
     hle/kernel/server_session.cpp
     hle/kernel/server_session.h
+    hle/kernel/service_thread.cpp
+    hle/kernel/service_thread.h
     hle/kernel/session.cpp
     hle/kernel/session.h
     hle/kernel/shared_memory.cpp
@@ -490,7 +502,6 @@ add_library(core STATIC
     hle/service/sm/controller.h
     hle/service/sm/sm.cpp
     hle/service/sm/sm.h
-    hle/service/sockets/blocking_worker.h
     hle/service/sockets/bsd.cpp
     hle/service/sockets/bsd.h
     hle/service/sockets/ethc.cpp
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 193fd7d62..e9c74b1a6 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -294,6 +294,9 @@ void ARM_Dynarmic_32::InvalidateCacheRange(VAddr addr, std::size_t size) {
294} 294}
295 295
296void ARM_Dynarmic_32::ClearExclusiveState() { 296void ARM_Dynarmic_32::ClearExclusiveState() {
297 if (!jit) {
298 return;
299 }
297 jit->ClearExclusiveState(); 300 jit->ClearExclusiveState();
298} 301}
299 302
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 0f0585d0f..7a4eb88a2 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -15,8 +15,8 @@
15#include "core/core.h" 15#include "core/core.h"
16#include "core/core_timing.h" 16#include "core/core_timing.h"
17#include "core/hardware_properties.h" 17#include "core/hardware_properties.h"
18#include "core/hle/kernel/k_scheduler.h"
18#include "core/hle/kernel/process.h" 19#include "core/hle/kernel/process.h"
19#include "core/hle/kernel/scheduler.h"
20#include "core/hle/kernel/svc.h" 20#include "core/hle/kernel/svc.h"
21#include "core/memory.h" 21#include "core/memory.h"
22#include "core/settings.h" 22#include "core/settings.h"
@@ -330,6 +330,9 @@ void ARM_Dynarmic_64::InvalidateCacheRange(VAddr addr, std::size_t size) {
330} 330}
331 331
332void ARM_Dynarmic_64::ClearExclusiveState() { 332void ARM_Dynarmic_64::ClearExclusiveState() {
333 if (!jit) {
334 return;
335 }
333 jit->ClearExclusiveState(); 336 jit->ClearExclusiveState();
334} 337}
335 338
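Editor's note: both Dynarmic backends gain the same early-return guard, so ClearExclusiveState is safe to call before the JIT instance exists. A minimal sketch of the pattern, with a hypothetical Jit type standing in for Dynarmic's:

    #include <memory>

    // Hypothetical stand-in for the Dynarmic JIT object.
    struct Jit {
        void ClearExclusiveState() {}
    };

    struct ArmInterface {
        std::unique_ptr<Jit> jit;

        // Mirrors the guarded pattern in the hunks above: bail out if
        // the JIT has not been constructed yet rather than dereference
        // a null pointer.
        void ClearExclusiveState() {
            if (!jit) {
                return;
            }
            jit->ClearExclusiveState();
        }
    };

    int main() {
        ArmInterface iface;          // jit is null here
        iface.ClearExclusiveState(); // safe no-op instead of a crash
        iface.jit = std::make_unique<Jit>();
        iface.ClearExclusiveState(); // forwards to the JIT
    }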
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 01e4faac8..1a2002dec 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -27,10 +27,10 @@
27#include "core/file_sys/vfs_real.h" 27#include "core/file_sys/vfs_real.h"
28#include "core/hardware_interrupt_manager.h" 28#include "core/hardware_interrupt_manager.h"
29#include "core/hle/kernel/client_port.h" 29#include "core/hle/kernel/client_port.h"
30#include "core/hle/kernel/k_scheduler.h"
30#include "core/hle/kernel/kernel.h" 31#include "core/hle/kernel/kernel.h"
31#include "core/hle/kernel/physical_core.h" 32#include "core/hle/kernel/physical_core.h"
32#include "core/hle/kernel/process.h" 33#include "core/hle/kernel/process.h"
33#include "core/hle/kernel/scheduler.h"
34#include "core/hle/kernel/thread.h" 34#include "core/hle/kernel/thread.h"
35#include "core/hle/service/am/applets/applets.h" 35#include "core/hle/service/am/applets/applets.h"
36#include "core/hle/service/apm/controller.h" 36#include "core/hle/service/apm/controller.h"
@@ -159,7 +159,7 @@ struct System::Impl {
159 device_memory = std::make_unique<Core::DeviceMemory>(); 159 device_memory = std::make_unique<Core::DeviceMemory>();
160 160
161 is_multicore = Settings::values.use_multi_core.GetValue(); 161 is_multicore = Settings::values.use_multi_core.GetValue();
162 is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation.GetValue(); 162 is_async_gpu = Settings::values.use_asynchronous_gpu_emulation.GetValue();
163 163
164 kernel.SetMulticore(is_multicore); 164 kernel.SetMulticore(is_multicore);
165 cpu_manager.SetMulticore(is_multicore); 165 cpu_manager.SetMulticore(is_multicore);
@@ -237,7 +237,7 @@ struct System::Impl {
237 Kernel::Process::Create(system, "main", Kernel::Process::ProcessType::Userland); 237 Kernel::Process::Create(system, "main", Kernel::Process::ProcessType::Userland);
238 const auto [load_result, load_parameters] = app_loader->Load(*main_process, system); 238 const auto [load_result, load_parameters] = app_loader->Load(*main_process, system);
239 if (load_result != Loader::ResultStatus::Success) { 239 if (load_result != Loader::ResultStatus::Success) {
240 LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result)); 240 LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", load_result);
241 Shutdown(); 241 Shutdown();
242 242
243 return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) + 243 return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
@@ -267,8 +267,7 @@ struct System::Impl {
267 267
268 u64 title_id{0}; 268 u64 title_id{0};
269 if (app_loader->ReadProgramId(title_id) != Loader::ResultStatus::Success) { 269 if (app_loader->ReadProgramId(title_id) != Loader::ResultStatus::Success) {
270 LOG_ERROR(Core, "Failed to find title id for ROM (Error {})", 270 LOG_ERROR(Core, "Failed to find title id for ROM (Error {})", load_result);
271 static_cast<u32>(load_result));
272 } 271 }
273 perf_stats = std::make_unique<PerfStats>(title_id); 272 perf_stats = std::make_unique<PerfStats>(title_id);
274 // Reset counters and set time origin to current frame 273 // Reset counters and set time origin to current frame
@@ -308,7 +307,6 @@ struct System::Impl {
308 service_manager.reset(); 307 service_manager.reset();
309 cheat_engine.reset(); 308 cheat_engine.reset();
310 telemetry_session.reset(); 309 telemetry_session.reset();
311 device_memory.reset();
312 310
313 // Close all CPU/threading state 311 // Close all CPU/threading state
314 cpu_manager.Shutdown(); 312 cpu_manager.Shutdown();
@@ -508,14 +506,6 @@ std::size_t System::CurrentCoreIndex() const {
508 return core; 506 return core;
509} 507}
510 508
511Kernel::Scheduler& System::CurrentScheduler() {
512 return impl->kernel.CurrentScheduler();
513}
514
515const Kernel::Scheduler& System::CurrentScheduler() const {
516 return impl->kernel.CurrentScheduler();
517}
518
519Kernel::PhysicalCore& System::CurrentPhysicalCore() { 509Kernel::PhysicalCore& System::CurrentPhysicalCore() {
520 return impl->kernel.CurrentPhysicalCore(); 510 return impl->kernel.CurrentPhysicalCore();
521} 511}
@@ -524,22 +514,14 @@ const Kernel::PhysicalCore& System::CurrentPhysicalCore() const {
524 return impl->kernel.CurrentPhysicalCore(); 514 return impl->kernel.CurrentPhysicalCore();
525} 515}
526 516
527Kernel::Scheduler& System::Scheduler(std::size_t core_index) {
528 return impl->kernel.Scheduler(core_index);
529}
530
531const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const {
532 return impl->kernel.Scheduler(core_index);
533}
534
535/// Gets the global scheduler 517/// Gets the global scheduler
536Kernel::GlobalScheduler& System::GlobalScheduler() { 518Kernel::GlobalSchedulerContext& System::GlobalSchedulerContext() {
537 return impl->kernel.GlobalScheduler(); 519 return impl->kernel.GlobalSchedulerContext();
538} 520}
539 521
540/// Gets the global scheduler 522/// Gets the global scheduler
541const Kernel::GlobalScheduler& System::GlobalScheduler() const { 523const Kernel::GlobalSchedulerContext& System::GlobalSchedulerContext() const {
542 return impl->kernel.GlobalScheduler(); 524 return impl->kernel.GlobalSchedulerContext();
543} 525}
544 526
545Kernel::Process* System::CurrentProcess() { 527Kernel::Process* System::CurrentProcess() {
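Editor's note: the log statements above now pass the enum value straight through instead of static_casting it. yuzu's logger is fmt-based, so this works once a formatter for the enum type is visible at the call site. A minimal illustrative sketch — assuming a recent fmt where formatters are const-callable, and not yuzu's actual logging helper:

    #include <fmt/format.h>

    enum class ResultStatus { Success, ErrorLoader };

    // One way to make an enum printable without a cast at every call
    // site: delegate to the int formatter. Illustrative only; yuzu's
    // real mechanism lives in its logging headers and may differ.
    template <>
    struct fmt::formatter<ResultStatus> : fmt::formatter<int> {
        auto format(ResultStatus status, fmt::format_context& ctx) const {
            return fmt::formatter<int>::format(static_cast<int>(status), ctx);
        }
    };

    int main() {
        fmt::print("Failed to load ROM (Error {})!\n", ResultStatus::ErrorLoader);
    }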
diff --git a/src/core/core.h b/src/core/core.h
index 29b8fb92a..579a774e4 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -26,11 +26,11 @@ class VfsFilesystem;
26} // namespace FileSys 26} // namespace FileSys
27 27
28namespace Kernel { 28namespace Kernel {
29class GlobalScheduler; 29class GlobalSchedulerContext;
30class KernelCore; 30class KernelCore;
31class PhysicalCore; 31class PhysicalCore;
32class Process; 32class Process;
33class Scheduler; 33class KScheduler;
34} // namespace Kernel 34} // namespace Kernel
35 35
36namespace Loader { 36namespace Loader {
@@ -213,12 +213,6 @@ public:
213 /// Gets the index of the currently running CPU core 213 /// Gets the index of the currently running CPU core
214 [[nodiscard]] std::size_t CurrentCoreIndex() const; 214 [[nodiscard]] std::size_t CurrentCoreIndex() const;
215 215
216 /// Gets the scheduler for the CPU core that is currently running
217 [[nodiscard]] Kernel::Scheduler& CurrentScheduler();
218
219 /// Gets the scheduler for the CPU core that is currently running
220 [[nodiscard]] const Kernel::Scheduler& CurrentScheduler() const;
221
222 /// Gets the physical core for the CPU core that is currently running 216 /// Gets the physical core for the CPU core that is currently running
223 [[nodiscard]] Kernel::PhysicalCore& CurrentPhysicalCore(); 217 [[nodiscard]] Kernel::PhysicalCore& CurrentPhysicalCore();
224 218
@@ -261,17 +255,11 @@ public:
261 /// Gets an immutable reference to the renderer. 255 /// Gets an immutable reference to the renderer.
262 [[nodiscard]] const VideoCore::RendererBase& Renderer() const; 256 [[nodiscard]] const VideoCore::RendererBase& Renderer() const;
263 257
264 /// Gets the scheduler for the CPU core with the specified index
265 [[nodiscard]] Kernel::Scheduler& Scheduler(std::size_t core_index);
266
267 /// Gets the scheduler for the CPU core with the specified index
268 [[nodiscard]] const Kernel::Scheduler& Scheduler(std::size_t core_index) const;
269
270 /// Gets the global scheduler 258 /// Gets the global scheduler
271 [[nodiscard]] Kernel::GlobalScheduler& GlobalScheduler(); 259 [[nodiscard]] Kernel::GlobalSchedulerContext& GlobalSchedulerContext();
272 260
273 /// Gets the global scheduler 261 /// Gets the global scheduler
274 [[nodiscard]] const Kernel::GlobalScheduler& GlobalScheduler() const; 262 [[nodiscard]] const Kernel::GlobalSchedulerContext& GlobalSchedulerContext() const;
275 263
276 /// Gets the manager for the guest device memory 264 /// Gets the manager for the guest device memory
277 [[nodiscard]] Core::DeviceMemory& DeviceMemory(); 265 [[nodiscard]] Core::DeviceMemory& DeviceMemory();
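Editor's note: with the per-core Scheduler accessors dropped from System, call sites go through the kernel object, and (as the cpu_manager hunks below show) CurrentScheduler now hands back a pointer rather than a reference. A toy mirror of that ownership shape — names and the null condition are illustrative, not yuzu's API:

    #include <array>
    #include <cstddef>
    #include <iostream>

    struct KScheduler {
        std::size_t core;
    };

    struct Kernel {
        std::array<KScheduler, 4> schedulers{{{0}, {1}, {2}, {3}}};

        // Pointer return: presumably null when called from a host
        // thread that is not emulating any core (an assumption here).
        KScheduler* CurrentScheduler(std::size_t host_core) {
            return host_core < schedulers.size() ? &schedulers[host_core]
                                                 : nullptr;
        }
    };

    int main() {
        Kernel kernel;
        if (KScheduler* sched = kernel.CurrentScheduler(2)) {
            std::cout << "scheduler for core " << sched->core << '\n';
        }
    }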
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 0cff985e9..373395047 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -10,9 +10,9 @@
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_timing.h" 11#include "core/core_timing.h"
12#include "core/cpu_manager.h" 12#include "core/cpu_manager.h"
13#include "core/hle/kernel/k_scheduler.h"
13#include "core/hle/kernel/kernel.h" 14#include "core/hle/kernel/kernel.h"
14#include "core/hle/kernel/physical_core.h" 15#include "core/hle/kernel/physical_core.h"
15#include "core/hle/kernel/scheduler.h"
16#include "core/hle/kernel/thread.h" 16#include "core/hle/kernel/thread.h"
17#include "video_core/gpu.h" 17#include "video_core/gpu.h"
18 18
@@ -109,11 +109,8 @@ void* CpuManager::GetStartFuncParamater() {
109 109
110void CpuManager::MultiCoreRunGuestThread() { 110void CpuManager::MultiCoreRunGuestThread() {
111 auto& kernel = system.Kernel(); 111 auto& kernel = system.Kernel();
112 { 112 kernel.CurrentScheduler()->OnThreadStart();
113 auto& sched = kernel.CurrentScheduler(); 113 auto* thread = kernel.CurrentScheduler()->GetCurrentThread();
114 sched.OnThreadStart();
115 }
116 auto* thread = kernel.CurrentScheduler().GetCurrentThread();
117 auto& host_context = thread->GetHostContext(); 114 auto& host_context = thread->GetHostContext();
118 host_context->SetRewindPoint(GuestRewindFunction, this); 115 host_context->SetRewindPoint(GuestRewindFunction, this);
119 MultiCoreRunGuestLoop(); 116 MultiCoreRunGuestLoop();
@@ -130,8 +127,8 @@ void CpuManager::MultiCoreRunGuestLoop() {
130 physical_core = &kernel.CurrentPhysicalCore(); 127 physical_core = &kernel.CurrentPhysicalCore();
131 } 128 }
132 system.ExitDynarmicProfile(); 129 system.ExitDynarmicProfile();
133 auto& scheduler = kernel.CurrentScheduler(); 130 physical_core->ArmInterface().ClearExclusiveState();
134 scheduler.TryDoContextSwitch(); 131 kernel.CurrentScheduler()->RescheduleCurrentCore();
135 } 132 }
136} 133}
137 134
@@ -140,25 +137,21 @@ void CpuManager::MultiCoreRunIdleThread() {
140 while (true) { 137 while (true) {
141 auto& physical_core = kernel.CurrentPhysicalCore(); 138 auto& physical_core = kernel.CurrentPhysicalCore();
142 physical_core.Idle(); 139 physical_core.Idle();
143 auto& scheduler = kernel.CurrentScheduler(); 140 kernel.CurrentScheduler()->RescheduleCurrentCore();
144 scheduler.TryDoContextSwitch();
145 } 141 }
146} 142}
147 143
148void CpuManager::MultiCoreRunSuspendThread() { 144void CpuManager::MultiCoreRunSuspendThread() {
149 auto& kernel = system.Kernel(); 145 auto& kernel = system.Kernel();
150 { 146 kernel.CurrentScheduler()->OnThreadStart();
151 auto& sched = kernel.CurrentScheduler();
152 sched.OnThreadStart();
153 }
154 while (true) { 147 while (true) {
155 auto core = kernel.GetCurrentHostThreadID(); 148 auto core = kernel.GetCurrentHostThreadID();
156 auto& scheduler = kernel.CurrentScheduler(); 149 auto& scheduler = *kernel.CurrentScheduler();
157 Kernel::Thread* current_thread = scheduler.GetCurrentThread(); 150 Kernel::Thread* current_thread = scheduler.GetCurrentThread();
158 Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context); 151 Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context);
159 ASSERT(scheduler.ContextSwitchPending()); 152 ASSERT(scheduler.ContextSwitchPending());
160 ASSERT(core == kernel.GetCurrentHostThreadID()); 153 ASSERT(core == kernel.GetCurrentHostThreadID());
161 scheduler.TryDoContextSwitch(); 154 scheduler.RescheduleCurrentCore();
162 } 155 }
163} 156}
164 157
@@ -206,11 +199,8 @@ void CpuManager::MultiCorePause(bool paused) {
206 199
207void CpuManager::SingleCoreRunGuestThread() { 200void CpuManager::SingleCoreRunGuestThread() {
208 auto& kernel = system.Kernel(); 201 auto& kernel = system.Kernel();
209 { 202 kernel.CurrentScheduler()->OnThreadStart();
210 auto& sched = kernel.CurrentScheduler(); 203 auto* thread = kernel.CurrentScheduler()->GetCurrentThread();
211 sched.OnThreadStart();
212 }
213 auto* thread = kernel.CurrentScheduler().GetCurrentThread();
214 auto& host_context = thread->GetHostContext(); 204 auto& host_context = thread->GetHostContext();
215 host_context->SetRewindPoint(GuestRewindFunction, this); 205 host_context->SetRewindPoint(GuestRewindFunction, this);
216 SingleCoreRunGuestLoop(); 206 SingleCoreRunGuestLoop();
@@ -218,7 +208,7 @@ void CpuManager::SingleCoreRunGuestThread() {
218 208
219void CpuManager::SingleCoreRunGuestLoop() { 209void CpuManager::SingleCoreRunGuestLoop() {
220 auto& kernel = system.Kernel(); 210 auto& kernel = system.Kernel();
221 auto* thread = kernel.CurrentScheduler().GetCurrentThread(); 211 auto* thread = kernel.CurrentScheduler()->GetCurrentThread();
222 while (true) { 212 while (true) {
223 auto* physical_core = &kernel.CurrentPhysicalCore(); 213 auto* physical_core = &kernel.CurrentPhysicalCore();
224 system.EnterDynarmicProfile(); 214 system.EnterDynarmicProfile();
@@ -230,9 +220,10 @@ void CpuManager::SingleCoreRunGuestLoop() {
230 thread->SetPhantomMode(true); 220 thread->SetPhantomMode(true);
231 system.CoreTiming().Advance(); 221 system.CoreTiming().Advance();
232 thread->SetPhantomMode(false); 222 thread->SetPhantomMode(false);
223 physical_core->ArmInterface().ClearExclusiveState();
233 PreemptSingleCore(); 224 PreemptSingleCore();
234 auto& scheduler = kernel.Scheduler(current_core); 225 auto& scheduler = kernel.Scheduler(current_core);
235 scheduler.TryDoContextSwitch(); 226 scheduler.RescheduleCurrentCore();
236 } 227 }
237} 228}
238 229
@@ -244,51 +235,53 @@ void CpuManager::SingleCoreRunIdleThread() {
244 system.CoreTiming().AddTicks(1000U); 235 system.CoreTiming().AddTicks(1000U);
245 idle_count++; 236 idle_count++;
246 auto& scheduler = physical_core.Scheduler(); 237 auto& scheduler = physical_core.Scheduler();
247 scheduler.TryDoContextSwitch(); 238 scheduler.RescheduleCurrentCore();
248 } 239 }
249} 240}
250 241
251void CpuManager::SingleCoreRunSuspendThread() { 242void CpuManager::SingleCoreRunSuspendThread() {
252 auto& kernel = system.Kernel(); 243 auto& kernel = system.Kernel();
253 { 244 kernel.CurrentScheduler()->OnThreadStart();
254 auto& sched = kernel.CurrentScheduler();
255 sched.OnThreadStart();
256 }
257 while (true) { 245 while (true) {
258 auto core = kernel.GetCurrentHostThreadID(); 246 auto core = kernel.GetCurrentHostThreadID();
259 auto& scheduler = kernel.CurrentScheduler(); 247 auto& scheduler = *kernel.CurrentScheduler();
260 Kernel::Thread* current_thread = scheduler.GetCurrentThread(); 248 Kernel::Thread* current_thread = scheduler.GetCurrentThread();
261 Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[0].host_context); 249 Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[0].host_context);
262 ASSERT(scheduler.ContextSwitchPending()); 250 ASSERT(scheduler.ContextSwitchPending());
263 ASSERT(core == kernel.GetCurrentHostThreadID()); 251 ASSERT(core == kernel.GetCurrentHostThreadID());
264 scheduler.TryDoContextSwitch(); 252 scheduler.RescheduleCurrentCore();
265 } 253 }
266} 254}
267 255
268void CpuManager::PreemptSingleCore(bool from_running_enviroment) { 256void CpuManager::PreemptSingleCore(bool from_running_enviroment) {
269 std::size_t old_core = current_core; 257 {
270 auto& scheduler = system.Kernel().Scheduler(old_core); 258 auto& scheduler = system.Kernel().Scheduler(current_core);
271 Kernel::Thread* current_thread = scheduler.GetCurrentThread(); 259 Kernel::Thread* current_thread = scheduler.GetCurrentThread();
272 if (idle_count >= 4 || from_running_enviroment) { 260 if (idle_count >= 4 || from_running_enviroment) {
273 if (!from_running_enviroment) { 261 if (!from_running_enviroment) {
274 system.CoreTiming().Idle(); 262 system.CoreTiming().Idle();
275 idle_count = 0; 263 idle_count = 0;
264 }
265 current_thread->SetPhantomMode(true);
266 system.CoreTiming().Advance();
267 current_thread->SetPhantomMode(false);
276 } 268 }
277 current_thread->SetPhantomMode(true); 269 current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES);
278 system.CoreTiming().Advance(); 270 system.CoreTiming().ResetTicks();
279 current_thread->SetPhantomMode(false); 271 scheduler.Unload(scheduler.GetCurrentThread());
272
273 auto& next_scheduler = system.Kernel().Scheduler(current_core);
274 Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext());
280 } 275 }
281 current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES); 276
282 system.CoreTiming().ResetTicks(); 277 // May have changed scheduler
283 scheduler.Unload(); 278 {
284 auto& next_scheduler = system.Kernel().Scheduler(current_core); 279 auto& scheduler = system.Kernel().Scheduler(current_core);
285 Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext()); 280 scheduler.Reload(scheduler.GetCurrentThread());
286 /// May have changed scheduler 281 auto* currrent_thread2 = scheduler.GetCurrentThread();
287 auto& current_scheduler = system.Kernel().Scheduler(current_core); 282 if (!currrent_thread2->IsIdleThread()) {
288 current_scheduler.Reload(); 283 idle_count = 0;
289 auto* currrent_thread2 = current_scheduler.GetCurrentThread(); 284 }
290 if (!currrent_thread2->IsIdleThread()) {
291 idle_count = 0;
292 } 285 }
293} 286}
294 287
@@ -369,8 +362,7 @@ void CpuManager::RunThread(std::size_t core) {
369 return; 362 return;
370 } 363 }
371 364
372 auto& scheduler = system.Kernel().CurrentScheduler(); 365 auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
373 Kernel::Thread* current_thread = scheduler.GetCurrentThread();
374 data.is_running = true; 366 data.is_running = true;
375 Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext()); 367 Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext());
376 data.is_running = false; 368 data.is_running = false;
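Editor's note: the reworked PreemptSingleCore scopes the outgoing scheduler, rotates current_core round-robin, unloads the current thread, yields to the next core's control fiber, and reloads on return. A sketch of just the rotation step — the fiber machinery and Core::Hardware constant are taken from the hunk above, everything else is omitted:

    #include <atomic>
    #include <cstddef>
    #include <iostream>

    constexpr std::size_t NUM_CPU_CORES = 4; // matches Core::Hardware

    int main() {
        std::atomic<std::size_t> current_core{0};
        for (int step = 0; step < 8; ++step) {
            // Equivalent to the store in PreemptSingleCore: after
            // servicing one guest core, hop to the next in order.
            current_core.store((current_core + 1) % NUM_CPU_CORES);
            std::cout << "now emulating core " << current_core.load() << '\n';
        }
    }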
diff --git a/src/core/file_sys/common_funcs.h b/src/core/file_sys/common_funcs.h
new file mode 100644
index 000000000..7ed97aa50
--- /dev/null
+++ b/src/core/file_sys/common_funcs.h
@@ -0,0 +1,56 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace FileSys {
10
11constexpr u64 AOC_TITLE_ID_MASK = 0x7FF;
12constexpr u64 AOC_TITLE_ID_OFFSET = 0x1000;
13constexpr u64 BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000;
14
15/**
16 * Gets the base title ID from a given title ID.
17 *
18 * @param title_id The title ID.
19 * @returns The base title ID.
20 */
21[[nodiscard]] constexpr u64 GetBaseTitleID(u64 title_id) {
22 return title_id & BASE_TITLE_ID_MASK;
23}
24
25/**
26 * Gets the base title ID with a program index offset from a given title ID.
27 *
28 * @param title_id The title ID.
29 * @param program_index The program index.
30 * @returns The base title ID with a program index offset.
31 */
32[[nodiscard]] constexpr u64 GetBaseTitleIDWithProgramIndex(u64 title_id, u64 program_index) {
33 return GetBaseTitleID(title_id) + program_index;
34}
35
36/**
37 * Gets the AOC (Add-On Content) base title ID from a given title ID.
38 *
39 * @param title_id The title ID.
40 * @returns The AOC base title ID.
41 */
42[[nodiscard]] constexpr u64 GetAOCBaseTitleID(u64 title_id) {
43 return GetBaseTitleID(title_id) + AOC_TITLE_ID_OFFSET;
44}
45
46/**
47 * Gets the AOC (Add-On Content) ID from a given AOC title ID.
48 *
49 * @param aoc_title_id The AOC title ID.
50 * @returns The AOC ID.
51 */
52[[nodiscard]] constexpr u64 GetAOCID(u64 aoc_title_id) {
53 return aoc_title_id & AOC_TITLE_ID_MASK;
54}
55
56} // namespace FileSys
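Editor's note: a worked example of the new title-ID helpers, using a made-up title ID. GetBaseTitleID clears the low 13 bits, AOC entries sit 0x1000 above the base, and the low 11 bits of an AOC title ID recover the content index:

    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr std::uint64_t AOC_TITLE_ID_MASK = 0x7FF;
        constexpr std::uint64_t AOC_TITLE_ID_OFFSET = 0x1000;
        constexpr std::uint64_t BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000;

        constexpr std::uint64_t title_id = 0x0100AAAAAAAA0802; // made up

        // GetBaseTitleID: 0x0100AAAAAAAA0000
        constexpr std::uint64_t base = title_id & BASE_TITLE_ID_MASK;

        // GetAOCBaseTitleID: 0x0100AAAAAAAA1000
        constexpr std::uint64_t aoc_base = base + AOC_TITLE_ID_OFFSET;

        // GetAOCID of the third AOC entry: (aoc_base + 3) & 0x7FF == 3
        constexpr std::uint64_t aoc_id = (aoc_base + 3) & AOC_TITLE_ID_MASK;

        std::printf("base=%016llx aoc_base=%016llx aoc_id=%llu\n",
                    static_cast<unsigned long long>(base),
                    static_cast<unsigned long long>(aoc_base),
                    static_cast<unsigned long long>(aoc_id));
    }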
diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp
index 76af47ff9..a6c0337fa 100644
--- a/src/core/file_sys/content_archive.cpp
+++ b/src/core/file_sys/content_archive.cpp
@@ -410,8 +410,9 @@ u8 NCA::GetCryptoRevision() const {
410std::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType type) const { 410std::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType type) const {
411 const auto master_key_id = GetCryptoRevision(); 411 const auto master_key_id = GetCryptoRevision();
412 412
413 if (!keys.HasKey(Core::Crypto::S128KeyType::KeyArea, master_key_id, header.key_index)) 413 if (!keys.HasKey(Core::Crypto::S128KeyType::KeyArea, master_key_id, header.key_index)) {
414 return {}; 414 return std::nullopt;
415 }
415 416
416 std::vector<u8> key_area(header.key_area.begin(), header.key_area.end()); 417 std::vector<u8> key_area(header.key_area.begin(), header.key_area.end());
417 Core::Crypto::AESCipher<Core::Crypto::Key128> cipher( 418 Core::Crypto::AESCipher<Core::Crypto::Key128> cipher(
@@ -420,15 +421,17 @@ std::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType type
420 cipher.Transcode(key_area.data(), key_area.size(), key_area.data(), Core::Crypto::Op::Decrypt); 421 cipher.Transcode(key_area.data(), key_area.size(), key_area.data(), Core::Crypto::Op::Decrypt);
421 422
422 Core::Crypto::Key128 out; 423 Core::Crypto::Key128 out;
423 if (type == NCASectionCryptoType::XTS) 424 if (type == NCASectionCryptoType::XTS) {
424 std::copy(key_area.begin(), key_area.begin() + 0x10, out.begin()); 425 std::copy(key_area.begin(), key_area.begin() + 0x10, out.begin());
425 else if (type == NCASectionCryptoType::CTR || type == NCASectionCryptoType::BKTR) 426 } else if (type == NCASectionCryptoType::CTR || type == NCASectionCryptoType::BKTR) {
426 std::copy(key_area.begin() + 0x20, key_area.begin() + 0x30, out.begin()); 427 std::copy(key_area.begin() + 0x20, key_area.begin() + 0x30, out.begin());
427 else 428 } else {
428 LOG_CRITICAL(Crypto, "Called GetKeyAreaKey on invalid NCASectionCryptoType type={:02X}", 429 LOG_CRITICAL(Crypto, "Called GetKeyAreaKey on invalid NCASectionCryptoType type={:02X}",
429 static_cast<u8>(type)); 430 type);
431 }
432
430 u128 out_128{}; 433 u128 out_128{};
431 memcpy(out_128.data(), out.data(), 16); 434 std::memcpy(out_128.data(), out.data(), sizeof(u128));
432 LOG_TRACE(Crypto, "called with crypto_rev={:02X}, kak_index={:02X}, key={:016X}{:016X}", 435 LOG_TRACE(Crypto, "called with crypto_rev={:02X}, kak_index={:02X}, key={:016X}{:016X}",
433 master_key_id, header.key_index, out_128[1], out_128[0]); 436 master_key_id, header.key_index, out_128[1], out_128[0]);
434 437
@@ -507,7 +510,7 @@ VirtualFile NCA::Decrypt(const NCASectionHeader& s_header, VirtualFile in, u64 s
507 // TODO(DarkLordZach): Find a test case for XTS-encrypted NCAs 510 // TODO(DarkLordZach): Find a test case for XTS-encrypted NCAs
508 default: 511 default:
509 LOG_ERROR(Crypto, "called with unhandled crypto type={:02X}", 512 LOG_ERROR(Crypto, "called with unhandled crypto type={:02X}",
510 static_cast<u8>(s_header.raw.header.crypto_type)); 513 s_header.raw.header.crypto_type);
511 return nullptr; 514 return nullptr;
512 } 515 }
513} 516}
@@ -516,15 +519,17 @@ Loader::ResultStatus NCA::GetStatus() const {
516 return status; 519 return status;
517} 520}
518 521
519std::vector<std::shared_ptr<VfsFile>> NCA::GetFiles() const { 522std::vector<VirtualFile> NCA::GetFiles() const {
520 if (status != Loader::ResultStatus::Success) 523 if (status != Loader::ResultStatus::Success) {
521 return {}; 524 return {};
525 }
522 return files; 526 return files;
523} 527}
524 528
525std::vector<std::shared_ptr<VfsDirectory>> NCA::GetSubdirectories() const { 529std::vector<VirtualDir> NCA::GetSubdirectories() const {
526 if (status != Loader::ResultStatus::Success) 530 if (status != Loader::ResultStatus::Success) {
527 return {}; 531 return {};
532 }
528 return dirs; 533 return dirs;
529} 534}
530 535
@@ -532,7 +537,7 @@ std::string NCA::GetName() const {
532 return file->GetName(); 537 return file->GetName();
533} 538}
534 539
535std::shared_ptr<VfsDirectory> NCA::GetParentDirectory() const { 540VirtualDir NCA::GetParentDirectory() const {
536 return file->GetContainingDirectory(); 541 return file->GetContainingDirectory();
537} 542}
538 543
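Editor's note: GetKeyAreaKey decrypts the whole key area and then slices a 128-bit key out of it at a type-dependent offset — bytes 0x00–0x0F for XTS, 0x20–0x2F for CTR/BKTR, as the hunk above shows. A sketch of just the slicing, over a dummy key area:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
        // Dummy decrypted key area; the real one comes out of AES.
        std::array<std::uint8_t, 0x40> key_area{};
        for (std::size_t i = 0; i < key_area.size(); ++i) {
            key_area[i] = static_cast<std::uint8_t>(i);
        }

        // XTS key: first 16 bytes.
        std::array<std::uint8_t, 0x10> xts_key;
        std::copy(key_area.begin(), key_area.begin() + 0x10, xts_key.begin());

        // CTR/BKTR key: bytes 0x20..0x2F.
        std::array<std::uint8_t, 0x10> ctr_key;
        std::copy(key_area.begin() + 0x20, key_area.begin() + 0x30,
                  ctr_key.begin());

        std::printf("xts starts at %02x, ctr starts at %02x\n",
                    xts_key[0], ctr_key[0]);
    }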
diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h
index 69292232a..e9eccdea3 100644
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -82,7 +82,7 @@ struct NCAHeader {
82}; 82};
83static_assert(sizeof(NCAHeader) == 0x400, "NCAHeader has incorrect size."); 83static_assert(sizeof(NCAHeader) == 0x400, "NCAHeader has incorrect size.");
84 84
85inline bool IsDirectoryExeFS(const std::shared_ptr<VfsDirectory>& pfs) { 85inline bool IsDirectoryExeFS(const VirtualDir& pfs) {
86 // According to switchbrew, an exefs must only contain these two files: 86 // According to switchbrew, an exefs must only contain these two files:
87 return pfs->GetFile("main") != nullptr && pfs->GetFile("main.npdm") != nullptr; 87 return pfs->GetFile("main") != nullptr && pfs->GetFile("main.npdm") != nullptr;
88} 88}
@@ -104,10 +104,10 @@ public:
104 104
105 Loader::ResultStatus GetStatus() const; 105 Loader::ResultStatus GetStatus() const;
106 106
107 std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; 107 std::vector<VirtualFile> GetFiles() const override;
108 std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; 108 std::vector<VirtualDir> GetSubdirectories() const override;
109 std::string GetName() const override; 109 std::string GetName() const override;
110 std::shared_ptr<VfsDirectory> GetParentDirectory() const override; 110 VirtualDir GetParentDirectory() const override;
111 111
112 NCAContentType GetType() const; 112 NCAContentType GetType() const;
113 u64 GetTitleId() const; 113 u64 GetTitleId() const;
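Editor's note: the signature changes in this header (and the matching .cpp hunks) are mechanical — the diff swaps the long spellings for aliases one-for-one, which implies aliases of the following shape (yuzu defines these in its vfs headers; reproduced here as inferred from the substitution, not quoted from the tree):

    #include <memory>

    class VfsFile;
    class VfsDirectory;

    using VirtualFile = std::shared_ptr<VfsFile>;
    using VirtualDir = std::shared_ptr<VfsDirectory>;

    int main() {}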
diff --git a/src/core/file_sys/nca_patch.cpp b/src/core/file_sys/nca_patch.cpp
index 5990a2fd5..adcf0732f 100644
--- a/src/core/file_sys/nca_patch.cpp
+++ b/src/core/file_sys/nca_patch.cpp
@@ -191,7 +191,7 @@ bool BKTR::Resize(std::size_t new_size) {
191 return false; 191 return false;
192} 192}
193 193
194std::shared_ptr<VfsDirectory> BKTR::GetContainingDirectory() const { 194VirtualDir BKTR::GetContainingDirectory() const {
195 return base_romfs->GetContainingDirectory(); 195 return base_romfs->GetContainingDirectory();
196} 196}
197 197
diff --git a/src/core/file_sys/nca_patch.h b/src/core/file_sys/nca_patch.h
index 60c544f8e..503cf473e 100644
--- a/src/core/file_sys/nca_patch.h
+++ b/src/core/file_sys/nca_patch.h
@@ -106,7 +106,7 @@ public:
106 106
107 bool Resize(std::size_t new_size) override; 107 bool Resize(std::size_t new_size) override;
108 108
109 std::shared_ptr<VfsDirectory> GetContainingDirectory() const override; 109 VirtualDir GetContainingDirectory() const override;
110 110
111 bool IsWritable() const override; 111 bool IsWritable() const override;
112 112
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index e9d1607d0..7c3284df8 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -12,6 +12,7 @@
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/string_util.h" 13#include "common/string_util.h"
14#include "core/core.h" 14#include "core/core.h"
15#include "core/file_sys/common_funcs.h"
15#include "core/file_sys/content_archive.h" 16#include "core/file_sys/content_archive.h"
16#include "core/file_sys/control_metadata.h" 17#include "core/file_sys/control_metadata.h"
17#include "core/file_sys/ips_layer.h" 18#include "core/file_sys/ips_layer.h"
@@ -30,7 +31,6 @@ namespace FileSys {
30namespace { 31namespace {
31 32
32constexpr u32 SINGLE_BYTE_MODULUS = 0x100; 33constexpr u32 SINGLE_BYTE_MODULUS = 0x100;
33constexpr u64 DLC_BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000;
34 34
35constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{ 35constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{
36 "main", "main.npdm", "rtld", "sdk", "subsdk0", "subsdk1", "subsdk2", 36 "main", "main.npdm", "rtld", "sdk", "subsdk0", "subsdk1", "subsdk2",
@@ -532,7 +532,7 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u
532 dlc_match.reserve(dlc_entries.size()); 532 dlc_match.reserve(dlc_entries.size());
533 std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match), 533 std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match),
534 [this](const ContentProviderEntry& entry) { 534 [this](const ContentProviderEntry& entry) {
535 return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == title_id && 535 return GetBaseTitleID(entry.title_id) == title_id &&
536 content_provider.GetEntry(entry)->GetStatus() == 536 content_provider.GetEntry(entry)->GetStatus() ==
537 Loader::ResultStatus::Success; 537 Loader::ResultStatus::Success;
538 }); 538 });
diff --git a/src/core/file_sys/romfs_factory.cpp b/src/core/file_sys/romfs_factory.cpp
index 987199747..f4e16e4be 100644
--- a/src/core/file_sys/romfs_factory.cpp
+++ b/src/core/file_sys/romfs_factory.cpp
@@ -7,6 +7,7 @@
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/file_sys/card_image.h" 9#include "core/file_sys/card_image.h"
10#include "core/file_sys/common_funcs.h"
10#include "core/file_sys/content_archive.h" 11#include "core/file_sys/content_archive.h"
11#include "core/file_sys/nca_metadata.h" 12#include "core/file_sys/nca_metadata.h"
12#include "core/file_sys/patch_manager.h" 13#include "core/file_sys/patch_manager.h"
@@ -47,6 +48,27 @@ ResultVal<VirtualFile> RomFSFactory::OpenCurrentProcess(u64 current_process_titl
47 patch_manager.PatchRomFS(file, ivfc_offset, ContentRecordType::Program, update_raw)); 48 patch_manager.PatchRomFS(file, ivfc_offset, ContentRecordType::Program, update_raw));
48} 49}
49 50
51ResultVal<VirtualFile> RomFSFactory::OpenPatchedRomFS(u64 title_id, ContentRecordType type) const {
52 auto nca = content_provider.GetEntry(title_id, type);
53
54 if (nca == nullptr) {
55 // TODO: Find the right error code to use here
56 return RESULT_UNKNOWN;
57 }
58
59 const PatchManager patch_manager{title_id, filesystem_controller, content_provider};
60
61 return MakeResult<VirtualFile>(
62 patch_manager.PatchRomFS(nca->GetRomFS(), nca->GetBaseIVFCOffset(), type));
63}
64
65ResultVal<VirtualFile> RomFSFactory::OpenPatchedRomFSWithProgramIndex(
66 u64 title_id, u8 program_index, ContentRecordType type) const {
67 const auto res_title_id = GetBaseTitleIDWithProgramIndex(title_id, program_index);
68
69 return OpenPatchedRomFS(res_title_id, type);
70}
71
50ResultVal<VirtualFile> RomFSFactory::Open(u64 title_id, StorageId storage, 72ResultVal<VirtualFile> RomFSFactory::Open(u64 title_id, StorageId storage,
51 ContentRecordType type) const { 73 ContentRecordType type) const {
52 const std::shared_ptr<NCA> res = GetEntry(title_id, storage, type); 74 const std::shared_ptr<NCA> res = GetEntry(title_id, storage, type);
diff --git a/src/core/file_sys/romfs_factory.h b/src/core/file_sys/romfs_factory.h
index ec704dfa8..96dd0d578 100644
--- a/src/core/file_sys/romfs_factory.h
+++ b/src/core/file_sys/romfs_factory.h
@@ -42,6 +42,10 @@ public:
42 42
43 void SetPackedUpdate(VirtualFile update_raw); 43 void SetPackedUpdate(VirtualFile update_raw);
44 [[nodiscard]] ResultVal<VirtualFile> OpenCurrentProcess(u64 current_process_title_id) const; 44 [[nodiscard]] ResultVal<VirtualFile> OpenCurrentProcess(u64 current_process_title_id) const;
45 [[nodiscard]] ResultVal<VirtualFile> OpenPatchedRomFS(u64 title_id,
46 ContentRecordType type) const;
47 [[nodiscard]] ResultVal<VirtualFile> OpenPatchedRomFSWithProgramIndex(
48 u64 title_id, u8 program_index, ContentRecordType type) const;
45 [[nodiscard]] ResultVal<VirtualFile> Open(u64 title_id, StorageId storage, 49 [[nodiscard]] ResultVal<VirtualFile> Open(u64 title_id, StorageId storage,
46 ContentRecordType type) const; 50 ContentRecordType type) const;
47 51
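Editor's note: OpenPatchedRomFSWithProgramIndex only rebases the title ID by the program index and forwards to the plain OpenPatchedRomFS, which does the content-provider lookup. A toy mirror of that delegation — the types, the lookup predicate, and the optional-as-error-path are placeholders, not yuzu's API:

    #include <cstdint>
    #include <optional>
    #include <string>

    constexpr std::uint64_t BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000;

    // Stand-in for the content-provider lookup; nullopt plays the role
    // of the RESULT_UNKNOWN error path when no NCA entry exists.
    std::optional<std::string> OpenPatchedRomFS(std::uint64_t title_id) {
        if ((title_id & 0xFF) > 0x0F) {
            return std::nullopt; // arbitrary "not found" rule for the demo
        }
        return "romfs for " + std::to_string(title_id);
    }

    std::optional<std::string> OpenPatchedRomFSWithProgramIndex(
        std::uint64_t title_id, std::uint8_t program_index) {
        // Rebase onto the base title ID plus the program index, then
        // delegate, exactly as the new factory method does above.
        const std::uint64_t rebased =
            (title_id & BASE_TITLE_ID_MASK) + program_index;
        return OpenPatchedRomFS(rebased);
    }

    int main() {
        return OpenPatchedRomFSWithProgramIndex(0x0100AAAAAAAA0802, 1)
                       .has_value()
                   ? 0
                   : 1;
    }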
diff --git a/src/core/file_sys/system_archive/data/font_nintendo_extended.cpp b/src/core/file_sys/system_archive/data/font_nintendo_extended.cpp
index 69d62ce8f..29ef110a6 100644
--- a/src/core/file_sys/system_archive/data/font_nintendo_extended.cpp
+++ b/src/core/file_sys/system_archive/data/font_nintendo_extended.cpp
@@ -6,191 +6,384 @@
6 6
7namespace FileSys::SystemArchive::SharedFontData { 7namespace FileSys::SystemArchive::SharedFontData {
8 8
9const std::array<unsigned char, 2932> FONT_NINTENDO_EXTENDED{{ 9const std::array<unsigned char, 6024> FONT_NINTENDO_EXTENDED{{
10 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x80, 0x00, 0x03, 0x00, 0x70, 0x44, 0x53, 0x49, 0x47, 10 0x00, 0x01, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x80, 0x00, 0x03, 0x00, 0x60, 0x4F, 0x53, 0x2F, 0x32,
11 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0b, 0x6c, 0x00, 0x00, 0x00, 0x08, 0x4f, 0x53, 0x2f, 0x32, 11 0x34, 0x00, 0x1E, 0x26, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x60, 0x63, 0x6D, 0x61, 0x70,
12 0x33, 0x86, 0x1d, 0x9b, 0x00, 0x00, 0x01, 0x78, 0x00, 0x00, 0x00, 0x60, 0x63, 0x6d, 0x61, 0x70, 12 0xC1, 0xE7, 0xC8, 0xF3, 0x00, 0x00, 0x02, 0x0C, 0x00, 0x00, 0x01, 0x72, 0x63, 0x76, 0x74, 0x20,
13 0xc2, 0x06, 0x20, 0xde, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x63, 0x76, 0x74, 0x20, 13 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x05, 0x0C, 0x00, 0x00, 0x00, 0x06, 0x66, 0x70, 0x67, 0x6D,
14 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x04, 0x2c, 0x00, 0x00, 0x00, 0x06, 0x66, 0x70, 0x67, 0x6d, 14 0x06, 0x59, 0x9C, 0x37, 0x00, 0x00, 0x03, 0x80, 0x00, 0x00, 0x01, 0x73, 0x67, 0x61, 0x73, 0x70,
15 0x06, 0x59, 0x9c, 0x37, 0x00, 0x00, 0x02, 0xa0, 0x00, 0x00, 0x01, 0x73, 0x67, 0x61, 0x73, 0x70, 15 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x17, 0x80, 0x00, 0x00, 0x00, 0x08, 0x67, 0x6C, 0x79, 0x66,
16 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x0b, 0x64, 0x00, 0x00, 0x00, 0x08, 0x67, 0x6c, 0x79, 0x66, 16 0x50, 0x0B, 0xEA, 0xFA, 0x00, 0x00, 0x05, 0x50, 0x00, 0x00, 0x0F, 0x04, 0x68, 0x65, 0x61, 0x64,
17 0x10, 0x31, 0x88, 0x00, 0x00, 0x00, 0x04, 0x34, 0x00, 0x00, 0x04, 0x64, 0x68, 0x65, 0x61, 0x64, 17 0x18, 0x65, 0x81, 0x09, 0x00, 0x00, 0x00, 0xEC, 0x00, 0x00, 0x00, 0x36, 0x68, 0x68, 0x65, 0x61,
18 0x15, 0x9d, 0xef, 0x91, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x36, 0x68, 0x68, 0x65, 0x61, 18 0x09, 0x88, 0x03, 0x86, 0x00, 0x00, 0x01, 0x24, 0x00, 0x00, 0x00, 0x24, 0x68, 0x6D, 0x74, 0x78,
19 0x09, 0x60, 0x03, 0x71, 0x00, 0x00, 0x01, 0x34, 0x00, 0x00, 0x00, 0x24, 0x68, 0x6d, 0x74, 0x78, 19 0x0A, 0xF0, 0x01, 0x94, 0x00, 0x00, 0x01, 0xC8, 0x00, 0x00, 0x00, 0x42, 0x6C, 0x6F, 0x63, 0x61,
20 0x0d, 0x2e, 0x03, 0xa7, 0x00, 0x00, 0x01, 0xd8, 0x00, 0x00, 0x00, 0x26, 0x6c, 0x6f, 0x63, 0x61, 20 0x34, 0x80, 0x30, 0x6E, 0x00, 0x00, 0x05, 0x14, 0x00, 0x00, 0x00, 0x3A, 0x6D, 0x61, 0x78, 0x70,
21 0x05, 0xc0, 0x04, 0x6c, 0x00, 0x00, 0x08, 0x98, 0x00, 0x00, 0x00, 0x1e, 0x6d, 0x61, 0x78, 0x70, 21 0x02, 0x2C, 0x00, 0x72, 0x00, 0x00, 0x01, 0x48, 0x00, 0x00, 0x00, 0x20, 0x6E, 0x61, 0x6D, 0x65,
22 0x02, 0x1c, 0x00, 0x5f, 0x00, 0x00, 0x01, 0x58, 0x00, 0x00, 0x00, 0x20, 0x6e, 0x61, 0x6d, 0x65, 22 0xDB, 0xC5, 0x42, 0x4D, 0x00, 0x00, 0x14, 0x54, 0x00, 0x00, 0x01, 0xFE, 0x70, 0x6F, 0x73, 0x74,
23 0x7c, 0xe0, 0x84, 0x5c, 0x00, 0x00, 0x08, 0xb8, 0x00, 0x00, 0x02, 0x09, 0x70, 0x6f, 0x73, 0x74, 23 0xF4, 0xB4, 0xAC, 0xAB, 0x00, 0x00, 0x16, 0x54, 0x00, 0x00, 0x01, 0x2A, 0x70, 0x72, 0x65, 0x70,
24 0x47, 0x4e, 0x74, 0x19, 0x00, 0x00, 0x0a, 0xc4, 0x00, 0x00, 0x00, 0x9e, 0x70, 0x72, 0x65, 0x70, 24 0x1C, 0xFC, 0x7D, 0x9C, 0x00, 0x00, 0x04, 0xF4, 0x00, 0x00, 0x00, 0x16, 0x00, 0x01, 0x00, 0x00,
25 0x1c, 0xfc, 0x7d, 0x9c, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00, 0x16, 0x00, 0x01, 0x00, 0x00, 25 0x00, 0x01, 0x00, 0x00, 0xC9, 0x16, 0x5B, 0x71, 0x5F, 0x0F, 0x3C, 0xF5, 0x00, 0x0B, 0x04, 0x00,
26 0x00, 0x01, 0x00, 0x00, 0x7c, 0xc7, 0xb1, 0x63, 0x5f, 0x0f, 0x3c, 0xf5, 0x00, 0x1b, 0x03, 0xe8, 26 0x00, 0x00, 0x00, 0x00, 0xD9, 0x44, 0x2F, 0x5D, 0x00, 0x00, 0x00, 0x00, 0xDC, 0x02, 0x0D, 0xA7,
27 0x00, 0x00, 0x00, 0x00, 0xd9, 0x44, 0x2f, 0x5d, 0x00, 0x00, 0x00, 0x00, 0xd9, 0x45, 0x7b, 0x69, 27 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x00, 0x00, 0x08, 0x00, 0x02, 0x00, 0x00,
28 0x00, 0x00, 0x00, 0x00, 0x03, 0xe6, 0x03, 0xe8, 0x00, 0x00, 0x00, 0x06, 0x00, 0x02, 0x00, 0x00, 28 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x03, 0x9A, 0xFF, 0x80, 0x02, 0x00, 0x04, 0x00,
29 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x03, 0x84, 0xff, 0x83, 0x01, 0xf4, 0x03, 0xe8, 29 0x00, 0x00, 0x00, 0x00, 0x03, 0xEC, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
30 0x00, 0x00, 0x00, 0x00, 0x03, 0xe6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 30 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x71,
31 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x5e, 31 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00,
32 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 32 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, 0xC4, 0x01, 0x90, 0x00, 0x05,
33 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, 0x74, 0x01, 0x90, 0x00, 0x05, 33 0x00, 0x04, 0x00, 0xD2, 0x00, 0xD2, 0x00, 0x00, 0x01, 0x26, 0x00, 0xD2, 0x00, 0xD2, 0x00, 0x00,
34 0x00, 0x04, 0x00, 0xcd, 0x00, 0xcd, 0x00, 0x00, 0x01, 0x1f, 0x00, 0xcd, 0x00, 0xcd, 0x00, 0x00, 34 0x03, 0xDA, 0x00, 0x68, 0x02, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
35 0x03, 0xc3, 0x00, 0x66, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
36 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 35 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
37 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x00, 0xc0, 0x00, 0x00, 0xe0, 0xe9, 0x03, 0x84, 0xff, 0x83, 36 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x00, 0xC0, 0x00, 0x0D, 0xE0, 0xF0, 0x03, 0x9A, 0xFF, 0x80,
38 0x01, 0xf4, 0x02, 0xee, 0x00, 0xfa, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x03, 0xe8, 37 0x02, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
39 0x02, 0xbc, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x03, 0xe8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 38 0x02, 0xCD, 0x00, 0x00, 0x00, 0x20, 0x00, 0x01, 0x04, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x00,
40 0x00, 0xfa, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x00, 0x03, 0xe8, 0x00, 0xeb, 0x01, 0x21, 0x00, 0xff, 39 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14,
41 0x00, 0xff, 0x01, 0x3d, 0x01, 0x17, 0x00, 0x42, 0x00, 0x1c, 0x00, 0x3e, 0x00, 0x17, 0x00, 0x00, 40 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14,
42 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x68, 0x00, 0x01, 0x00, 0x00, 41 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14,
43 0x00, 0x00, 0x00, 0x1c, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x68, 0x00, 0x06, 0x00, 0x4c, 42 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
44 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 43 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6C,
44 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x50, 0x00, 0x00, 0x00, 0x10,
45 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x20, 0xE0, 0xA9, 0xE0, 0xB4,
46 0xE0, 0xE9, 0xE0, 0xF0, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x20, 0xE0, 0xA0,
47 0xE0, 0xB3, 0xE0, 0xE0, 0xE0, 0xEF, 0xFF, 0xFF, 0x00, 0x01, 0xFF, 0xF5, 0xFF, 0xE3, 0x1F, 0x64,
48 0x1F, 0x5B, 0x1F, 0x30, 0x1F, 0x2B, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
49 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x06, 0x00, 0x00, 0x01, 0x00,
50 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00,
51 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x03, 0x00,
45 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 52 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
46 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
47 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 53 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
48 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0a,
49 0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x20, 0xe0, 0xe9, 0xff, 0xff,
50 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x20, 0xe0, 0xe0, 0xff, 0xff, 0x00, 0x01, 0xff, 0xf5,
51 0xff, 0xe3, 0x1f, 0x24, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
52 0xb8, 0x00, 0x00, 0x2c, 0x4b, 0xb8, 0x00, 0x09, 0x50, 0x58, 0xb1, 0x01, 0x01, 0x8e, 0x59, 0xb8,
53 0x01, 0xff, 0x85, 0xb8, 0x00, 0x44, 0x1d, 0xb9, 0x00, 0x09, 0x00, 0x03, 0x5f, 0x5e, 0x2d, 0xb8,
54 0x00, 0x01, 0x2c, 0x20, 0x20, 0x45, 0x69, 0x44, 0xb0, 0x01, 0x60, 0x2d, 0xb8, 0x00, 0x02, 0x2c,
55 0xb8, 0x00, 0x01, 0x2a, 0x21, 0x2d, 0xb8, 0x00, 0x03, 0x2c, 0x20, 0x46, 0xb0, 0x03, 0x25, 0x46,
56 0x52, 0x58, 0x23, 0x59, 0x20, 0x8a, 0x20, 0x8a, 0x49, 0x64, 0x8a, 0x20, 0x46, 0x20, 0x68, 0x61,
57 0x64, 0xb0, 0x04, 0x25, 0x46, 0x20, 0x68, 0x61, 0x64, 0x52, 0x58, 0x23, 0x65, 0x8a, 0x59, 0x2f,
58 0x20, 0xb0, 0x00, 0x53, 0x58, 0x69, 0x20, 0xb0, 0x00, 0x54, 0x58, 0x21, 0xb0, 0x40, 0x59, 0x1b,
59 0x69, 0x20, 0xb0, 0x00, 0x54, 0x58, 0x21, 0xb0, 0x40, 0x65, 0x59, 0x59, 0x3a, 0x2d, 0xb8, 0x00,
60 0x04, 0x2c, 0x20, 0x46, 0xb0, 0x04, 0x25, 0x46, 0x52, 0x58, 0x23, 0x8a, 0x59, 0x20, 0x46, 0x20,
61 0x6a, 0x61, 0x64, 0xb0, 0x04, 0x25, 0x46, 0x20, 0x6a, 0x61, 0x64, 0x52, 0x58, 0x23, 0x8a, 0x59,
62 0x2f, 0xfd, 0x2d, 0xb8, 0x00, 0x05, 0x2c, 0x4b, 0x20, 0xb0, 0x03, 0x26, 0x50, 0x58, 0x51, 0x58,
63 0xb0, 0x80, 0x44, 0x1b, 0xb0, 0x40, 0x44, 0x59, 0x1b, 0x21, 0x21, 0x20, 0x45, 0xb0, 0xc0, 0x50,
64 0x58, 0xb0, 0xc0, 0x44, 0x1b, 0x21, 0x59, 0x59, 0x2d, 0xb8, 0x00, 0x06, 0x2c, 0x20, 0x20, 0x45,
65 0x69, 0x44, 0xb0, 0x01, 0x60, 0x20, 0x20, 0x45, 0x7d, 0x69, 0x18, 0x44, 0xb0, 0x01, 0x60, 0x2d,
66 0xb8, 0x00, 0x07, 0x2c, 0xb8, 0x00, 0x06, 0x2a, 0x2d, 0xb8, 0x00, 0x08, 0x2c, 0x4b, 0x20, 0xb0,
67 0x03, 0x26, 0x53, 0x58, 0xb0, 0x40, 0x1b, 0xb0, 0x00, 0x59, 0x8a, 0x8a, 0x20, 0xb0, 0x03, 0x26,
68 0x53, 0x58, 0x23, 0x21, 0xb0, 0x80, 0x8a, 0x8a, 0x1b, 0x8a, 0x23, 0x59, 0x20, 0xb0, 0x03, 0x26,
69 0x53, 0x58, 0x23, 0x21, 0xb8, 0x00, 0xc0, 0x8a, 0x8a, 0x1b, 0x8a, 0x23, 0x59, 0x20, 0xb0, 0x03,
70 0x26, 0x53, 0x58, 0x23, 0x21, 0xb8, 0x01, 0x00, 0x8a, 0x8a, 0x1b, 0x8a, 0x23, 0x59, 0x20, 0xb0,
71 0x03, 0x26, 0x53, 0x58, 0x23, 0x21, 0xb8, 0x01, 0x40, 0x8a, 0x8a, 0x1b, 0x8a, 0x23, 0x59, 0x20,
72 0xb8, 0x00, 0x03, 0x26, 0x53, 0x58, 0xb0, 0x03, 0x25, 0x45, 0xb8, 0x01, 0x80, 0x50, 0x58, 0x23,
73 0x21, 0xb8, 0x01, 0x80, 0x23, 0x21, 0x1b, 0xb0, 0x03, 0x25, 0x45, 0x23, 0x21, 0x23, 0x21, 0x59,
74 0x1b, 0x21, 0x59, 0x44, 0x2d, 0xb8, 0x00, 0x09, 0x2c, 0x4b, 0x53, 0x58, 0x45, 0x44, 0x1b, 0x21,
75 0x21, 0x59, 0x2d, 0x00, 0xb8, 0x00, 0x00, 0x2b, 0x00, 0xba, 0x00, 0x01, 0x00, 0x01, 0x00, 0x07,
76 0x2b, 0xb8, 0x00, 0x00, 0x20, 0x45, 0x7d, 0x69, 0x18, 0x44, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00,
77 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x03, 0xe6, 0x03, 0xe8, 0x00, 0x06,
78 0x00, 0x00, 0x35, 0x01, 0x33, 0x15, 0x01, 0x23, 0x35, 0x03, 0x52, 0x94, 0xfc, 0xa6, 0x8c, 0x90,
79 0x03, 0x58, 0x86, 0xfc, 0xa0, 0x8e, 0x00, 0x00, 0x00, 0x02, 0x00, 0xeb, 0x00, 0xcc, 0x02, 0xfb,
80 0x03, 0x1e, 0x00, 0x08, 0x00, 0x0f, 0x00, 0x00, 0x01, 0x33, 0x13, 0x23, 0x27, 0x23, 0x07, 0x23,
81 0x13, 0x17, 0x07, 0x06, 0x15, 0x33, 0x27, 0x07, 0x01, 0xbc, 0x6d, 0xd2, 0x7c, 0x26, 0xcc, 0x26,
82 0x7c, 0xd1, 0x35, 0x40, 0x02, 0x89, 0x45, 0x02, 0x03, 0x1e, 0xfd, 0xae, 0x77, 0x77, 0x02, 0x52,
83 0x9b, 0xcc, 0x08, 0x04, 0xda, 0x02, 0x00, 0x00, 0x00, 0x03, 0x01, 0x21, 0x00, 0xcc, 0x02, 0xc5,
84 0x03, 0x1e, 0x00, 0x15, 0x00, 0x1f, 0x00, 0x2b, 0x00, 0x00, 0x25, 0x11, 0x33, 0x32, 0x1e, 0x02,
85 0x15, 0x14, 0x0e, 0x02, 0x07, 0x1e, 0x01, 0x15, 0x14, 0x0e, 0x02, 0x2b, 0x01, 0x13, 0x33, 0x32,
86 0x36, 0x35, 0x34, 0x26, 0x2b, 0x01, 0x1d, 0x01, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x26, 0x2b,
87 0x01, 0x15, 0x01, 0x21, 0xea, 0x25, 0x3f, 0x2e, 0x1a, 0x0e, 0x15, 0x1b, 0x0e, 0x2d, 0x2d, 0x1a,
88 0x2e, 0x3f, 0x25, 0xf8, 0x76, 0x62, 0x20, 0x2a, 0x28, 0x22, 0x62, 0x76, 0x10, 0x18, 0x11, 0x09,
89 0x22, 0x22, 0x74, 0xcc, 0x02, 0x52, 0x18, 0x2b, 0x3c, 0x24, 0x1d, 0x1f, 0x17, 0x17, 0x14, 0x0f,
90 0x48, 0x2f, 0x24, 0x3f, 0x2e, 0x1a, 0x01, 0x5b, 0x29, 0x20, 0x20, 0x2b, 0x94, 0xf8, 0x0e, 0x16,
91 0x1c, 0x0e, 0x1f, 0x31, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0xff, 0x00, 0xcc, 0x02, 0xe7,
92 0x03, 0x1e, 0x00, 0x0c, 0x00, 0x00, 0x01, 0x33, 0x17, 0x37, 0x33, 0x03, 0x13, 0x23, 0x27, 0x07,
93 0x23, 0x13, 0x03, 0x01, 0x04, 0x86, 0x69, 0x69, 0x86, 0xa3, 0xa8, 0x88, 0x6c, 0x6c, 0x88, 0xa8,
94 0xa3, 0x03, 0x1e, 0xcb, 0xcb, 0xfe, 0xda, 0xfe, 0xd4, 0xcf, 0xcf, 0x01, 0x2c, 0x01, 0x26, 0x00,
95 0x00, 0x01, 0x00, 0xff, 0x00, 0xcc, 0x02, 0xe7, 0x03, 0x1e, 0x00, 0x0f, 0x00, 0x00, 0x01, 0x03,
96 0x33, 0x17, 0x32, 0x15, 0x1e, 0x01, 0x15, 0x1b, 0x01, 0x33, 0x03, 0x15, 0x23, 0x35, 0x01, 0xb8,
97 0xb9, 0x7e, 0x01, 0x01, 0x01, 0x03, 0x70, 0x75, 0x7f, 0xb9, 0x76, 0x01, 0xa3, 0x01, 0x7b, 0x01,
98 0x01, 0x01, 0x05, 0x02, 0xff, 0x00, 0x01, 0x0a, 0xfe, 0x85, 0xd7, 0xd7, 0x00, 0x01, 0x01, 0x3d,
99 0x00, 0xcc, 0x02, 0xa9, 0x03, 0x1e, 0x00, 0x06, 0x00, 0x00, 0x25, 0x11, 0x33, 0x11, 0x33, 0x15,
100 0x21, 0x01, 0x3d, 0x75, 0xf7, 0xfe, 0x94, 0xcc, 0x02, 0x52, 0xfe, 0x10, 0x62, 0x00, 0x00, 0x00,
101 0x00, 0x02, 0x01, 0x17, 0x00, 0xbc, 0x02, 0xcf, 0x03, 0x0e, 0x00, 0x15, 0x00, 0x21, 0x00, 0x00,
102 0x25, 0x11, 0x33, 0x32, 0x1e, 0x02, 0x1d, 0x01, 0x0e, 0x03, 0x1d, 0x01, 0x17, 0x15, 0x23, 0x27,
103 0x23, 0x15, 0x23, 0x13, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x26, 0x2b, 0x01, 0x15, 0x01, 0x17,
104 0xf4, 0x27, 0x40, 0x2e, 0x19, 0x01, 0x1f, 0x24, 0x1e, 0x78, 0x7d, 0x6a, 0x5c, 0x75, 0x76, 0x72,
105 0x12, 0x19, 0x11, 0x08, 0x26, 0x26, 0x6a, 0xbc, 0x02, 0x52, 0x1d, 0x31, 0x42, 0x25, 0x16, 0x18,
106 0x32, 0x2a, 0x1b, 0x02, 0x01, 0xef, 0x06, 0xd7, 0xd7, 0x01, 0x3f, 0x10, 0x1a, 0x1e, 0x0f, 0x23,
107 0x36, 0xb0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x42, 0x00, 0xbc, 0x03, 0xa4, 0x03, 0x0e, 0x00, 0x0a,
108 0x00, 0x11, 0x00, 0x00, 0x13, 0x35, 0x21, 0x15, 0x01, 0x21, 0x15, 0x21, 0x35, 0x01, 0x21, 0x01,
109 0x11, 0x33, 0x11, 0x33, 0x15, 0x21, 0x42, 0x01, 0xa7, 0xfe, 0xeb, 0x01, 0x1b, 0xfe, 0x53, 0x01,
110 0x15, 0xfe, 0xeb, 0x01, 0xf7, 0x75, 0xf6, 0xfe, 0x95, 0x02, 0xac, 0x62, 0x45, 0xfe, 0x55, 0x62,
111 0x47, 0x01, 0xa9, 0xfe, 0x10, 0x02, 0x52, 0xfe, 0x10, 0x62, 0x00, 0x00, 0x00, 0x03, 0x00, 0x1c,
112 0x00, 0xbc, 0x03, 0xca, 0x03, 0x0e, 0x00, 0x0a, 0x00, 0x21, 0x00, 0x2f, 0x00, 0x00, 0x13, 0x35,
113 0x21, 0x15, 0x01, 0x21, 0x15, 0x21, 0x35, 0x01, 0x21, 0x01, 0x11, 0x33, 0x32, 0x1e, 0x02, 0x15,
114 0x14, 0x06, 0x07, 0x0e, 0x03, 0x15, 0x17, 0x15, 0x23, 0x27, 0x23, 0x15, 0x23, 0x13, 0x33, 0x32,
115 0x3e, 0x02, 0x35, 0x34, 0x2e, 0x02, 0x2b, 0x01, 0x15, 0x1c, 0x01, 0xa7, 0xfe, 0xeb, 0x01, 0x1b,
116 0xfe, 0x53, 0x01, 0x15, 0xfe, 0xeb, 0x01, 0xf7, 0xf3, 0x27, 0x41, 0x2d, 0x19, 0x1c, 0x20, 0x01,
117 0x0d, 0x0e, 0x0a, 0x78, 0x7d, 0x69, 0x5c, 0x75, 0x76, 0x71, 0x11, 0x1a, 0x12, 0x09, 0x0a, 0x14,
118 0x1d, 0x13, 0x69, 0x02, 0xac, 0x62, 0x45, 0xfe, 0x55, 0x62, 0x47, 0x01, 0xa9, 0xfe, 0x10, 0x02,
119 0x52, 0x1d, 0x31, 0x42, 0x25, 0x2b, 0x44, 0x1d, 0x01, 0x08, 0x09, 0x07, 0x01, 0xf1, 0x06, 0xd7,
120 0xd7, 0x01, 0x3f, 0x11, 0x19, 0x1f, 0x0e, 0x11, 0x20, 0x19, 0x0f, 0xb0, 0x00, 0x02, 0x00, 0x3e,
121 0x00, 0xb3, 0x03, 0xa8, 0x03, 0x17, 0x00, 0x3a, 0x00, 0x41, 0x00, 0x00, 0x13, 0x34, 0x3e, 0x02,
122 0x33, 0x32, 0x1e, 0x02, 0x15, 0x23, 0x27, 0x34, 0x27, 0x2e, 0x01, 0x23, 0x22, 0x0e, 0x02, 0x15,
123 0x14, 0x16, 0x15, 0x1e, 0x05, 0x15, 0x14, 0x0e, 0x02, 0x23, 0x22, 0x2e, 0x02, 0x35, 0x33, 0x1e,
124 0x01, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x2e, 0x04, 0x35, 0x01, 0x11, 0x33, 0x11, 0x33, 0x15,
125 0x21, 0x50, 0x24, 0x3b, 0x4a, 0x27, 0x28, 0x4b, 0x39, 0x22, 0x73, 0x01, 0x01, 0x08, 0x2b, 0x29,
126 0x10, 0x20, 0x19, 0x0f, 0x01, 0x0b, 0x35, 0x41, 0x46, 0x3b, 0x25, 0x23, 0x3a, 0x4b, 0x27, 0x2b,
127 0x50, 0x3f, 0x26, 0x74, 0x05, 0x34, 0x33, 0x10, 0x20, 0x1a, 0x11, 0x2c, 0x42, 0x4d, 0x42, 0x2c,
128 0x01, 0xef, 0x73, 0xf6, 0xfe, 0x97, 0x02, 0x70, 0x2a, 0x3f, 0x2a, 0x14, 0x18, 0x2e, 0x44, 0x2c,
129 0x02, 0x03, 0x01, 0x27, 0x27, 0x07, 0x10, 0x1a, 0x12, 0x02, 0x0b, 0x02, 0x1f, 0x22, 0x19, 0x17,
130 0x27, 0x3f, 0x34, 0x2c, 0x3e, 0x28, 0x13, 0x1a, 0x32, 0x48, 0x2e, 0x30, 0x30, 0x06, 0x0f, 0x1a,
131 0x13, 0x21, 0x27, 0x1e, 0x1b, 0x29, 0x3e, 0x31, 0xfe, 0x4c, 0x02, 0x53, 0xfe, 0x10, 0x63, 0x00,
132 0x00, 0x03, 0x00, 0x17, 0x00, 0xb3, 0x03, 0xce, 0x03, 0x17, 0x00, 0x38, 0x00, 0x4f, 0x00, 0x5d,
133 0x00, 0x00, 0x13, 0x34, 0x3e, 0x02, 0x33, 0x32, 0x1e, 0x02, 0x15, 0x23, 0x27, 0x34, 0x23, 0x2e,
134 0x01, 0x23, 0x22, 0x0e, 0x02, 0x15, 0x14, 0x1e, 0x04, 0x15, 0x14, 0x0e, 0x02, 0x23, 0x22, 0x2e,
135 0x02, 0x35, 0x33, 0x1e, 0x01, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x26, 0x27, 0x2e, 0x03, 0x35,
136 0x01, 0x11, 0x33, 0x32, 0x1e, 0x02, 0x15, 0x14, 0x06, 0x07, 0x30, 0x0e, 0x02, 0x31, 0x17, 0x15,
137 0x23, 0x27, 0x23, 0x15, 0x23, 0x13, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x2e, 0x02, 0x2b, 0x01,
138 0x15, 0x2a, 0x24, 0x3a, 0x4a, 0x26, 0x29, 0x4b, 0x39, 0x23, 0x73, 0x01, 0x01, 0x08, 0x2a, 0x2a,
139 0x10, 0x1f, 0x1a, 0x10, 0x2c, 0x42, 0x4d, 0x42, 0x2c, 0x23, 0x39, 0x4b, 0x27, 0x2b, 0x51, 0x3f,
140 0x27, 0x75, 0x05, 0x34, 0x33, 0x10, 0x20, 0x1a, 0x10, 0x1f, 0x1c, 0x25, 0x53, 0x47, 0x2e, 0x01,
141 0xed, 0xf3, 0x27, 0x41, 0x2d, 0x19, 0x1c, 0x20, 0x0c, 0x0e, 0x0c, 0x78, 0x7d, 0x68, 0x5d, 0x75,
142 0x76, 0x71, 0x11, 0x1a, 0x12, 0x09, 0x0a, 0x14, 0x1d, 0x13, 0x69, 0x02, 0x71, 0x2a, 0x3e, 0x2a,
143 0x14, 0x18, 0x2e, 0x44, 0x2c, 0x02, 0x02, 0x27, 0x29, 0x07, 0x11, 0x1a, 0x12, 0x1d, 0x24, 0x1c,
144 0x1d, 0x2b, 0x40, 0x32, 0x2c, 0x3f, 0x29, 0x13, 0x1a, 0x31, 0x49, 0x2e, 0x30, 0x30, 0x06, 0x0f,
145 0x19, 0x13, 0x1e, 0x22, 0x0b, 0x0e, 0x20, 0x2f, 0x43, 0x30, 0xfe, 0x4b, 0x02, 0x52, 0x1d, 0x32,
146 0x42, 0x25, 0x2c, 0x42, 0x1d, 0x08, 0x0a, 0x08, 0xf1, 0x06, 0xd7, 0xd7, 0x01, 0x3f, 0x11, 0x19,
147 0x1f, 0x0e, 0x11, 0x20, 0x19, 0x0f, 0xb0, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x12, 0x00, 0x12,
148 0x00, 0x12, 0x00, 0x32, 0x00, 0x72, 0x00, 0x8e, 0x00, 0xac, 0x00, 0xbe, 0x00, 0xf0, 0x01, 0x14,
149 0x01, 0x5c, 0x01, 0xb6, 0x02, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0xa2, 0x00, 0x01,
150 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
151 0x00, 0x02, 0x00, 0x07, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x2f,
152 0x00, 0x17, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x12, 0x00, 0x46, 0x00, 0x01,
153 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x58, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
154 0x00, 0x06, 0x00, 0x12, 0x00, 0x65, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x01, 0x00, 0x20,
155 0x00, 0x77, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x02, 0x00, 0x0e, 0x00, 0x97, 0x00, 0x03,
156 0x00, 0x01, 0x04, 0x09, 0x00, 0x03, 0x00, 0x5e, 0x00, 0xa5, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09,
157 0x00, 0x04, 0x00, 0x24, 0x01, 0x03, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x05, 0x00, 0x1a,
158 0x01, 0x27, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x06, 0x00, 0x24, 0x01, 0x41, 0x00, 0x03,
159 0x00, 0x01, 0x04, 0x09, 0x00, 0x11, 0x00, 0x02, 0x01, 0x65, 0x59, 0x75, 0x7a, 0x75, 0x4f, 0x53,
160 0x53, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x67, 0x75, 0x6c, 0x61,
161 0x72, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x30, 0x30, 0x30, 0x3b, 0x3b,
162 0x59, 0x75, 0x7a, 0x75, 0x4f, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e,
163 0x2d, 0x52, 0x3b, 0x32, 0x30, 0x31, 0x39, 0x3b, 0x46, 0x4c, 0x56, 0x49, 0x2d, 0x36, 0x31, 0x34,
164 0x59, 0x75, 0x7a, 0x75, 0x4f, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e,
165 0x20, 0x52, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x30, 0x30, 0x30, 0x59,
166 0x75, 0x7a, 0x75, 0x4f, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x2d,
167 0x52, 0x00, 0x59, 0x00, 0x75, 0x00, 0x7a, 0x00, 0x75, 0x00, 0x4f, 0x00, 0x53, 0x00, 0x53, 0x00,
168 0x45, 0x00, 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00,
169 0x6e, 0x00, 0x52, 0x00, 0x65, 0x00, 0x67, 0x00, 0x75, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x72, 0x00,
170 0x56, 0x00, 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00, 0x20, 0x00,
171 0x31, 0x00, 0x2e, 0x00, 0x30, 0x00, 0x30, 0x00, 0x30, 0x00, 0x3b, 0x00, 0x3b, 0x00, 0x59, 0x00,
172 0x75, 0x00, 0x7a, 0x00, 0x75, 0x00, 0x4f, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00, 0x78, 0x00,
173 0x74, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00, 0x2d, 0x00,
174 0x52, 0x00, 0x3b, 0x00, 0x32, 0x00, 0x30, 0x00, 0x31, 0x00, 0x39, 0x00, 0x3b, 0x00, 0x46, 0x00,
175 0x4c, 0x00, 0x56, 0x00, 0x49, 0x00, 0x2d, 0x00, 0x36, 0x00, 0x31, 0x00, 0x34, 0x00, 0x59, 0x00,
176 0x75, 0x00, 0x7a, 0x00, 0x75, 0x00, 0x4f, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00, 0x78, 0x00,
177 0x74, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00, 0x20, 0x00,
178 0x52, 0x00, 0x56, 0x00, 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00,
179 0x20, 0x00, 0x31, 0x00, 0x2e, 0x00, 0x30, 0x00, 0x30, 0x00, 0x30, 0x00, 0x59, 0x00, 0x75, 0x00,
180 0x7a, 0x00, 0x75, 0x00, 0x4f, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00, 0x78, 0x00, 0x74, 0x00,
181 0x65, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00, 0x2d, 0x00, 0x52, 0x00,
182 0x52, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x9c, 0x00, 0x32,
183 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 54 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
184 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x01, 0x02, 0x01, 0x03, 0x00, 0x03, 0x01, 0x04, 55 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
185 0x01, 0x05, 0x01, 0x06, 0x01, 0x07, 0x01, 0x08, 0x01, 0x09, 0x01, 0x0a, 0x01, 0x0b, 0x01, 0x0c, 56 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
186 0x01, 0x0d, 0x07, 0x75, 0x6e, 0x69, 0x30, 0x30, 0x30, 0x30, 0x07, 0x75, 0x6e, 0x69, 0x30, 0x30, 57 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
187 0x30, 0x44, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x30, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 58 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
188 0x45, 0x31, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x32, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 59 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
189 0x45, 0x33, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x34, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 60 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
190 0x45, 0x35, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x36, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 61 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
191 0x45, 0x37, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x38, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 62 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
192 0x45, 0x39, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xff, 0xff, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x01, 63 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
193 0x00, 0x00, 0x00, 0x00, 64 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
65 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
66 0xB8, 0x00, 0x00, 0x2C, 0x4B, 0xB8, 0x00, 0x09, 0x50, 0x58, 0xB1, 0x01, 0x01, 0x8E, 0x59, 0xB8,
67 0x01, 0xFF, 0x85, 0xB8, 0x00, 0x44, 0x1D, 0xB9, 0x00, 0x09, 0x00, 0x03, 0x5F, 0x5E, 0x2D, 0xB8,
68 0x00, 0x01, 0x2C, 0x20, 0x20, 0x45, 0x69, 0x44, 0xB0, 0x01, 0x60, 0x2D, 0xB8, 0x00, 0x02, 0x2C,
69 0xB8, 0x00, 0x01, 0x2A, 0x21, 0x2D, 0xB8, 0x00, 0x03, 0x2C, 0x20, 0x46, 0xB0, 0x03, 0x25, 0x46,
70 0x52, 0x58, 0x23, 0x59, 0x20, 0x8A, 0x20, 0x8A, 0x49, 0x64, 0x8A, 0x20, 0x46, 0x20, 0x68, 0x61,
71 0x64, 0xB0, 0x04, 0x25, 0x46, 0x20, 0x68, 0x61, 0x64, 0x52, 0x58, 0x23, 0x65, 0x8A, 0x59, 0x2F,
72 0x20, 0xB0, 0x00, 0x53, 0x58, 0x69, 0x20, 0xB0, 0x00, 0x54, 0x58, 0x21, 0xB0, 0x40, 0x59, 0x1B,
73 0x69, 0x20, 0xB0, 0x00, 0x54, 0x58, 0x21, 0xB0, 0x40, 0x65, 0x59, 0x59, 0x3A, 0x2D, 0xB8, 0x00,
74 0x04, 0x2C, 0x20, 0x46, 0xB0, 0x04, 0x25, 0x46, 0x52, 0x58, 0x23, 0x8A, 0x59, 0x20, 0x46, 0x20,
75 0x6A, 0x61, 0x64, 0xB0, 0x04, 0x25, 0x46, 0x20, 0x6A, 0x61, 0x64, 0x52, 0x58, 0x23, 0x8A, 0x59,
76 0x2F, 0xFD, 0x2D, 0xB8, 0x00, 0x05, 0x2C, 0x4B, 0x20, 0xB0, 0x03, 0x26, 0x50, 0x58, 0x51, 0x58,
77 0xB0, 0x80, 0x44, 0x1B, 0xB0, 0x40, 0x44, 0x59, 0x1B, 0x21, 0x21, 0x20, 0x45, 0xB0, 0xC0, 0x50,
78 0x58, 0xB0, 0xC0, 0x44, 0x1B, 0x21, 0x59, 0x59, 0x2D, 0xB8, 0x00, 0x06, 0x2C, 0x20, 0x20, 0x45,
79 0x69, 0x44, 0xB0, 0x01, 0x60, 0x20, 0x20, 0x45, 0x7D, 0x69, 0x18, 0x44, 0xB0, 0x01, 0x60, 0x2D,
80 0xB8, 0x00, 0x07, 0x2C, 0xB8, 0x00, 0x06, 0x2A, 0x2D, 0xB8, 0x00, 0x08, 0x2C, 0x4B, 0x20, 0xB0,
81 0x03, 0x26, 0x53, 0x58, 0xB0, 0x40, 0x1B, 0xB0, 0x00, 0x59, 0x8A, 0x8A, 0x20, 0xB0, 0x03, 0x26,
82 0x53, 0x58, 0x23, 0x21, 0xB0, 0x80, 0x8A, 0x8A, 0x1B, 0x8A, 0x23, 0x59, 0x20, 0xB0, 0x03, 0x26,
83 0x53, 0x58, 0x23, 0x21, 0xB8, 0x00, 0xC0, 0x8A, 0x8A, 0x1B, 0x8A, 0x23, 0x59, 0x20, 0xB0, 0x03,
84 0x26, 0x53, 0x58, 0x23, 0x21, 0xB8, 0x01, 0x00, 0x8A, 0x8A, 0x1B, 0x8A, 0x23, 0x59, 0x20, 0xB0,
85 0x03, 0x26, 0x53, 0x58, 0x23, 0x21, 0xB8, 0x01, 0x40, 0x8A, 0x8A, 0x1B, 0x8A, 0x23, 0x59, 0x20,
86 0xB8, 0x00, 0x03, 0x26, 0x53, 0x58, 0xB0, 0x03, 0x25, 0x45, 0xB8, 0x01, 0x80, 0x50, 0x58, 0x23,
87 0x21, 0xB8, 0x01, 0x80, 0x23, 0x21, 0x1B, 0xB0, 0x03, 0x25, 0x45, 0x23, 0x21, 0x23, 0x21, 0x59,
88 0x1B, 0x21, 0x59, 0x44, 0x2D, 0xB8, 0x00, 0x09, 0x2C, 0x4B, 0x53, 0x58, 0x45, 0x44, 0x1B, 0x21,
89 0x21, 0x59, 0x2D, 0x00, 0xB8, 0x00, 0x00, 0x2B, 0x00, 0xBA, 0x00, 0x01, 0x00, 0x01, 0x00, 0x07,
90 0x2B, 0xB8, 0x00, 0x00, 0x20, 0x45, 0x7D, 0x69, 0x18, 0x44, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00,
91 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, 0x16, 0x00, 0x16, 0x00, 0x16, 0x00, 0x70,
92 0x00, 0xDC, 0x01, 0x34, 0x01, 0x7C, 0x01, 0xA2, 0x01, 0xF4, 0x02, 0x3C, 0x02, 0xA8, 0x03, 0x4C,
93 0x03, 0xE2, 0x04, 0x20, 0x04, 0x58, 0x04, 0x9A, 0x04, 0xEE, 0x05, 0x32, 0x05, 0x64, 0x05, 0x80,
94 0x05, 0xC6, 0x05, 0xF6, 0x06, 0x54, 0x06, 0xB2, 0x07, 0x38, 0x07, 0x60, 0x07, 0x82, 0x00, 0x00,
95 0x00, 0x02, 0x00, 0xA4, 0xFF, 0xFF, 0x03, 0x5C, 0x03, 0x09, 0x00, 0x03, 0x00, 0x07, 0x00, 0x00,
96 0x13, 0x11, 0x21, 0x11, 0x25, 0x21, 0x11, 0x21, 0xCD, 0x02, 0x66, 0xFD, 0x71, 0x02, 0xB8, 0xFD,
97 0x48, 0x02, 0xE0, 0xFD, 0x48, 0x02, 0xB8, 0x29, 0xFC, 0xF6, 0x00, 0x00, 0x00, 0x04, 0x00, 0x14,
98 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1F, 0x00, 0x2F, 0x00, 0x39, 0x00, 0x00,
99 0x00, 0x22, 0x0E, 0x02, 0x14, 0x1E, 0x02, 0x32, 0x3E, 0x02, 0x34, 0x2E, 0x01, 0x24, 0x32, 0x1E,
100 0x02, 0x14, 0x0E, 0x02, 0x22, 0x2E, 0x02, 0x34, 0x3E, 0x01, 0x13, 0x12, 0x37, 0x33, 0x13, 0x12,
101 0x15, 0x16, 0x23, 0x2F, 0x01, 0x23, 0x07, 0x23, 0x22, 0x26, 0x25, 0x30, 0x27, 0x26, 0x2F, 0x01,
102 0x06, 0x07, 0x06, 0x32, 0x02, 0x5A, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77,
103 0x46, 0x46, 0x77, 0xFE, 0x9E, 0xC8, 0xB7, 0x83, 0x4E, 0x4E, 0x83, 0xB7, 0xC8, 0xB7, 0x83, 0x4E,
104 0x4E, 0x83, 0x23, 0x6C, 0x5E, 0x6D, 0x68, 0x68, 0x01, 0x39, 0x38, 0x2E, 0xD1, 0x2B, 0x37, 0x33,
105 0x04, 0x01, 0x48, 0x1D, 0x1C, 0x0A, 0x05, 0x01, 0x45, 0x01, 0x89, 0x03, 0x3F, 0x46, 0x77, 0xA4,
106 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x4E, 0x83, 0xB7, 0xC8, 0xB7,
107 0x83, 0x4E, 0x4E, 0x83, 0xB7, 0xC8, 0xB7, 0x83, 0xFD, 0x64, 0x01, 0x1A, 0xEB, 0xFE, 0xFE, 0xFE,
108 0xFD, 0x03, 0x01, 0x01, 0x77, 0x78, 0x01, 0xCF, 0x4C, 0x4C, 0x1C, 0x0C, 0x02, 0xBE, 0x02, 0x00,
109 0x00, 0x05, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1B, 0x00, 0x2F,
110 0x00, 0x3A, 0x00, 0x44, 0x00, 0x00, 0x12, 0x14, 0x1E, 0x02, 0x32, 0x3E, 0x02, 0x34, 0x2E, 0x02,
111 0x22, 0x0E, 0x01, 0x02, 0x10, 0x3E, 0x01, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x26, 0x01,
112 0x16, 0x17, 0x14, 0x06, 0x07, 0x06, 0x2B, 0x01, 0x19, 0x01, 0x17, 0x32, 0x17, 0x16, 0x17, 0x16,
113 0x07, 0x06, 0x0F, 0x01, 0x36, 0x37, 0x34, 0x2E, 0x01, 0x27, 0x23, 0x15, 0x33, 0x32, 0x27, 0x32,
114 0x37, 0x36, 0x26, 0x27, 0x26, 0x2B, 0x01, 0x15, 0x45, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46,
115 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE,
116 0xF4, 0xE2, 0x01, 0xF7, 0x61, 0x01, 0x4E, 0x3E, 0x29, 0xAF, 0x4E, 0x81, 0x8B, 0x1D, 0x3C, 0x1F,
117 0x19, 0x04, 0x06, 0x39, 0x57, 0x44, 0x01, 0x1B, 0x2D, 0x51, 0x46, 0x46, 0x47, 0x66, 0x70, 0x16,
118 0x1F, 0x01, 0x2C, 0x08, 0x4B, 0x4C, 0x01, 0xDE, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4,
119 0xA4, 0x77, 0x46, 0x46, 0x77, 0xFE, 0x7C, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2,
120 0x84, 0x84, 0x01, 0x6D, 0x21, 0x5B, 0x40, 0x50, 0x05, 0x03, 0x01, 0x03, 0x01, 0x05, 0x01, 0x05,
121 0x09, 0x30, 0x25, 0x29, 0x40, 0x21, 0xC2, 0x06, 0x3E, 0x1A, 0x21, 0x0B, 0x01, 0x8C, 0xE1, 0x0A,
122 0x0E, 0x54, 0x0B, 0x02, 0x79, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC,
123 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1B, 0x00, 0x38, 0x00, 0x00, 0x12, 0x14, 0x1E, 0x02, 0x32, 0x3E,
124 0x02, 0x34, 0x2E, 0x02, 0x22, 0x0E, 0x01, 0x02, 0x10, 0x3E, 0x01, 0x20, 0x1E, 0x01, 0x10, 0x0E,
125 0x01, 0x20, 0x26, 0x36, 0x34, 0x3F, 0x01, 0x27, 0x26, 0x27, 0x33, 0x17, 0x16, 0x33, 0x36, 0x3F,
126 0x02, 0x32, 0x14, 0x06, 0x16, 0x12, 0x14, 0x2B, 0x01, 0x27, 0x26, 0x06, 0x0F, 0x01, 0x23, 0x45,
127 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2,
128 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x7B, 0x58, 0x58, 0x4D, 0x4F, 0x05, 0x7A,
129 0x34, 0x34, 0x02, 0x01, 0x33, 0x32, 0x3C, 0x3C, 0xA1, 0x01, 0xB0, 0x3E, 0x3F, 0x39, 0x3B, 0x02,
130 0x3A, 0x38, 0x3F, 0x01, 0xDE, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46,
131 0x46, 0x77, 0xFE, 0x7C, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x60,
132 0x02, 0x87, 0x88, 0x79, 0x7A, 0x06, 0x54, 0x54, 0x01, 0x53, 0x53, 0x01, 0x01, 0xFB, 0x04, 0xFE,
133 0xF8, 0x02, 0x5B, 0x5A, 0x03, 0x59, 0x59, 0x00, 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC,
134 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1B, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x22, 0x0E, 0x02, 0x14, 0x1E,
135 0x02, 0x32, 0x3E, 0x02, 0x34, 0x2E, 0x01, 0x24, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E,
136 0x01, 0x10, 0x36, 0x01, 0x35, 0x27, 0x26, 0x34, 0x3B, 0x01, 0x17, 0x16, 0x36, 0x3F, 0x01, 0x33,
137 0x03, 0x15, 0x23, 0x02, 0x5A, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46,
138 0x46, 0x77, 0xFE, 0x7C, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x01,
139 0x36, 0x5E, 0x5F, 0x3C, 0x3D, 0x3D, 0x3D, 0x03, 0x3B, 0x3B, 0x77, 0xBE, 0x68, 0x03, 0x3F, 0x46,
140 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2, 0xFE,
141 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFD, 0xF9, 0x6E, 0x96, 0x95, 0x01, 0x67, 0x67,
142 0x03, 0x66, 0x65, 0xFE, 0xD3, 0xDA, 0x00, 0x00, 0x00, 0x03, 0x00, 0x14, 0xFF, 0xBD, 0x03, 0xEC,
143 0x03, 0x4B, 0x00, 0x06, 0x00, 0x0C, 0x00, 0x12, 0x00, 0x00, 0x01, 0x21, 0x22, 0x15, 0x30, 0x11,
144 0x21, 0x17, 0x21, 0x11, 0x10, 0x25, 0x21, 0x01, 0x11, 0x33, 0x11, 0x21, 0x15, 0x03, 0xBB, 0xFD,
145 0x77, 0xED, 0x03, 0x76, 0x31, 0xFC, 0x28, 0x01, 0x1E, 0x02, 0xBA, 0xFD, 0x5C, 0x68, 0x01, 0x08,
146 0x03, 0x1A, 0xEE, 0xFD, 0xC2, 0x31, 0x02, 0x6F, 0x01, 0x1E, 0x01, 0xFD, 0x36, 0x02, 0x07, 0xFE,
147 0x50, 0x57, 0x00, 0x00, 0x00, 0x04, 0x00, 0x14, 0xFF, 0xBD, 0x03, 0xEC, 0x03, 0x4B, 0x00, 0x06,
148 0x00, 0x0C, 0x00, 0x27, 0x00, 0x32, 0x00, 0x00, 0x05, 0x11, 0x34, 0x27, 0x30, 0x21, 0x11, 0x07,
149 0x11, 0x21, 0x20, 0x19, 0x01, 0x25, 0x11, 0x33, 0x32, 0x17, 0x16, 0x17, 0x16, 0x17, 0x16, 0x07,
150 0x06, 0x07, 0x06, 0x07, 0x1E, 0x02, 0x15, 0x07, 0x23, 0x27, 0x2E, 0x01, 0x2F, 0x01, 0x15, 0x13,
151 0x36, 0x35, 0x34, 0x27, 0x26, 0x27, 0x23, 0x15, 0x33, 0x36, 0x03, 0xBB, 0xED, 0xFD, 0x77, 0x31,
152 0x02, 0xBA, 0x01, 0x1E, 0xFD, 0x2A, 0x77, 0x76, 0x15, 0x49, 0x20, 0x35, 0x08, 0x04, 0x06, 0x13,
153 0x66, 0x0C, 0x01, 0x1F, 0x2E, 0x65, 0x3D, 0x3D, 0x2A, 0x56, 0x28, 0x2E, 0x19, 0x99, 0x3C, 0x20,
154 0x10, 0x56, 0x4F, 0x46, 0x47, 0x12, 0x02, 0x3E, 0xED, 0x01, 0xFC, 0xD4, 0x31, 0x03, 0x8E, 0xFE,
155 0xE1, 0xFD, 0x91, 0xC4, 0x02, 0x07, 0x01, 0x04, 0x13, 0x21, 0x44, 0x1D, 0x19, 0x58, 0x15, 0x02,
156 0x01, 0x13, 0x2D, 0xA2, 0x01, 0x01, 0x3D, 0x81, 0x1A, 0x01, 0x01, 0xDA, 0x01, 0x2D, 0x08, 0x3A,
157 0x29, 0x0F, 0x08, 0x01, 0x85, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x14, 0xFF, 0xF5, 0x03, 0xEC,
158 0x03, 0x13, 0x00, 0x09, 0x00, 0x11, 0x00, 0x26, 0x00, 0x32, 0x00, 0x00, 0x37, 0x21, 0x34, 0x10,
159 0x35, 0x34, 0x27, 0x21, 0x04, 0x11, 0x23, 0x10, 0x25, 0x21, 0x16, 0x15, 0x11, 0x21, 0x37, 0x35,
160 0x37, 0x36, 0x22, 0x2B, 0x01, 0x3D, 0x01, 0x3B, 0x01, 0x1D, 0x01, 0x0F, 0x01, 0x3B, 0x01, 0x1D,
161 0x01, 0x2B, 0x01, 0x25, 0x35, 0x3B, 0x01, 0x1D, 0x01, 0x3B, 0x01, 0x1D, 0x01, 0x2B, 0x01, 0x45,
162 0x03, 0x76, 0x45, 0xFE, 0x2D, 0xFE, 0xA2, 0x31, 0x01, 0x8F, 0x01, 0xD3, 0x76, 0xFC, 0x28, 0xA7,
163 0x68, 0x68, 0x01, 0x5B, 0x5D, 0x90, 0x91, 0x6C, 0x6D, 0x71, 0x70, 0xA0, 0xA0, 0x01, 0x75, 0x27,
164 0x28, 0x63, 0x63, 0x8B, 0x8A, 0x27, 0x69, 0x01, 0xA4, 0x69, 0x44, 0x01, 0x02, 0xFE, 0xA4, 0x01,
165 0x8C, 0x03, 0x01, 0x75, 0xFD, 0x58, 0xBB, 0x24, 0x80, 0x80, 0x21, 0x21, 0x1F, 0x1E, 0x85, 0x86,
166 0x20, 0x22, 0xC3, 0xC3, 0xA1, 0xA3, 0x20, 0x22, 0x00, 0x05, 0x00, 0x14, 0xFF, 0xF5, 0x03, 0xEC,
167 0x03, 0x13, 0x00, 0x08, 0x00, 0x10, 0x00, 0x2B, 0x00, 0x37, 0x00, 0x44, 0x00, 0x00, 0x37, 0x21,
168 0x11, 0x10, 0x25, 0x30, 0x21, 0x06, 0x15, 0x03, 0x11, 0x34, 0x37, 0x21, 0x04, 0x19, 0x01, 0x01,
169 0x35, 0x17, 0x32, 0x17, 0x16, 0x17, 0x16, 0x07, 0x06, 0x07, 0x06, 0x17, 0x16, 0x17, 0x16, 0x17,
170 0x16, 0x23, 0x2F, 0x01, 0x2E, 0x01, 0x2F, 0x01, 0x15, 0x23, 0x37, 0x32, 0x36, 0x37, 0x36, 0x35,
171 0x26, 0x27, 0x26, 0x2B, 0x01, 0x15, 0x05, 0x35, 0x37, 0x36, 0x26, 0x2B, 0x01, 0x35, 0x21, 0x15,
172 0x03, 0x17, 0x15, 0x45, 0x03, 0x76, 0xFE, 0xA2, 0xFE, 0x2D, 0x45, 0x31, 0x76, 0x01, 0xD3, 0x01,
173 0x8F, 0xFE, 0x1E, 0x65, 0x6F, 0x15, 0x46, 0x10, 0x05, 0x04, 0x0D, 0x4F, 0x09, 0x09, 0x1F, 0x1D,
174 0x3A, 0x06, 0x01, 0x30, 0x2F, 0x22, 0x37, 0x1E, 0x29, 0x14, 0x4E, 0x82, 0x34, 0x19, 0x0E, 0x13,
175 0x0A, 0x22, 0x07, 0x38, 0x37, 0xFE, 0x3E, 0x68, 0x68, 0x01, 0x5C, 0x5C, 0x01, 0x20, 0xD8, 0xE1,
176 0x27, 0x01, 0x5D, 0x01, 0x5B, 0x03, 0x01, 0x44, 0xFD, 0x58, 0x02, 0xA8, 0x75, 0x01, 0x03, 0xFE,
177 0x74, 0xFE, 0x71, 0x01, 0x5C, 0xC5, 0x01, 0x04, 0x0C, 0x43, 0x15, 0x1D, 0x44, 0x10, 0x04, 0x06,
178 0x14, 0x2B, 0x56, 0x10, 0x01, 0x01, 0x34, 0x52, 0x1C, 0x01, 0x01, 0xA5, 0xE3, 0x04, 0x06, 0x0A,
179 0x20, 0x2C, 0x04, 0x01, 0x65, 0xE3, 0x47, 0x80, 0x80, 0x01, 0x42, 0x3D, 0xFE, 0xF5, 0x01, 0x41,
180 0x00, 0x04, 0x00, 0x14, 0x00, 0x52, 0x03, 0xEC, 0x02, 0xB6, 0x00, 0x08, 0x00, 0x16, 0x00, 0x64,
181 0x00, 0x70, 0x00, 0x00, 0x25, 0x11, 0x21, 0x22, 0x15, 0x30, 0x15, 0x14, 0x33, 0x11, 0x21, 0x32,
182 0x15, 0x11, 0x14, 0x27, 0x21, 0x22, 0x26, 0x3D, 0x01, 0x34, 0x36, 0x13, 0x26, 0x27, 0x26, 0x27,
183 0x26, 0x37, 0x33, 0x36, 0x37, 0x36, 0x33, 0x16, 0x17, 0x16, 0x17, 0x16, 0x37, 0x36, 0x37, 0x36,
184 0x35, 0x34, 0x27, 0x26, 0x27, 0x26, 0x27, 0x26, 0x27, 0x26, 0x27, 0x26, 0x34, 0x37, 0x36, 0x37,
185 0x36, 0x37, 0x36, 0x17, 0x16, 0x17, 0x16, 0x17, 0x16, 0x17, 0x16, 0x0F, 0x01, 0x22, 0x06, 0x23,
186 0x27, 0x26, 0x27, 0x26, 0x23, 0x22, 0x07, 0x06, 0x07, 0x06, 0x17, 0x16, 0x17, 0x16, 0x17, 0x16,
187 0x17, 0x16, 0x17, 0x16, 0x07, 0x06, 0x07, 0x06, 0x27, 0x37, 0x35, 0x3B, 0x01, 0x1D, 0x01, 0x3B,
188 0x01, 0x1D, 0x01, 0x2B, 0x01, 0x03, 0xBB, 0xFD, 0x2A, 0xA0, 0xA0, 0x02, 0xEE, 0x19, 0x19, 0xFD,
189 0x12, 0x57, 0x7A, 0x7A, 0xCA, 0x38, 0x1D, 0x16, 0x08, 0x03, 0x01, 0x02, 0x0F, 0x0C, 0x1E, 0x01,
190 0x02, 0x04, 0x0C, 0x2B, 0x0F, 0x0E, 0x18, 0x0C, 0x09, 0x04, 0x15, 0x32, 0x23, 0x12, 0x1C, 0x0E,
191 0x09, 0x03, 0x01, 0x01, 0x09, 0x21, 0x0F, 0x14, 0x2E, 0x2A, 0x13, 0x0F, 0x0C, 0x08, 0x0B, 0x05,
192 0x02, 0x01, 0x02, 0x03, 0x36, 0x03, 0x02, 0x03, 0x08, 0x0D, 0x23, 0x16, 0x0E, 0x10, 0x01, 0x01,
193 0x07, 0x0B, 0x32, 0x25, 0x13, 0x26, 0x0F, 0x09, 0x01, 0x01, 0x0F, 0x11, 0x24, 0x21, 0x2A, 0xE3,
194 0x20, 0x20, 0x52, 0x50, 0x71, 0x71, 0x84, 0x02, 0x00, 0xAF, 0xA2, 0xAF, 0x02, 0x32, 0x19, 0xFD,
195 0xCE, 0x19, 0x01, 0x84, 0x5C, 0xA2, 0x5C, 0x85, 0xFE, 0x29, 0x04, 0x1E, 0x18, 0x26, 0x0F, 0x01,
196 0x02, 0x01, 0x03, 0x05, 0x0B, 0x29, 0x06, 0x02, 0x03, 0x04, 0x11, 0x0B, 0x0D, 0x0A, 0x06, 0x12,
197 0x0D, 0x0A, 0x07, 0x0C, 0x18, 0x0D, 0x10, 0x06, 0x18, 0x05, 0x27, 0x14, 0x09, 0x03, 0x0A, 0x0D,
198 0x06, 0x09, 0x09, 0x0D, 0x0F, 0x14, 0x0C, 0x06, 0x03, 0x02, 0x04, 0x10, 0x0A, 0x11, 0x08, 0x09,
199 0x0E, 0x0C, 0x07, 0x0C, 0x0C, 0x0A, 0x07, 0x0F, 0x20, 0x11, 0x18, 0x1E, 0x1A, 0x1E, 0x0C, 0x0B,
200 0x03, 0xAA, 0xA5, 0x89, 0x8A, 0x1C, 0x1B, 0x00, 0x00, 0x05, 0x00, 0x14, 0x00, 0x53, 0x03, 0xEC,
201 0x02, 0xB6, 0x00, 0x08, 0x00, 0x16, 0x00, 0x2E, 0x00, 0x38, 0x00, 0x65, 0x00, 0x00, 0x01, 0x30,
202 0x21, 0x11, 0x21, 0x32, 0x3D, 0x01, 0x34, 0x27, 0x32, 0x16, 0x1D, 0x01, 0x14, 0x06, 0x23, 0x21,
203 0x26, 0x35, 0x11, 0x34, 0x33, 0x01, 0x11, 0x33, 0x32, 0x17, 0x16, 0x17, 0x16, 0x07, 0x06, 0x07,
204 0x17, 0x1E, 0x01, 0x1F, 0x01, 0x23, 0x2A, 0x01, 0x2E, 0x01, 0x23, 0x27, 0x15, 0x37, 0x32, 0x37,
205 0x36, 0x27, 0x2E, 0x01, 0x2B, 0x01, 0x15, 0x05, 0x26, 0x27, 0x37, 0x32, 0x3F, 0x01, 0x16, 0x17,
206 0x1E, 0x01, 0x37, 0x36, 0x27, 0x2E, 0x04, 0x37, 0x3E, 0x01, 0x33, 0x32, 0x17, 0x16, 0x17, 0x14,
207 0x06, 0x27, 0x26, 0x27, 0x26, 0x0E, 0x01, 0x1E, 0x02, 0x17, 0x16, 0x06, 0x07, 0x06, 0x07, 0x06,
208 0x03, 0x1B, 0xFD, 0x2A, 0x02, 0xD6, 0xA0, 0xA0, 0x57, 0x7A, 0x7A, 0x57, 0xFD, 0x12, 0x19, 0x19,
209 0x01, 0xD3, 0x47, 0x44, 0x11, 0x3E, 0x18, 0x21, 0x0B, 0x0C, 0x43, 0x04, 0x17, 0x1C, 0x1E, 0x16,
210 0x26, 0x26, 0x03, 0x4D, 0x18, 0x1E, 0x11, 0x25, 0x3A, 0x0C, 0x22, 0x08, 0x03, 0x1B, 0x3E, 0x29,
211 0xFE, 0xAC, 0x0D, 0x04, 0x02, 0x02, 0x1E, 0x1D, 0x03, 0x02, 0x0C, 0x4C, 0x13, 0x20, 0x07, 0x04,
212 0x1B, 0x56, 0x2D, 0x1C, 0x01, 0x02, 0x44, 0x35, 0x49, 0x1F, 0x10, 0x03, 0x41, 0x01, 0x06, 0x0A,
213 0x16, 0x3C, 0x18, 0x0C, 0x16, 0x5D, 0x15, 0x33, 0x03, 0x2B, 0x1E, 0x34, 0x59, 0x02, 0x84, 0xFE,
214 0x00, 0xAF, 0xA2, 0xAF, 0x32, 0x85, 0x5C, 0xA2, 0x5C, 0x84, 0x01, 0x17, 0x02, 0x32, 0x19, 0xFE,
215 0x2F, 0x01, 0x45, 0x01, 0x02, 0x19, 0x22, 0x32, 0x39, 0x0B, 0x08, 0x0F, 0x27, 0x2F, 0x24, 0x75,
216 0x12, 0x01, 0x88, 0xBB, 0x04, 0x09, 0x2A, 0x0F, 0x0D, 0x53, 0x8A, 0x17, 0x1E, 0x04, 0x03, 0x03,
217 0x0C, 0x04, 0x26, 0x0E, 0x0C, 0x14, 0x1A, 0x0E, 0x0E, 0x16, 0x16, 0x2C, 0x1A, 0x2D, 0x2D, 0x2A,
218 0x16, 0x1D, 0x06, 0x04, 0x01, 0x1A, 0x09, 0x11, 0x09, 0x17, 0x18, 0x0D, 0x17, 0x0C, 0x1B, 0x71,
219 0x1B, 0x12, 0x01, 0x03, 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0F,
220 0x00, 0x1B, 0x00, 0x27, 0x00, 0x00, 0x00, 0x22, 0x0E, 0x02, 0x14, 0x1E, 0x02, 0x32, 0x3E, 0x02,
221 0x34, 0x2E, 0x01, 0x24, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36, 0x13,
222 0x33, 0x35, 0x33, 0x15, 0x33, 0x15, 0x23, 0x15, 0x23, 0x35, 0x23, 0x02, 0x5A, 0xB4, 0xA4, 0x77,
223 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xFE, 0x7C, 0x01, 0x0C, 0xE2, 0x84,
224 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x7C, 0xC5, 0x4E, 0xC5, 0xC4, 0x50, 0xC4, 0x03, 0x3F,
225 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2,
226 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFE, 0xC0, 0xC4, 0xC5, 0x4E, 0xC5, 0xC5,
227 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1B, 0x00, 0x1F,
228 0x00, 0x00, 0x00, 0x22, 0x0E, 0x02, 0x14, 0x1E, 0x02, 0x32, 0x3E, 0x02, 0x34, 0x2E, 0x01, 0x24,
229 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36, 0x13, 0x35, 0x21, 0x15, 0x02,
230 0x5A, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xFE, 0x7C,
231 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x7C, 0x01, 0xD8, 0x03, 0x3F,
232 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2,
233 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFE, 0x71, 0x4E, 0x4E, 0x00, 0x00, 0x00,
234 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, 0x00, 0x1B, 0x00, 0x25,
235 0x00, 0x00, 0x00, 0x20, 0x0E, 0x01, 0x10, 0x1E, 0x01, 0x20, 0x3E, 0x01, 0x10, 0x26, 0x01, 0x12,
236 0x37, 0x33, 0x13, 0x12, 0x15, 0x16, 0x23, 0x2F, 0x01, 0x23, 0x07, 0x23, 0x22, 0x26, 0x25, 0x30,
237 0x27, 0x26, 0x2F, 0x01, 0x06, 0x07, 0x06, 0x32, 0x02, 0x86, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2,
238 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xFD, 0xA0, 0x6C, 0x5E, 0x6D, 0x68, 0x68, 0x01, 0x39, 0x38, 0x2E,
239 0xD1, 0x2B, 0x37, 0x33, 0x04, 0x01, 0x48, 0x1D, 0x1C, 0x0A, 0x05, 0x01, 0x45, 0x01, 0x89, 0x03,
240 0x70, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFD, 0x9A, 0x01, 0x1A,
241 0xEB, 0xFE, 0xFE, 0xFE, 0xFD, 0x03, 0x01, 0x01, 0x77, 0x78, 0x01, 0xCF, 0x4C, 0x4C, 0x1C, 0x0C,
242 0x02, 0xBE, 0x02, 0x00, 0x00, 0x04, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B,
243 0x00, 0x20, 0x00, 0x2B, 0x00, 0x35, 0x00, 0x00, 0x36, 0x10, 0x3E, 0x01, 0x20, 0x1E, 0x01, 0x10,
244 0x0E, 0x01, 0x20, 0x26, 0x01, 0x30, 0x37, 0x36, 0x37, 0x36, 0x27, 0x26, 0x27, 0x26, 0x23, 0x27,
245 0x19, 0x01, 0x33, 0x32, 0x37, 0x3E, 0x01, 0x35, 0x26, 0x07, 0x06, 0x2B, 0x01, 0x35, 0x33, 0x1E,
246 0x02, 0x15, 0x06, 0x27, 0x23, 0x35, 0x33, 0x16, 0x17, 0x16, 0x14, 0x07, 0x06, 0x14, 0x84, 0xE2,
247 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x01, 0xF7, 0x0A, 0x3A, 0x05, 0x04, 0x19,
248 0x20, 0x3B, 0x1D, 0x8B, 0x81, 0x4E, 0xAF, 0x29, 0x3E, 0x4E, 0x01, 0xAE, 0x0D, 0x47, 0x46, 0x46,
249 0x52, 0x2C, 0x1B, 0x01, 0xB7, 0x27, 0x4C, 0x4C, 0x07, 0x2C, 0x1E, 0x16, 0xFE, 0x01, 0x0C, 0xE2,
250 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x01, 0x6D, 0x06, 0x21, 0x40, 0x2A, 0x24, 0x30,
251 0x09, 0x05, 0x01, 0xFE, 0xFB, 0xFE, 0xFD, 0x03, 0x05, 0x4F, 0x41, 0x5B, 0x9B, 0x01, 0x8C, 0x01,
252 0x0B, 0x21, 0x1A, 0x3E, 0xDA, 0x79, 0x01, 0x01, 0x0B, 0x54, 0x0E, 0x0A, 0x00, 0x02, 0x00, 0x14,
253 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, 0x00, 0x29, 0x00, 0x00, 0x36, 0x10, 0x3E, 0x01,
254 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x26, 0x36, 0x14, 0x3B, 0x01, 0x37, 0x36, 0x37, 0x36,
255 0x1F, 0x01, 0x33, 0x32, 0x34, 0x02, 0x26, 0x36, 0x34, 0x23, 0x0F, 0x01, 0x06, 0x07, 0x22, 0x2F,
256 0x01, 0x23, 0x16, 0x1F, 0x01, 0x07, 0x14, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE,
257 0xF4, 0xE2, 0x7B, 0x3D, 0x3F, 0x38, 0x3A, 0x01, 0x02, 0x3A, 0x39, 0x3F, 0x3E, 0xB0, 0x01, 0xA1,
258 0x3C, 0x3C, 0x32, 0x33, 0x01, 0x02, 0x34, 0x34, 0x7A, 0x05, 0x4F, 0x4D, 0x58, 0xFE, 0x01, 0x0C,
259 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x62, 0x02, 0x59, 0x59, 0x02, 0x01, 0x5A,
260 0x5B, 0x02, 0x01, 0x08, 0x04, 0xFB, 0x01, 0x01, 0x53, 0x53, 0x01, 0x54, 0x54, 0x06, 0x7A, 0x79,
261 0x88, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B,
262 0x00, 0x1B, 0x00, 0x00, 0x00, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36,
263 0x01, 0x15, 0x33, 0x35, 0x13, 0x23, 0x07, 0x0E, 0x01, 0x2F, 0x01, 0x23, 0x22, 0x16, 0x1F, 0x01,
264 0x01, 0x7A, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x01, 0x36, 0x68,
265 0xBE, 0x77, 0x3B, 0x3C, 0x02, 0x3D, 0x3D, 0x3D, 0x3D, 0x01, 0x5F, 0x5E, 0x03, 0x70, 0x84, 0xE2,
266 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFD, 0xF9, 0x6D, 0xDA, 0x01, 0x2D, 0x65,
267 0x66, 0x03, 0x67, 0x67, 0x01, 0x95, 0x96, 0x00, 0x00, 0x02, 0x00, 0x14, 0xFF, 0xBF, 0x03, 0xEC,
268 0x03, 0x4A, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x05, 0x21, 0x11, 0x10, 0x05, 0x21, 0x01, 0x21,
269 0x35, 0x21, 0x11, 0x23, 0x03, 0xEC, 0xFC, 0x28, 0x01, 0x14, 0x02, 0xC4, 0xFD, 0x5C, 0x01, 0x70,
270 0xFE, 0xF8, 0x68, 0x41, 0x02, 0x77, 0x01, 0x14, 0x01, 0xFD, 0x38, 0x57, 0x01, 0xB0, 0x00, 0x00,
271 0x00, 0x03, 0x00, 0x14, 0xFF, 0xBF, 0x03, 0xEC, 0x03, 0x49, 0x00, 0x05, 0x00, 0x20, 0x00, 0x2B,
272 0x00, 0x00, 0x17, 0x11, 0x21, 0x20, 0x19, 0x01, 0x25, 0x33, 0x35, 0x17, 0x1E, 0x01, 0x1F, 0x01,
273 0x33, 0x37, 0x2E, 0x02, 0x27, 0x34, 0x37, 0x36, 0x37, 0x36, 0x27, 0x26, 0x27, 0x26, 0x27, 0x26,
274 0x2B, 0x01, 0x05, 0x06, 0x2B, 0x01, 0x35, 0x33, 0x16, 0x17, 0x16, 0x15, 0x14, 0x14, 0x02, 0xC4,
275 0x01, 0x14, 0xFD, 0x2A, 0x69, 0x19, 0x2E, 0x28, 0x56, 0x2A, 0x3D, 0x3D, 0x01, 0x65, 0x2C, 0x20,
276 0x0D, 0x66, 0x13, 0x06, 0x04, 0x09, 0x34, 0x20, 0x49, 0x15, 0x76, 0x77, 0x01, 0x02, 0x0C, 0x47,
277 0x46, 0x4F, 0x56, 0x10, 0x20, 0x41, 0x03, 0x8A, 0xFE, 0xED, 0xFD, 0x89, 0xC2, 0xDA, 0x01, 0x01,
278 0x1A, 0x81, 0x3D, 0x01, 0x01, 0xA3, 0x2C, 0x13, 0x01, 0x02, 0x13, 0x5A, 0x1A, 0x1C, 0x44, 0x21,
279 0x13, 0x04, 0x01, 0xDA, 0x02, 0x85, 0x01, 0x08, 0x0F, 0x29, 0x3A, 0x00, 0x00, 0x03, 0x00, 0x14,
280 0xFF, 0xFB, 0x03, 0xEC, 0x03, 0x0E, 0x00, 0x08, 0x00, 0x15, 0x00, 0x1B, 0x00, 0x00, 0x05, 0x21,
281 0x11, 0x10, 0x21, 0x30, 0x21, 0x32, 0x15, 0x01, 0x21, 0x35, 0x23, 0x13, 0x35, 0x21, 0x15, 0x33,
282 0x32, 0x22, 0x0F, 0x01, 0x05, 0x21, 0x35, 0x23, 0x11, 0x23, 0x03, 0xEC, 0xFC, 0x28, 0x01, 0x8A,
283 0x01, 0xEC, 0x62, 0xFC, 0xCF, 0x01, 0x40, 0xE1, 0xD9, 0xFE, 0xDF, 0x5D, 0x5C, 0x01, 0x67, 0x68,
284 0x01, 0x75, 0x01, 0x15, 0xC6, 0x4F, 0x05, 0x01, 0x89, 0x01, 0x8A, 0x63, 0xFD, 0xE1, 0x42, 0x01,
285 0x0B, 0x3D, 0x42, 0x80, 0x80, 0x48, 0x42, 0x01, 0x44, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x14,
286 0xFF, 0xFB, 0x03, 0xEC, 0x03, 0x0E, 0x00, 0x07, 0x00, 0x22, 0x00, 0x2F, 0x00, 0x3C, 0x00, 0x00,
287 0x17, 0x11, 0x34, 0x37, 0x21, 0x20, 0x19, 0x01, 0x01, 0x15, 0x33, 0x35, 0x17, 0x1E, 0x01, 0x1F,
288 0x02, 0x32, 0x35, 0x26, 0x27, 0x26, 0x27, 0x26, 0x37, 0x36, 0x37, 0x36, 0x27, 0x26, 0x27, 0x26,
289 0x23, 0x27, 0x17, 0x30, 0x23, 0x35, 0x33, 0x32, 0x17, 0x16, 0x17, 0x14, 0x07, 0x0E, 0x01, 0x05,
290 0x21, 0x35, 0x27, 0x13, 0x35, 0x21, 0x15, 0x33, 0x32, 0x14, 0x0F, 0x01, 0x14, 0x62, 0x01, 0xEC,
291 0x01, 0x8A, 0xFE, 0x1E, 0x4E, 0x14, 0x29, 0x1E, 0x37, 0x22, 0x2F, 0x2F, 0x06, 0x3A, 0x1D, 0x1F,
292 0x09, 0x09, 0x4E, 0x0E, 0x04, 0x05, 0x0F, 0x47, 0x15, 0x6F, 0x65, 0x82, 0x34, 0x37, 0x38, 0x07,
293 0x23, 0x09, 0x13, 0x0D, 0x1A, 0xFD, 0xD6, 0x01, 0x40, 0xE1, 0xD8, 0xFE, 0xE0, 0x5C, 0x5C, 0x67,
294 0x68, 0x05, 0x02, 0xB0, 0x62, 0x01, 0xFE, 0x76, 0xFE, 0x77, 0x01, 0x56, 0xC5, 0xA5, 0x01, 0x01,
295 0x1C, 0x52, 0x34, 0x01, 0x01, 0x0E, 0x58, 0x2C, 0x13, 0x06, 0x04, 0x0F, 0x45, 0x1E, 0x14, 0x42,
296 0x0D, 0x04, 0x01, 0xA7, 0x65, 0x01, 0x04, 0x2C, 0x21, 0x09, 0x07, 0x03, 0xE3, 0x41, 0x01, 0x01,
297 0x0B, 0x3D, 0x42, 0x01, 0x80, 0x80, 0x00, 0x00, 0x00, 0x03, 0x00, 0x14, 0x00, 0x5D, 0x03, 0xEC,
298 0x02, 0xAB, 0x00, 0x08, 0x00, 0x37, 0x00, 0x3D, 0x00, 0x00, 0x13, 0x30, 0x21, 0x11, 0x21, 0x22,
299 0x3D, 0x01, 0x34, 0x05, 0x37, 0x34, 0x27, 0x26, 0x27, 0x26, 0x07, 0x06, 0x07, 0x0E, 0x01, 0x17,
300 0x1E, 0x01, 0x17, 0x16, 0x14, 0x07, 0x06, 0x26, 0x27, 0x26, 0x27, 0x22, 0x06, 0x07, 0x22, 0x17,
301 0x1E, 0x01, 0x17, 0x16, 0x37, 0x36, 0x27, 0x26, 0x27, 0x2E, 0x02, 0x37, 0x36, 0x33, 0x32, 0x1F,
302 0x02, 0x33, 0x35, 0x23, 0x11, 0x23, 0xD6, 0x03, 0x16, 0xFC, 0xEA, 0xC2, 0x01, 0xC6, 0x02, 0x01,
303 0x0C, 0x3A, 0x2B, 0x2D, 0x13, 0x10, 0x2B, 0x01, 0x33, 0x17, 0x55, 0x15, 0x04, 0x09, 0x14, 0x58,
304 0x0C, 0x04, 0x02, 0x02, 0x26, 0x14, 0x01, 0x03, 0x08, 0x33, 0x38, 0x5F, 0x20, 0x10, 0x01, 0x03,
305 0x3C, 0x12, 0x59, 0x11, 0x01, 0x02, 0x39, 0x2C, 0x09, 0x02, 0x9D, 0xE2, 0xA2, 0x40, 0x02, 0xAB,
306 0xFD, 0xB2, 0xD2, 0xAA, 0xD2, 0xDC, 0x03, 0x07, 0x0B, 0x38, 0x10, 0x0C, 0x09, 0x04, 0x08, 0x19,
307 0x6C, 0x17, 0x0B, 0x17, 0x11, 0x07, 0x17, 0x0A, 0x1A, 0x0A, 0x29, 0x0C, 0x04, 0x04, 0x02, 0x10,
308 0x25, 0x37, 0x04, 0x06, 0x37, 0x1D, 0x1C, 0x3F, 0x19, 0x08, 0x16, 0x13, 0x0B, 0x1F, 0x2B, 0x04,
309 0xE9, 0x37, 0x01, 0x13, 0x00, 0x04, 0x00, 0x14, 0x00, 0x5D, 0x03, 0xEC, 0x02, 0xAB, 0x00, 0x07,
310 0x00, 0x1F, 0x00, 0x2A, 0x00, 0x58, 0x00, 0x00, 0x01, 0x32, 0x1D, 0x01, 0x14, 0x23, 0x21, 0x11,
311 0x01, 0x33, 0x35, 0x17, 0x1E, 0x03, 0x3B, 0x01, 0x27, 0x2E, 0x01, 0x2F, 0x01, 0x36, 0x37, 0x36,
312 0x27, 0x26, 0x27, 0x26, 0x2B, 0x01, 0x17, 0x30, 0x23, 0x35, 0x33, 0x32, 0x16, 0x17, 0x16, 0x07,
313 0x06, 0x05, 0x16, 0x37, 0x36, 0x37, 0x3E, 0x01, 0x27, 0x2E, 0x03, 0x3E, 0x01, 0x17, 0x16, 0x17,
314 0x30, 0x37, 0x36, 0x27, 0x26, 0x27, 0x26, 0x27, 0x22, 0x06, 0x07, 0x06, 0x1E, 0x03, 0x17, 0x16,
315 0x07, 0x06, 0x26, 0x27, 0x26, 0x27, 0x07, 0x06, 0x23, 0x07, 0x16, 0x03, 0x2A, 0xC2, 0xC2, 0xFC,
316 0xEA, 0x01, 0xEC, 0x41, 0x11, 0x1F, 0x17, 0x4D, 0x02, 0x27, 0x26, 0x16, 0x1E, 0x1C, 0x17, 0x04,
317 0x43, 0x0C, 0x0B, 0x21, 0x18, 0x3E, 0x0F, 0x46, 0x47, 0x66, 0x25, 0x29, 0x3E, 0x1B, 0x03, 0x08,
318 0x22, 0x0C, 0xFE, 0x4D, 0x22, 0x59, 0x34, 0x1E, 0x2B, 0x03, 0x33, 0x16, 0x5C, 0x16, 0x0C, 0x18,
319 0x3C, 0x16, 0x0B, 0x05, 0x22, 0x21, 0x01, 0x03, 0x10, 0x1F, 0x49, 0x36, 0x43, 0x02, 0x01, 0x1C,
320 0x2D, 0x56, 0x1B, 0x04, 0x07, 0x20, 0x13, 0x4B, 0x0D, 0x01, 0x04, 0x1D, 0x1E, 0x02, 0x02, 0x04,
321 0x02, 0xAB, 0xD2, 0xAA, 0xD2, 0x02, 0x4E, 0xFE, 0x39, 0x89, 0x01, 0x01, 0x11, 0x75, 0x01, 0x25,
322 0x2F, 0x27, 0x0F, 0x08, 0x0C, 0x38, 0x33, 0x21, 0x19, 0x02, 0x01, 0x8A, 0x53, 0x0D, 0x0F, 0x2A,
323 0x09, 0x04, 0x8A, 0x3A, 0x03, 0x01, 0x12, 0x1B, 0x71, 0x1B, 0x0C, 0x17, 0x0D, 0x18, 0x17, 0x09,
324 0x11, 0x09, 0x1A, 0x01, 0x01, 0x07, 0x1E, 0x15, 0x29, 0x01, 0x2D, 0x2D, 0x1A, 0x2C, 0x16, 0x16,
325 0x0D, 0x0F, 0x1A, 0x14, 0x0C, 0x0D, 0x27, 0x04, 0x0C, 0x03, 0x03, 0x04, 0x1E, 0x00, 0x00, 0x00,
326 0x00, 0x02, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, 0x00, 0x17, 0x00, 0x00,
327 0x00, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36, 0x13, 0x15, 0x33, 0x15,
328 0x33, 0x35, 0x33, 0x35, 0x23, 0x35, 0x23, 0x15, 0x01, 0x7A, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2,
329 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x7C, 0xC4, 0x50, 0xC4, 0xC5, 0x4E, 0x03, 0x70, 0x84, 0xE2, 0xFE,
330 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFE, 0xC0, 0x4F, 0xC5, 0xC5, 0x4E, 0xC5, 0xC4,
331 0x00, 0x02, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, 0x00, 0x0F, 0x00, 0x00,
332 0x00, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36, 0x13, 0x21, 0x35, 0x21,
333 0x01, 0x7A, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x7C, 0x01, 0xD8,
334 0xFE, 0x28, 0x03, 0x70, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFE,
335 0x71, 0x4E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0xAE, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
336 0x00, 0x00, 0x00, 0x15, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10,
337 0x00, 0x64, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x07, 0x00, 0x85, 0x00, 0x01,
338 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0xAF, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
339 0x00, 0x04, 0x00, 0x10, 0x00, 0xE2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x0D,
340 0x01, 0x0F, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x10, 0x01, 0x3F, 0x00, 0x03,
341 0x00, 0x01, 0x04, 0x09, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09,
342 0x00, 0x01, 0x00, 0x20, 0x00, 0x42, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x02, 0x00, 0x0E,
343 0x00, 0x75, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x03, 0x00, 0x20, 0x00, 0x8D, 0x00, 0x03,
344 0x00, 0x01, 0x04, 0x09, 0x00, 0x04, 0x00, 0x20, 0x00, 0xC0, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09,
345 0x00, 0x05, 0x00, 0x1A, 0x00, 0xF3, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x06, 0x00, 0x20,
346 0x01, 0x1D, 0x00, 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x20, 0x00, 0x45, 0x00, 0x6D,
347 0x00, 0x75, 0x00, 0x6C, 0x00, 0x61, 0x00, 0x74, 0x00, 0x6F, 0x00, 0x72, 0x00, 0x20, 0x00, 0x50,
348 0x00, 0x72, 0x00, 0x6F, 0x00, 0x6A, 0x00, 0x65, 0x00, 0x63, 0x00, 0x74, 0x00, 0x00, 0x59, 0x75,
349 0x7A, 0x75, 0x20, 0x45, 0x6D, 0x75, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x20, 0x50, 0x72, 0x6F, 0x6A,
350 0x65, 0x63, 0x74, 0x00, 0x00, 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x4F, 0x00, 0x53,
351 0x00, 0x53, 0x00, 0x45, 0x00, 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6E, 0x00, 0x73, 0x00, 0x69,
352 0x00, 0x6F, 0x00, 0x6E, 0x00, 0x00, 0x59, 0x75, 0x7A, 0x75, 0x4F, 0x53, 0x53, 0x45, 0x78, 0x74,
353 0x65, 0x6E, 0x73, 0x69, 0x6F, 0x6E, 0x00, 0x00, 0x52, 0x00, 0x65, 0x00, 0x67, 0x00, 0x75, 0x00,
354 0x6C, 0x00, 0x61, 0x00, 0x72, 0x00, 0x00, 0x52, 0x65, 0x67, 0x75, 0x6C, 0x61, 0x72, 0x00, 0x00,
355 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x4F, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00,
356 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6E, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6F, 0x00, 0x6E, 0x00,
357 0x00, 0x59, 0x75, 0x7A, 0x75, 0x4F, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6E, 0x73, 0x69, 0x6F,
358 0x6E, 0x00, 0x00, 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x4F, 0x00, 0x53, 0x00, 0x53,
359 0x00, 0x45, 0x00, 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6E, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6F,
360 0x00, 0x6E, 0x00, 0x00, 0x59, 0x75, 0x7A, 0x75, 0x4F, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6E,
361 0x73, 0x69, 0x6F, 0x6E, 0x00, 0x00, 0x56, 0x00, 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x69, 0x00,
362 0x6F, 0x00, 0x6E, 0x00, 0x20, 0x00, 0x31, 0x00, 0x2E, 0x00, 0x30, 0x00, 0x30, 0x00, 0x30, 0x00,
363 0x00, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x20, 0x31, 0x2E, 0x30, 0x30, 0x30, 0x00, 0x00,
364 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x4F, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00,
365 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6E, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6F, 0x00, 0x6E, 0x00,
366 0x00, 0x59, 0x75, 0x7A, 0x75, 0x4F, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6E, 0x73, 0x69, 0x6F,
367 0x6E, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xB5, 0x00, 0x32,
368 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
369 0x00, 0x00, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x01, 0x02, 0x01, 0x03, 0x00, 0x03, 0x01, 0x04,
370 0x01, 0x05, 0x01, 0x06, 0x01, 0x07, 0x01, 0x08, 0x01, 0x09, 0x01, 0x0A, 0x01, 0x0B, 0x01, 0x0C,
371 0x01, 0x0D, 0x01, 0x0E, 0x01, 0x0F, 0x01, 0x10, 0x01, 0x11, 0x01, 0x12, 0x01, 0x13, 0x01, 0x14,
372 0x01, 0x15, 0x01, 0x16, 0x01, 0x17, 0x01, 0x18, 0x01, 0x19, 0x01, 0x1A, 0x01, 0x1B, 0x07, 0x75,
373 0x6E, 0x69, 0x30, 0x30, 0x30, 0x30, 0x07, 0x75, 0x6E, 0x69, 0x30, 0x30, 0x30, 0x44, 0x07, 0x75,
374 0x6E, 0x69, 0x45, 0x30, 0x41, 0x30, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x31, 0x07, 0x75,
375 0x6E, 0x69, 0x45, 0x30, 0x41, 0x32, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x33, 0x07, 0x75,
376 0x6E, 0x69, 0x45, 0x30, 0x41, 0x34, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x35, 0x07, 0x75,
377 0x6E, 0x69, 0x45, 0x30, 0x41, 0x36, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x37, 0x07, 0x75,
378 0x6E, 0x69, 0x45, 0x30, 0x41, 0x38, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x39, 0x07, 0x75,
379 0x6E, 0x69, 0x45, 0x30, 0x42, 0x33, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x42, 0x34, 0x07, 0x75,
380 0x6E, 0x69, 0x45, 0x30, 0x45, 0x30, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x31, 0x07, 0x75,
381 0x6E, 0x69, 0x45, 0x30, 0x45, 0x32, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x33, 0x07, 0x75,
382 0x6E, 0x69, 0x45, 0x30, 0x45, 0x34, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x35, 0x07, 0x75,
383 0x6E, 0x69, 0x45, 0x30, 0x45, 0x36, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x37, 0x07, 0x75,
384 0x6E, 0x69, 0x45, 0x30, 0x45, 0x38, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x39, 0x07, 0x75,
385 0x6E, 0x69, 0x45, 0x30, 0x45, 0x46, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x46, 0x30, 0x00, 0x00,
386 0x00, 0x01, 0x00, 0x01, 0xFF, 0xFF, 0x00, 0x0F,
194}}; 387}};
195 388
196} // namespace FileSys::SystemArchive::SharedFontData 389} // namespace FileSys::SystemArchive::SharedFontData
diff --git a/src/core/file_sys/system_archive/data/font_nintendo_extended.h b/src/core/file_sys/system_archive/data/font_nintendo_extended.h
index 2089f3db9..edb9df914 100644
--- a/src/core/file_sys/system_archive/data/font_nintendo_extended.h
+++ b/src/core/file_sys/system_archive/data/font_nintendo_extended.h
@@ -8,6 +8,6 @@
8 8
9namespace FileSys::SystemArchive::SharedFontData { 9namespace FileSys::SystemArchive::SharedFontData {
10 10
11extern const std::array<unsigned char, 2932> FONT_NINTENDO_EXTENDED; 11extern const std::array<unsigned char, 6024> FONT_NINTENDO_EXTENDED;
12 12
13} // namespace FileSys::SystemArchive::SharedFontData 13} // namespace FileSys::SystemArchive::SharedFontData
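
The doubled array size (2932 to 6024 bytes) matches the hex payload above: the embedded font blob grew to cover the extra private-use glyphs (uniE0A0 through uniE0F0) named in its glyph-name strings. For orientation, a minimal sketch of how such an embedded array can be surfaced through the VFS layer; it assumes VectorVfsFile accepts an owned byte buffer plus a name, and MakeEmbeddedFontFile as well as the file name in the usage line are hypothetical, not part of this diff.

#include <array>
#include <memory>
#include <string>
#include <vector>

// Hypothetical helper: wrap an embedded byte array in an in-memory VFS file
// so the system-archive builder can serve it like any other file. u8 is
// yuzu's common_types alias for unsigned char; the VectorVfsFile constructor
// signature is assumed here.
template <std::size_t N>
VirtualFile MakeEmbeddedFontFile(const std::array<unsigned char, N>& data, std::string name) {
    std::vector<u8> buffer(data.begin(), data.end()); // copy the blob into an owned buffer
    return std::make_shared<VectorVfsFile>(std::move(buffer), std::move(name));
}

// e.g. MakeEmbeddedFontFile(FONT_NINTENDO_EXTENDED, "font_nintendo_extended.ttf");
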
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
index aa313de66..7bfbc9a67 100644
--- a/src/core/file_sys/system_archive/system_version.cpp
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -12,17 +12,17 @@ namespace SystemVersionData {
12// This section should reflect the best system version to describe yuzu's HLE api. 12// This section should reflect the best system version to describe yuzu's HLE api.
13// TODO(DarkLordZach): Update when HLE gets better. 13// TODO(DarkLordZach): Update when HLE gets better.
14 14
15constexpr u8 VERSION_MAJOR = 10; 15constexpr u8 VERSION_MAJOR = 11;
16constexpr u8 VERSION_MINOR = 0; 16constexpr u8 VERSION_MINOR = 0;
17constexpr u8 VERSION_MICRO = 2; 17constexpr u8 VERSION_MICRO = 0;
18 18
19constexpr u8 REVISION_MAJOR = 1; 19constexpr u8 REVISION_MAJOR = 5;
20constexpr u8 REVISION_MINOR = 0; 20constexpr u8 REVISION_MINOR = 0;
21 21
22constexpr char PLATFORM_STRING[] = "NX"; 22constexpr char PLATFORM_STRING[] = "NX";
23constexpr char VERSION_HASH[] = "f90143fa8bbc061d4f68c35f95f04f8080c0ecdc"; 23constexpr char VERSION_HASH[] = "34197eba8810e2edd5e9dfcfbde7b340882e856d";
24constexpr char DISPLAY_VERSION[] = "10.0.2"; 24constexpr char DISPLAY_VERSION[] = "11.0.0";
25constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.2-1.0"; 25constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 11.0.0-5.0";
26 26
27} // namespace SystemVersionData 27} // namespace SystemVersionData
28 28
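
A quick cross-check of the bumped constants against the display strings: DISPLAY_VERSION is major.minor.micro, and DISPLAY_TITLE appends revision_major.revision_minor. The sketch below only illustrates that correspondence; the real file keeps the strings as separate literals rather than composing them, and the use of fmt here is an assumption.

#include <string>

#include <fmt/format.h>

// Illustrative: the five numeric fields line up with the string constants.
std::string MakeDisplayTitle() {
    return fmt::format("NintendoSDK Firmware for NX {}.{}.{}-{}.{}",
                       VERSION_MAJOR, VERSION_MINOR, VERSION_MICRO,
                       REVISION_MAJOR, REVISION_MINOR);
    // -> "NintendoSDK Firmware for NX 11.0.0-5.0"
}
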
diff --git a/src/core/file_sys/vfs.cpp b/src/core/file_sys/vfs.cpp
index b2f026b6d..f497e9396 100644
--- a/src/core/file_sys/vfs.cpp
+++ b/src/core/file_sys/vfs.cpp
@@ -203,7 +203,7 @@ std::string VfsFile::GetFullPath() const {
203 return GetContainingDirectory()->GetFullPath() + "/" + GetName(); 203 return GetContainingDirectory()->GetFullPath() + "/" + GetName();
204} 204}
205 205
206std::shared_ptr<VfsFile> VfsDirectory::GetFileRelative(std::string_view path) const { 206VirtualFile VfsDirectory::GetFileRelative(std::string_view path) const {
207 auto vec = Common::FS::SplitPathComponents(path); 207 auto vec = Common::FS::SplitPathComponents(path);
208 vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }), 208 vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }),
209 vec.end()); 209 vec.end());
@@ -231,7 +231,7 @@ std::shared_ptr<VfsFile> VfsDirectory::GetFileRelative(std::string_view path) co
231 return dir->GetFile(vec.back()); 231 return dir->GetFile(vec.back());
232} 232}
233 233
234std::shared_ptr<VfsFile> VfsDirectory::GetFileAbsolute(std::string_view path) const { 234VirtualFile VfsDirectory::GetFileAbsolute(std::string_view path) const {
235 if (IsRoot()) { 235 if (IsRoot()) {
236 return GetFileRelative(path); 236 return GetFileRelative(path);
237 } 237 }
@@ -239,7 +239,7 @@ std::shared_ptr<VfsFile> VfsDirectory::GetFileAbsolute(std::string_view path) co
239 return GetParentDirectory()->GetFileAbsolute(path); 239 return GetParentDirectory()->GetFileAbsolute(path);
240} 240}
241 241
242std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryRelative(std::string_view path) const { 242VirtualDir VfsDirectory::GetDirectoryRelative(std::string_view path) const {
243 auto vec = Common::FS::SplitPathComponents(path); 243 auto vec = Common::FS::SplitPathComponents(path);
244 vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }), 244 vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }),
245 vec.end()); 245 vec.end());
@@ -261,7 +261,7 @@ std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryRelative(std::string_vie
261 return dir; 261 return dir;
262} 262}
263 263
264std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryAbsolute(std::string_view path) const { 264VirtualDir VfsDirectory::GetDirectoryAbsolute(std::string_view path) const {
265 if (IsRoot()) { 265 if (IsRoot()) {
266 return GetDirectoryRelative(path); 266 return GetDirectoryRelative(path);
267 } 267 }
@@ -269,14 +269,14 @@ std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryAbsolute(std::string_vie
269 return GetParentDirectory()->GetDirectoryAbsolute(path); 269 return GetParentDirectory()->GetDirectoryAbsolute(path);
270} 270}
271 271
272std::shared_ptr<VfsFile> VfsDirectory::GetFile(std::string_view name) const { 272VirtualFile VfsDirectory::GetFile(std::string_view name) const {
273 const auto& files = GetFiles(); 273 const auto& files = GetFiles();
274 const auto iter = std::find_if(files.begin(), files.end(), 274 const auto iter = std::find_if(files.begin(), files.end(),
275 [&name](const auto& file1) { return name == file1->GetName(); }); 275 [&name](const auto& file1) { return name == file1->GetName(); });
276 return iter == files.end() ? nullptr : *iter; 276 return iter == files.end() ? nullptr : *iter;
277} 277}
278 278
279std::shared_ptr<VfsDirectory> VfsDirectory::GetSubdirectory(std::string_view name) const { 279VirtualDir VfsDirectory::GetSubdirectory(std::string_view name) const {
280 const auto& subs = GetSubdirectories(); 280 const auto& subs = GetSubdirectories();
281 const auto iter = std::find_if(subs.begin(), subs.end(), 281 const auto iter = std::find_if(subs.begin(), subs.end(),
282 [&name](const auto& file1) { return name == file1->GetName(); }); 282 [&name](const auto& file1) { return name == file1->GetName(); });
@@ -301,7 +301,7 @@ std::size_t VfsDirectory::GetSize() const {
301 return file_total + subdir_total; 301 return file_total + subdir_total;
302} 302}
303 303
304std::shared_ptr<VfsFile> VfsDirectory::CreateFileRelative(std::string_view path) { 304VirtualFile VfsDirectory::CreateFileRelative(std::string_view path) {
305 auto vec = Common::FS::SplitPathComponents(path); 305 auto vec = Common::FS::SplitPathComponents(path);
306 vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }), 306 vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }),
307 vec.end()); 307 vec.end());
@@ -324,7 +324,7 @@ std::shared_ptr<VfsFile> VfsDirectory::CreateFileRelative(std::string_view path)
324 return dir->CreateFileRelative(Common::FS::GetPathWithoutTop(path)); 324 return dir->CreateFileRelative(Common::FS::GetPathWithoutTop(path));
325} 325}
326 326
327std::shared_ptr<VfsFile> VfsDirectory::CreateFileAbsolute(std::string_view path) { 327VirtualFile VfsDirectory::CreateFileAbsolute(std::string_view path) {
328 if (IsRoot()) { 328 if (IsRoot()) {
329 return CreateFileRelative(path); 329 return CreateFileRelative(path);
330 } 330 }
@@ -332,7 +332,7 @@ std::shared_ptr<VfsFile> VfsDirectory::CreateFileAbsolute(std::string_view path)
332 return GetParentDirectory()->CreateFileAbsolute(path); 332 return GetParentDirectory()->CreateFileAbsolute(path);
333} 333}
334 334
335std::shared_ptr<VfsDirectory> VfsDirectory::CreateDirectoryRelative(std::string_view path) { 335VirtualDir VfsDirectory::CreateDirectoryRelative(std::string_view path) {
336 auto vec = Common::FS::SplitPathComponents(path); 336 auto vec = Common::FS::SplitPathComponents(path);
337 vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }), 337 vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }),
338 vec.end()); 338 vec.end());
@@ -355,7 +355,7 @@ std::shared_ptr<VfsDirectory> VfsDirectory::CreateDirectoryRelative(std::string_
355 return dir->CreateDirectoryRelative(Common::FS::GetPathWithoutTop(path)); 355 return dir->CreateDirectoryRelative(Common::FS::GetPathWithoutTop(path));
356} 356}
357 357
358std::shared_ptr<VfsDirectory> VfsDirectory::CreateDirectoryAbsolute(std::string_view path) { 358VirtualDir VfsDirectory::CreateDirectoryAbsolute(std::string_view path) {
359 if (IsRoot()) { 359 if (IsRoot()) {
360 return CreateDirectoryRelative(path); 360 return CreateDirectoryRelative(path);
361 } 361 }
@@ -446,27 +446,27 @@ bool ReadOnlyVfsDirectory::IsReadable() const {
446 return true; 446 return true;
447} 447}
448 448
449std::shared_ptr<VfsDirectory> ReadOnlyVfsDirectory::CreateSubdirectory(std::string_view name) { 449VirtualDir ReadOnlyVfsDirectory::CreateSubdirectory(std::string_view name) {
450 return nullptr; 450 return nullptr;
451} 451}
452 452
453std::shared_ptr<VfsFile> ReadOnlyVfsDirectory::CreateFile(std::string_view name) { 453VirtualFile ReadOnlyVfsDirectory::CreateFile(std::string_view name) {
454 return nullptr; 454 return nullptr;
455} 455}
456 456
457std::shared_ptr<VfsFile> ReadOnlyVfsDirectory::CreateFileAbsolute(std::string_view path) { 457VirtualFile ReadOnlyVfsDirectory::CreateFileAbsolute(std::string_view path) {
458 return nullptr; 458 return nullptr;
459} 459}
460 460
461std::shared_ptr<VfsFile> ReadOnlyVfsDirectory::CreateFileRelative(std::string_view path) { 461VirtualFile ReadOnlyVfsDirectory::CreateFileRelative(std::string_view path) {
462 return nullptr; 462 return nullptr;
463} 463}
464 464
465std::shared_ptr<VfsDirectory> ReadOnlyVfsDirectory::CreateDirectoryAbsolute(std::string_view path) { 465VirtualDir ReadOnlyVfsDirectory::CreateDirectoryAbsolute(std::string_view path) {
466 return nullptr; 466 return nullptr;
467} 467}
468 468
469std::shared_ptr<VfsDirectory> ReadOnlyVfsDirectory::CreateDirectoryRelative(std::string_view path) { 469VirtualDir ReadOnlyVfsDirectory::CreateDirectoryRelative(std::string_view path) {
470 return nullptr; 470 return nullptr;
471} 471}
472 472
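
All of the retyped *Relative methods share one traversal idiom, visible across the hunks above: split the path into components, drop the empty ones, walk subdirectories, then resolve the final component. Condensed into a single sketch below; Common::FS::SplitPathComponents is the real helper used in the hunks, while the wrapper function itself is illustrative and assumes the project's vfs and path-utility headers are included.

#include <algorithm>
#include <cstddef>
#include <string_view>

// Illustrative condensation of the lookup pattern in GetFileRelative et al.
VirtualFile ResolveFileSketch(const VirtualDir& root, std::string_view path) {
    auto vec = Common::FS::SplitPathComponents(path);
    vec.erase(std::remove_if(vec.begin(), vec.end(),
                             [](const auto& str) { return str.empty(); }),
              vec.end());
    if (vec.empty()) {
        return nullptr; // nothing to resolve
    }
    VirtualDir dir = root;
    // Walk every component except the last, which names the file itself.
    for (std::size_t i = 0; i + 1 < vec.size(); ++i) {
        dir = dir->GetSubdirectory(vec[i]);
        if (dir == nullptr) {
            return nullptr;
        }
    }
    return dir->GetFile(vec.back());
}
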
diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h
index 954094772..afd64e95c 100644
--- a/src/core/file_sys/vfs.h
+++ b/src/core/file_sys/vfs.h
@@ -91,7 +91,7 @@ public:
91 // Resizes the file to new_size. Returns whether or not the operation was successful. 91 // Resizes the file to new_size. Returns whether or not the operation was successful.
92 virtual bool Resize(std::size_t new_size) = 0; 92 virtual bool Resize(std::size_t new_size) = 0;
93 // Gets a pointer to the directory containing this file, returning nullptr if there is none. 93 // Gets a pointer to the directory containing this file, returning nullptr if there is none.
94 virtual std::shared_ptr<VfsDirectory> GetContainingDirectory() const = 0; 94 virtual VirtualDir GetContainingDirectory() const = 0;
95 95
96 // Returns whether or not the file can be written to. 96 // Returns whether or not the file can be written to.
97 virtual bool IsWritable() const = 0; 97 virtual bool IsWritable() const = 0;
@@ -183,27 +183,27 @@ public:
183 183
184 // Retrieves the file located at path as if the current directory was root. Returns nullptr if 184 // Retrieves the file located at path as if the current directory was root. Returns nullptr if
185 // not found. 185 // not found.
186 virtual std::shared_ptr<VfsFile> GetFileRelative(std::string_view path) const; 186 virtual VirtualFile GetFileRelative(std::string_view path) const;
187 // Calls GetFileRelative(path) on the root of the current directory. 187 // Calls GetFileRelative(path) on the root of the current directory.
188 virtual std::shared_ptr<VfsFile> GetFileAbsolute(std::string_view path) const; 188 virtual VirtualFile GetFileAbsolute(std::string_view path) const;
189 189
190 // Retrieves the directory located at path as if the current directory was root. Returns nullptr 190 // Retrieves the directory located at path as if the current directory was root. Returns nullptr
191 // if not found. 191 // if not found.
192 virtual std::shared_ptr<VfsDirectory> GetDirectoryRelative(std::string_view path) const; 192 virtual VirtualDir GetDirectoryRelative(std::string_view path) const;
193 // Calls GetDirectoryRelative(path) on the root of the current directory. 193 // Calls GetDirectoryRelative(path) on the root of the current directory.
194 virtual std::shared_ptr<VfsDirectory> GetDirectoryAbsolute(std::string_view path) const; 194 virtual VirtualDir GetDirectoryAbsolute(std::string_view path) const;
195 195
196 // Returns a vector containing all of the files in this directory. 196 // Returns a vector containing all of the files in this directory.
197 virtual std::vector<std::shared_ptr<VfsFile>> GetFiles() const = 0; 197 virtual std::vector<VirtualFile> GetFiles() const = 0;
198 // Returns the file with filename matching name. Returns nullptr if directory doesn't have a 198 // Returns the file with filename matching name. Returns nullptr if directory doesn't have a
199 // file with name. 199 // file with name.
200 virtual std::shared_ptr<VfsFile> GetFile(std::string_view name) const; 200 virtual VirtualFile GetFile(std::string_view name) const;
201 201
202 // Returns a vector containing all of the subdirectories in this directory. 202 // Returns a vector containing all of the subdirectories in this directory.
203 virtual std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const = 0; 203 virtual std::vector<VirtualDir> GetSubdirectories() const = 0;
204 // Returns the directory with name matching name. Returns nullptr if directory doesn't have a 204 // Returns the directory with name matching name. Returns nullptr if directory doesn't have a
205 // directory with name. 205 // directory with name.
206 virtual std::shared_ptr<VfsDirectory> GetSubdirectory(std::string_view name) const; 206 virtual VirtualDir GetSubdirectory(std::string_view name) const;
207 207
208 // Returns whether or not the directory can be written to. 208 // Returns whether or not the directory can be written to.
209 virtual bool IsWritable() const = 0; 209 virtual bool IsWritable() const = 0;
@@ -219,31 +219,31 @@ public:
219 virtual std::size_t GetSize() const; 219 virtual std::size_t GetSize() const;
220 // Returns the parent directory of this directory. Returns nullptr if this directory is root or 220 // Returns the parent directory of this directory. Returns nullptr if this directory is root or
221 // has no parent. 221 // has no parent.
222 virtual std::shared_ptr<VfsDirectory> GetParentDirectory() const = 0; 222 virtual VirtualDir GetParentDirectory() const = 0;
223 223
224 // Creates a new subdirectory with name name. Returns a pointer to the new directory or nullptr 224 // Creates a new subdirectory with name name. Returns a pointer to the new directory or nullptr
225 // if the operation failed. 225 // if the operation failed.
226 virtual std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) = 0; 226 virtual VirtualDir CreateSubdirectory(std::string_view name) = 0;
227 // Creates a new file with name name. Returns a pointer to the new file or nullptr if the 227 // Creates a new file with name name. Returns a pointer to the new file or nullptr if the
228 // operation failed. 228 // operation failed.
229 virtual std::shared_ptr<VfsFile> CreateFile(std::string_view name) = 0; 229 virtual VirtualFile CreateFile(std::string_view name) = 0;
230 230
231 // Creates a new file at the path relative to this directory. Also creates directories if 231 // Creates a new file at the path relative to this directory. Also creates directories if
232 // they do not exist and is supported by this implementation. Returns nullptr on any failure. 232 // they do not exist and is supported by this implementation. Returns nullptr on any failure.
233 virtual std::shared_ptr<VfsFile> CreateFileRelative(std::string_view path); 233 virtual VirtualFile CreateFileRelative(std::string_view path);
234 234
235 // Creates a new file at the path relative to root of this directory. Also creates directories 235 // Creates a new file at the path relative to root of this directory. Also creates directories
236 // if they do not exist and is supported by this implementation. Returns nullptr on any failure. 236 // if they do not exist and is supported by this implementation. Returns nullptr on any failure.
237 virtual std::shared_ptr<VfsFile> CreateFileAbsolute(std::string_view path); 237 virtual VirtualFile CreateFileAbsolute(std::string_view path);
238 238
239 // Creates a new directory at the path relative to this directory. Also creates directories if 239 // Creates a new directory at the path relative to this directory. Also creates directories if
240 // they do not exist and is supported by this implementation. Returns nullptr on any failure. 240 // they do not exist and is supported by this implementation. Returns nullptr on any failure.
241 virtual std::shared_ptr<VfsDirectory> CreateDirectoryRelative(std::string_view path); 241 virtual VirtualDir CreateDirectoryRelative(std::string_view path);
242 242
243 // Creates a new directory at the path relative to root of this directory. Also creates 243 // Creates a new directory at the path relative to root of this directory. Also creates
244 // directories if they do not exist and is supported by this implementation. Returns nullptr on 244 // directories if they do not exist and is supported by this implementation. Returns nullptr on
245 // any failure. 245 // any failure.
246 virtual std::shared_ptr<VfsDirectory> CreateDirectoryAbsolute(std::string_view path); 246 virtual VirtualDir CreateDirectoryAbsolute(std::string_view path);
247 247
248 // Deletes the subdirectory with the given name and returns true on success. 248 // Deletes the subdirectory with the given name and returns true on success.
249 virtual bool DeleteSubdirectory(std::string_view name) = 0; 249 virtual bool DeleteSubdirectory(std::string_view name) = 0;
@@ -280,12 +280,12 @@ class ReadOnlyVfsDirectory : public VfsDirectory {
280public: 280public:
281 bool IsWritable() const override; 281 bool IsWritable() const override;
282 bool IsReadable() const override; 282 bool IsReadable() const override;
283 std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; 283 VirtualDir CreateSubdirectory(std::string_view name) override;
284 std::shared_ptr<VfsFile> CreateFile(std::string_view name) override; 284 VirtualFile CreateFile(std::string_view name) override;
285 std::shared_ptr<VfsFile> CreateFileAbsolute(std::string_view path) override; 285 VirtualFile CreateFileAbsolute(std::string_view path) override;
286 std::shared_ptr<VfsFile> CreateFileRelative(std::string_view path) override; 286 VirtualFile CreateFileRelative(std::string_view path) override;
287 std::shared_ptr<VfsDirectory> CreateDirectoryAbsolute(std::string_view path) override; 287 VirtualDir CreateDirectoryAbsolute(std::string_view path) override;
288 std::shared_ptr<VfsDirectory> CreateDirectoryRelative(std::string_view path) override; 288 VirtualDir CreateDirectoryRelative(std::string_view path) override;
289 bool DeleteSubdirectory(std::string_view name) override; 289 bool DeleteSubdirectory(std::string_view name) override;
290 bool DeleteSubdirectoryRecursive(std::string_view name) override; 290 bool DeleteSubdirectoryRecursive(std::string_view name) override;
291 bool CleanSubdirectoryRecursive(std::string_view name) override; 291 bool CleanSubdirectoryRecursive(std::string_view name) override;
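
This whole diff only typechecks if VirtualFile and VirtualDir are aliases for the shared_ptr types being replaced; in yuzu they are defined in a shared VFS types header, presumably along these lines, and call sites keep the same null-checkable handle semantics (the path in the example is hypothetical):

#include <memory>
#include <vector>

// Presumed alias definitions (a mechanical rename like this diff requires them):
using VirtualFile = std::shared_ptr<VfsFile>;
using VirtualDir = std::shared_ptr<VfsDirectory>;

// Usage stays unchanged: handles are still compared against nullptr.
void DumpFile(const VirtualDir& root) {
    if (VirtualFile file = root->GetFileRelative("/data/example.bin")) {
        std::vector<u8> bytes(file->GetSize());
        file->Read(bytes.data(), bytes.size(), 0); // signature as declared above
    }
}
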
diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp
index e0ff70174..3c5a7d87a 100644
--- a/src/core/file_sys/vfs_concat.cpp
+++ b/src/core/file_sys/vfs_concat.cpp
@@ -46,7 +46,7 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(std::vector<VirtualFile> f
46 if (files.size() == 1) 46 if (files.size() == 1)
47 return files[0]; 47 return files[0];
48 48
49 return std::shared_ptr<VfsFile>(new ConcatenatedVfsFile(std::move(files), std::move(name))); 49 return VirtualFile(new ConcatenatedVfsFile(std::move(files), std::move(name)));
50} 50}
51 51
52VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, 52VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte,
@@ -71,20 +71,23 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte,
71 if (files.begin()->first != 0) 71 if (files.begin()->first != 0)
72 files.emplace(0, std::make_shared<StaticVfsFile>(filler_byte, files.begin()->first)); 72 files.emplace(0, std::make_shared<StaticVfsFile>(filler_byte, files.begin()->first));
73 73
74 return std::shared_ptr<VfsFile>(new ConcatenatedVfsFile(std::move(files), std::move(name))); 74 return VirtualFile(new ConcatenatedVfsFile(std::move(files), std::move(name)));
75} 75}
76 76
77std::string ConcatenatedVfsFile::GetName() const { 77std::string ConcatenatedVfsFile::GetName() const {
78 if (files.empty()) 78 if (files.empty()) {
79 return ""; 79 return "";
80 if (!name.empty()) 80 }
81 if (!name.empty()) {
81 return name; 82 return name;
83 }
82 return files.begin()->second->GetName(); 84 return files.begin()->second->GetName();
83} 85}
84 86
85std::size_t ConcatenatedVfsFile::GetSize() const { 87std::size_t ConcatenatedVfsFile::GetSize() const {
86 if (files.empty()) 88 if (files.empty()) {
87 return 0; 89 return 0;
90 }
88 return files.rbegin()->first + files.rbegin()->second->GetSize(); 91 return files.rbegin()->first + files.rbegin()->second->GetSize();
89} 92}
90 93
@@ -92,9 +95,10 @@ bool ConcatenatedVfsFile::Resize(std::size_t new_size) {
92 return false; 95 return false;
93} 96}
94 97
95std::shared_ptr<VfsDirectory> ConcatenatedVfsFile::GetContainingDirectory() const { 98VirtualDir ConcatenatedVfsFile::GetContainingDirectory() const {
96 if (files.empty()) 99 if (files.empty()) {
97 return nullptr; 100 return nullptr;
101 }
98 return files.begin()->second->GetContainingDirectory(); 102 return files.begin()->second->GetContainingDirectory();
99} 103}
100 104
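
The GetSize logic above (start offset of the last chunk plus that chunk's size) pins down the data layout: files maps each chunk's starting offset to its backing file. Read must then locate the chunk containing a requested offset. A minimal sketch of that lookup, assuming std::map<u64, VirtualFile> as the member type and ignoring reads that cross a chunk boundary, which the real implementation has to continue into the following chunks:

#include <cstddef>
#include <map>

// Illustrative: locate the chunk containing 'offset' and read from it.
// u8/u64 are yuzu's common_types aliases.
std::size_t ReadOneChunk(const std::map<u64, VirtualFile>& files, u8* data,
                         std::size_t length, std::size_t offset) {
    auto it = files.upper_bound(offset); // first chunk starting beyond offset
    if (it == files.begin()) {
        return 0; // offset precedes the first chunk
    }
    --it; // now the chunk whose [start, start + size) range covers offset
    return it->second->Read(data, length, offset - it->first);
}
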
diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h
index 7a26343c0..287c72555 100644
--- a/src/core/file_sys/vfs_concat.h
+++ b/src/core/file_sys/vfs_concat.h
@@ -31,7 +31,7 @@ public:
31 std::string GetName() const override; 31 std::string GetName() const override;
32 std::size_t GetSize() const override; 32 std::size_t GetSize() const override;
33 bool Resize(std::size_t new_size) override; 33 bool Resize(std::size_t new_size) override;
34 std::shared_ptr<VfsDirectory> GetContainingDirectory() const override; 34 VirtualDir GetContainingDirectory() const override;
35 bool IsWritable() const override; 35 bool IsWritable() const override;
36 bool IsReadable() const override; 36 bool IsReadable() const override;
37 std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override; 37 std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
diff --git a/src/core/file_sys/vfs_layered.cpp b/src/core/file_sys/vfs_layered.cpp
index 338e398da..434b03cec 100644
--- a/src/core/file_sys/vfs_layered.cpp
+++ b/src/core/file_sys/vfs_layered.cpp
@@ -20,10 +20,10 @@ VirtualDir LayeredVfsDirectory::MakeLayeredDirectory(std::vector<VirtualDir> dir
20 if (dirs.size() == 1) 20 if (dirs.size() == 1)
21 return dirs[0]; 21 return dirs[0];
22 22
23 return std::shared_ptr<VfsDirectory>(new LayeredVfsDirectory(std::move(dirs), std::move(name))); 23 return VirtualDir(new LayeredVfsDirectory(std::move(dirs), std::move(name)));
24} 24}
25 25
26std::shared_ptr<VfsFile> LayeredVfsDirectory::GetFileRelative(std::string_view path) const { 26VirtualFile LayeredVfsDirectory::GetFileRelative(std::string_view path) const {
27 for (const auto& layer : dirs) { 27 for (const auto& layer : dirs) {
28 const auto file = layer->GetFileRelative(path); 28 const auto file = layer->GetFileRelative(path);
29 if (file != nullptr) 29 if (file != nullptr)
@@ -33,23 +33,23 @@ std::shared_ptr<VfsFile> LayeredVfsDirectory::GetFileRelative(std::string_view p
33 return nullptr; 33 return nullptr;
34} 34}
35 35
36std::shared_ptr<VfsDirectory> LayeredVfsDirectory::GetDirectoryRelative( 36VirtualDir LayeredVfsDirectory::GetDirectoryRelative(std::string_view path) const {
37 std::string_view path) const {
38 std::vector<VirtualDir> out; 37 std::vector<VirtualDir> out;
39 for (const auto& layer : dirs) { 38 for (const auto& layer : dirs) {
40 auto dir = layer->GetDirectoryRelative(path); 39 auto dir = layer->GetDirectoryRelative(path);
41 if (dir != nullptr) 40 if (dir != nullptr) {
42 out.push_back(std::move(dir)); 41 out.push_back(std::move(dir));
42 }
43 } 43 }
44 44
45 return MakeLayeredDirectory(std::move(out)); 45 return MakeLayeredDirectory(std::move(out));
46} 46}
47 47
48std::shared_ptr<VfsFile> LayeredVfsDirectory::GetFile(std::string_view name) const { 48VirtualFile LayeredVfsDirectory::GetFile(std::string_view name) const {
49 return GetFileRelative(name); 49 return GetFileRelative(name);
50} 50}
51 51
52std::shared_ptr<VfsDirectory> LayeredVfsDirectory::GetSubdirectory(std::string_view name) const { 52VirtualDir LayeredVfsDirectory::GetSubdirectory(std::string_view name) const {
53 return GetDirectoryRelative(name); 53 return GetDirectoryRelative(name);
54} 54}
55 55
@@ -57,7 +57,7 @@ std::string LayeredVfsDirectory::GetFullPath() const {
57 return dirs[0]->GetFullPath(); 57 return dirs[0]->GetFullPath();
58} 58}
59 59
60std::vector<std::shared_ptr<VfsFile>> LayeredVfsDirectory::GetFiles() const { 60std::vector<VirtualFile> LayeredVfsDirectory::GetFiles() const {
61 std::vector<VirtualFile> out; 61 std::vector<VirtualFile> out;
62 for (const auto& layer : dirs) { 62 for (const auto& layer : dirs) {
63 for (const auto& file : layer->GetFiles()) { 63 for (const auto& file : layer->GetFiles()) {
@@ -72,7 +72,7 @@ std::vector<std::shared_ptr<VfsFile>> LayeredVfsDirectory::GetFiles() const {
72 return out; 72 return out;
73} 73}
74 74
75std::vector<std::shared_ptr<VfsDirectory>> LayeredVfsDirectory::GetSubdirectories() const { 75std::vector<VirtualDir> LayeredVfsDirectory::GetSubdirectories() const {
76 std::vector<std::string> names; 76 std::vector<std::string> names;
77 for (const auto& layer : dirs) { 77 for (const auto& layer : dirs) {
78 for (const auto& sd : layer->GetSubdirectories()) { 78 for (const auto& sd : layer->GetSubdirectories()) {
@@ -101,15 +101,15 @@ std::string LayeredVfsDirectory::GetName() const {
101 return name.empty() ? dirs[0]->GetName() : name; 101 return name.empty() ? dirs[0]->GetName() : name;
102} 102}
103 103
104std::shared_ptr<VfsDirectory> LayeredVfsDirectory::GetParentDirectory() const { 104VirtualDir LayeredVfsDirectory::GetParentDirectory() const {
105 return dirs[0]->GetParentDirectory(); 105 return dirs[0]->GetParentDirectory();
106} 106}
107 107
108std::shared_ptr<VfsDirectory> LayeredVfsDirectory::CreateSubdirectory(std::string_view name) { 108VirtualDir LayeredVfsDirectory::CreateSubdirectory(std::string_view name) {
109 return nullptr; 109 return nullptr;
110} 110}
111 111
112std::shared_ptr<VfsFile> LayeredVfsDirectory::CreateFile(std::string_view name) { 112VirtualFile LayeredVfsDirectory::CreateFile(std::string_view name) {
113 return nullptr; 113 return nullptr;
114} 114}
115 115
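The lookup rules above: for files, the first layer that can resolve the path wins, so earlier entries in dirs shadow later ones; for directories, every layer holding the path contributes, and the results are merged back into a new layered directory. A standalone sketch of the file rule with toy layers:

    #include <map>
    #include <optional>
    #include <string>
    #include <vector>

    // Each toy layer maps a path to file contents; the first layer containing
    // the path wins, mirroring LayeredVfsDirectory::GetFileRelative.
    std::optional<std::string> GetFileRelative(
        const std::vector<std::map<std::string, std::string>>& layers,
        const std::string& path) {
        for (const auto& layer : layers) {
            if (const auto it = layer.find(path); it != layer.end()) {
                return it->second;
            }
        }
        return std::nullopt;
    }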
diff --git a/src/core/file_sys/vfs_layered.h b/src/core/file_sys/vfs_layered.h
index 8a25c3428..6d7513ac6 100644
--- a/src/core/file_sys/vfs_layered.h
+++ b/src/core/file_sys/vfs_layered.h
@@ -21,20 +21,20 @@ public:
 21 /// Wrapper function to allow for more efficient handling of the dirs.size() == 0 and == 1 cases. 21 /// Wrapper function to allow for more efficient handling of the dirs.size() == 0 and == 1 cases.
22 static VirtualDir MakeLayeredDirectory(std::vector<VirtualDir> dirs, std::string name = ""); 22 static VirtualDir MakeLayeredDirectory(std::vector<VirtualDir> dirs, std::string name = "");
23 23
24 std::shared_ptr<VfsFile> GetFileRelative(std::string_view path) const override; 24 VirtualFile GetFileRelative(std::string_view path) const override;
25 std::shared_ptr<VfsDirectory> GetDirectoryRelative(std::string_view path) const override; 25 VirtualDir GetDirectoryRelative(std::string_view path) const override;
26 std::shared_ptr<VfsFile> GetFile(std::string_view name) const override; 26 VirtualFile GetFile(std::string_view name) const override;
27 std::shared_ptr<VfsDirectory> GetSubdirectory(std::string_view name) const override; 27 VirtualDir GetSubdirectory(std::string_view name) const override;
28 std::string GetFullPath() const override; 28 std::string GetFullPath() const override;
29 29
30 std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; 30 std::vector<VirtualFile> GetFiles() const override;
31 std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; 31 std::vector<VirtualDir> GetSubdirectories() const override;
32 bool IsWritable() const override; 32 bool IsWritable() const override;
33 bool IsReadable() const override; 33 bool IsReadable() const override;
34 std::string GetName() const override; 34 std::string GetName() const override;
35 std::shared_ptr<VfsDirectory> GetParentDirectory() const override; 35 VirtualDir GetParentDirectory() const override;
36 std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; 36 VirtualDir CreateSubdirectory(std::string_view name) override;
37 std::shared_ptr<VfsFile> CreateFile(std::string_view name) override; 37 VirtualFile CreateFile(std::string_view name) override;
38 bool DeleteSubdirectory(std::string_view name) override; 38 bool DeleteSubdirectory(std::string_view name) override;
39 bool DeleteFile(std::string_view name) override; 39 bool DeleteFile(std::string_view name) override;
40 bool Rename(std::string_view name) override; 40 bool Rename(std::string_view name) override;
diff --git a/src/core/file_sys/vfs_offset.cpp b/src/core/file_sys/vfs_offset.cpp
index 7714d3de5..056737b54 100644
--- a/src/core/file_sys/vfs_offset.cpp
+++ b/src/core/file_sys/vfs_offset.cpp
@@ -9,7 +9,7 @@
9 9
10namespace FileSys { 10namespace FileSys {
11 11
12OffsetVfsFile::OffsetVfsFile(std::shared_ptr<VfsFile> file_, std::size_t size_, std::size_t offset_, 12OffsetVfsFile::OffsetVfsFile(VirtualFile file_, std::size_t size_, std::size_t offset_,
13 std::string name_, VirtualDir parent_) 13 std::string name_, VirtualDir parent_)
14 : file(file_), offset(offset_), size(size_), name(std::move(name_)), 14 : file(file_), offset(offset_), size(size_), name(std::move(name_)),
15 parent(parent_ == nullptr ? file->GetContainingDirectory() : std::move(parent_)) {} 15 parent(parent_ == nullptr ? file->GetContainingDirectory() : std::move(parent_)) {}
@@ -37,7 +37,7 @@ bool OffsetVfsFile::Resize(std::size_t new_size) {
37 return true; 37 return true;
38} 38}
39 39
40std::shared_ptr<VfsDirectory> OffsetVfsFile::GetContainingDirectory() const { 40VirtualDir OffsetVfsFile::GetContainingDirectory() const {
41 return parent; 41 return parent;
42} 42}
43 43
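OffsetVfsFile exposes the byte window [offset, offset + size) of its backing file, so reads and writes have to be clamped to that window; the private TrimToFit helper in the header below does the clamping. A defensive standalone version, written as an assumption about its semantics rather than a copy of the yuzu code:

    #include <algorithm>
    #include <cstddef>

    // Clamp a read of r_size bytes at r_offset (window-relative) so that it
    // stays inside a window of window_size bytes; returns the readable count.
    std::size_t TrimToFit(std::size_t window_size, std::size_t r_size, std::size_t r_offset) {
        if (r_offset >= window_size) {
            return 0; // the read starts past the end of the window
        }
        return std::min(r_size, window_size - r_offset);
    }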
diff --git a/src/core/file_sys/vfs_offset.h b/src/core/file_sys/vfs_offset.h
index f7b7a3256..b2ccc5c7b 100644
--- a/src/core/file_sys/vfs_offset.h
+++ b/src/core/file_sys/vfs_offset.h
@@ -17,14 +17,14 @@ namespace FileSys {
17// the size of this wrapper. 17// the size of this wrapper.
18class OffsetVfsFile : public VfsFile { 18class OffsetVfsFile : public VfsFile {
19public: 19public:
20 OffsetVfsFile(std::shared_ptr<VfsFile> file, std::size_t size, std::size_t offset = 0, 20 OffsetVfsFile(VirtualFile file, std::size_t size, std::size_t offset = 0,
21 std::string new_name = "", VirtualDir new_parent = nullptr); 21 std::string new_name = "", VirtualDir new_parent = nullptr);
22 ~OffsetVfsFile() override; 22 ~OffsetVfsFile() override;
23 23
24 std::string GetName() const override; 24 std::string GetName() const override;
25 std::size_t GetSize() const override; 25 std::size_t GetSize() const override;
26 bool Resize(std::size_t new_size) override; 26 bool Resize(std::size_t new_size) override;
27 std::shared_ptr<VfsDirectory> GetContainingDirectory() const override; 27 VirtualDir GetContainingDirectory() const override;
28 bool IsWritable() const override; 28 bool IsWritable() const override;
29 bool IsReadable() const override; 29 bool IsReadable() const override;
30 std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override; 30 std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
@@ -42,7 +42,7 @@ public:
42private: 42private:
43 std::size_t TrimToFit(std::size_t r_size, std::size_t r_offset) const; 43 std::size_t TrimToFit(std::size_t r_size, std::size_t r_offset) const;
44 44
45 std::shared_ptr<VfsFile> file; 45 VirtualFile file;
46 std::size_t offset; 46 std::size_t offset;
47 std::size_t size; 47 std::size_t size;
48 std::string name; 48 std::string name;
diff --git a/src/core/file_sys/vfs_real.cpp b/src/core/file_sys/vfs_real.cpp
index 488687ba9..a287eebe3 100644
--- a/src/core/file_sys/vfs_real.cpp
+++ b/src/core/file_sys/vfs_real.cpp
@@ -263,7 +263,7 @@ bool RealVfsFile::Resize(std::size_t new_size) {
263 return backing->Resize(new_size); 263 return backing->Resize(new_size);
264} 264}
265 265
266std::shared_ptr<VfsDirectory> RealVfsFile::GetContainingDirectory() const { 266VirtualDir RealVfsFile::GetContainingDirectory() const {
267 return base.OpenDirectory(parent_path, perms); 267 return base.OpenDirectory(parent_path, perms);
268} 268}
269 269
@@ -352,7 +352,7 @@ RealVfsDirectory::RealVfsDirectory(RealVfsFilesystem& base_, const std::string&
352 352
353RealVfsDirectory::~RealVfsDirectory() = default; 353RealVfsDirectory::~RealVfsDirectory() = default;
354 354
355std::shared_ptr<VfsFile> RealVfsDirectory::GetFileRelative(std::string_view path) const { 355VirtualFile RealVfsDirectory::GetFileRelative(std::string_view path) const {
356 const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path)); 356 const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path));
357 if (!FS::Exists(full_path) || FS::IsDirectory(full_path)) { 357 if (!FS::Exists(full_path) || FS::IsDirectory(full_path)) {
358 return nullptr; 358 return nullptr;
@@ -360,7 +360,7 @@ std::shared_ptr<VfsFile> RealVfsDirectory::GetFileRelative(std::string_view path
360 return base.OpenFile(full_path, perms); 360 return base.OpenFile(full_path, perms);
361} 361}
362 362
363std::shared_ptr<VfsDirectory> RealVfsDirectory::GetDirectoryRelative(std::string_view path) const { 363VirtualDir RealVfsDirectory::GetDirectoryRelative(std::string_view path) const {
364 const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path)); 364 const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path));
365 if (!FS::Exists(full_path) || !FS::IsDirectory(full_path)) { 365 if (!FS::Exists(full_path) || !FS::IsDirectory(full_path)) {
366 return nullptr; 366 return nullptr;
@@ -368,20 +368,20 @@ std::shared_ptr<VfsDirectory> RealVfsDirectory::GetDirectoryRelative(std::string
368 return base.OpenDirectory(full_path, perms); 368 return base.OpenDirectory(full_path, perms);
369} 369}
370 370
371std::shared_ptr<VfsFile> RealVfsDirectory::GetFile(std::string_view name) const { 371VirtualFile RealVfsDirectory::GetFile(std::string_view name) const {
372 return GetFileRelative(name); 372 return GetFileRelative(name);
373} 373}
374 374
375std::shared_ptr<VfsDirectory> RealVfsDirectory::GetSubdirectory(std::string_view name) const { 375VirtualDir RealVfsDirectory::GetSubdirectory(std::string_view name) const {
376 return GetDirectoryRelative(name); 376 return GetDirectoryRelative(name);
377} 377}
378 378
379std::shared_ptr<VfsFile> RealVfsDirectory::CreateFileRelative(std::string_view path) { 379VirtualFile RealVfsDirectory::CreateFileRelative(std::string_view path) {
380 const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path)); 380 const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path));
381 return base.CreateFile(full_path, perms); 381 return base.CreateFile(full_path, perms);
382} 382}
383 383
384std::shared_ptr<VfsDirectory> RealVfsDirectory::CreateDirectoryRelative(std::string_view path) { 384VirtualDir RealVfsDirectory::CreateDirectoryRelative(std::string_view path) {
385 const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path)); 385 const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path));
386 return base.CreateDirectory(full_path, perms); 386 return base.CreateDirectory(full_path, perms);
387} 387}
@@ -391,11 +391,11 @@ bool RealVfsDirectory::DeleteSubdirectoryRecursive(std::string_view name) {
391 return base.DeleteDirectory(full_path); 391 return base.DeleteDirectory(full_path);
392} 392}
393 393
394std::vector<std::shared_ptr<VfsFile>> RealVfsDirectory::GetFiles() const { 394std::vector<VirtualFile> RealVfsDirectory::GetFiles() const {
395 return IterateEntries<RealVfsFile, VfsFile>(); 395 return IterateEntries<RealVfsFile, VfsFile>();
396} 396}
397 397
398std::vector<std::shared_ptr<VfsDirectory>> RealVfsDirectory::GetSubdirectories() const { 398std::vector<VirtualDir> RealVfsDirectory::GetSubdirectories() const {
399 return IterateEntries<RealVfsDirectory, VfsDirectory>(); 399 return IterateEntries<RealVfsDirectory, VfsDirectory>();
400} 400}
401 401
@@ -411,7 +411,7 @@ std::string RealVfsDirectory::GetName() const {
411 return path_components.back(); 411 return path_components.back();
412} 412}
413 413
414std::shared_ptr<VfsDirectory> RealVfsDirectory::GetParentDirectory() const { 414VirtualDir RealVfsDirectory::GetParentDirectory() const {
415 if (path_components.size() <= 1) { 415 if (path_components.size() <= 1) {
416 return nullptr; 416 return nullptr;
417 } 417 }
@@ -419,12 +419,12 @@ std::shared_ptr<VfsDirectory> RealVfsDirectory::GetParentDirectory() const {
419 return base.OpenDirectory(parent_path, perms); 419 return base.OpenDirectory(parent_path, perms);
420} 420}
421 421
422std::shared_ptr<VfsDirectory> RealVfsDirectory::CreateSubdirectory(std::string_view name) { 422VirtualDir RealVfsDirectory::CreateSubdirectory(std::string_view name) {
423 const std::string subdir_path = (path + DIR_SEP).append(name); 423 const std::string subdir_path = (path + DIR_SEP).append(name);
424 return base.CreateDirectory(subdir_path, perms); 424 return base.CreateDirectory(subdir_path, perms);
425} 425}
426 426
427std::shared_ptr<VfsFile> RealVfsDirectory::CreateFile(std::string_view name) { 427VirtualFile RealVfsDirectory::CreateFile(std::string_view name) {
428 const std::string file_path = (path + DIR_SEP).append(name); 428 const std::string file_path = (path + DIR_SEP).append(name);
429 return base.CreateFile(file_path, perms); 429 return base.CreateFile(file_path, perms);
430} 430}
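RealVfsDirectory resolves every relative request by joining its own path with the request, sanitizing the result, and returning nullptr when the target is missing or of the wrong kind before delegating to the backing filesystem. A standalone sketch of that guard using std::filesystem (a substitution for yuzu's FS::SanitizePath/Exists/IsDirectory helpers):

    #include <filesystem>
    #include <optional>
    #include <string>

    // Join a base directory with a relative request and verify that a regular
    // (non-directory) entry exists there, as in GetFileRelative above.
    std::optional<std::filesystem::path> ResolveFileRelative(
        const std::filesystem::path& base, const std::string& relative) {
        const auto full_path = (base / relative).lexically_normal();
        if (!std::filesystem::exists(full_path) ||
            std::filesystem::is_directory(full_path)) {
            return std::nullopt;
        }
        return full_path;
    }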
diff --git a/src/core/file_sys/vfs_real.h b/src/core/file_sys/vfs_real.h
index 0b537b22c..23e99865e 100644
--- a/src/core/file_sys/vfs_real.h
+++ b/src/core/file_sys/vfs_real.h
@@ -50,7 +50,7 @@ public:
50 std::string GetName() const override; 50 std::string GetName() const override;
51 std::size_t GetSize() const override; 51 std::size_t GetSize() const override;
52 bool Resize(std::size_t new_size) override; 52 bool Resize(std::size_t new_size) override;
53 std::shared_ptr<VfsDirectory> GetContainingDirectory() const override; 53 VirtualDir GetContainingDirectory() const override;
54 bool IsWritable() const override; 54 bool IsWritable() const override;
55 bool IsReadable() const override; 55 bool IsReadable() const override;
56 std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override; 56 std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
@@ -79,21 +79,21 @@ class RealVfsDirectory : public VfsDirectory {
79public: 79public:
80 ~RealVfsDirectory() override; 80 ~RealVfsDirectory() override;
81 81
82 std::shared_ptr<VfsFile> GetFileRelative(std::string_view path) const override; 82 VirtualFile GetFileRelative(std::string_view path) const override;
83 std::shared_ptr<VfsDirectory> GetDirectoryRelative(std::string_view path) const override; 83 VirtualDir GetDirectoryRelative(std::string_view path) const override;
84 std::shared_ptr<VfsFile> GetFile(std::string_view name) const override; 84 VirtualFile GetFile(std::string_view name) const override;
85 std::shared_ptr<VfsDirectory> GetSubdirectory(std::string_view name) const override; 85 VirtualDir GetSubdirectory(std::string_view name) const override;
86 std::shared_ptr<VfsFile> CreateFileRelative(std::string_view path) override; 86 VirtualFile CreateFileRelative(std::string_view path) override;
87 std::shared_ptr<VfsDirectory> CreateDirectoryRelative(std::string_view path) override; 87 VirtualDir CreateDirectoryRelative(std::string_view path) override;
88 bool DeleteSubdirectoryRecursive(std::string_view name) override; 88 bool DeleteSubdirectoryRecursive(std::string_view name) override;
89 std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; 89 std::vector<VirtualFile> GetFiles() const override;
90 std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; 90 std::vector<VirtualDir> GetSubdirectories() const override;
91 bool IsWritable() const override; 91 bool IsWritable() const override;
92 bool IsReadable() const override; 92 bool IsReadable() const override;
93 std::string GetName() const override; 93 std::string GetName() const override;
94 std::shared_ptr<VfsDirectory> GetParentDirectory() const override; 94 VirtualDir GetParentDirectory() const override;
95 std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; 95 VirtualDir CreateSubdirectory(std::string_view name) override;
96 std::shared_ptr<VfsFile> CreateFile(std::string_view name) override; 96 VirtualFile CreateFile(std::string_view name) override;
97 bool DeleteSubdirectory(std::string_view name) override; 97 bool DeleteSubdirectory(std::string_view name) override;
98 bool DeleteFile(std::string_view name) override; 98 bool DeleteFile(std::string_view name) override;
99 bool Rename(std::string_view name) override; 99 bool Rename(std::string_view name) override;
diff --git a/src/core/file_sys/vfs_static.h b/src/core/file_sys/vfs_static.h
index 8b27c30fa..c840b24b9 100644
--- a/src/core/file_sys/vfs_static.h
+++ b/src/core/file_sys/vfs_static.h
@@ -31,7 +31,7 @@ public:
31 return true; 31 return true;
32 } 32 }
33 33
34 std::shared_ptr<VfsDirectory> GetContainingDirectory() const override { 34 VirtualDir GetContainingDirectory() const override {
35 return parent; 35 return parent;
36 } 36 }
37 37
diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp
index 75fc04302..c1ec1e645 100644
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -25,7 +25,7 @@ bool VectorVfsFile::Resize(size_t new_size) {
25 return true; 25 return true;
26} 26}
27 27
28std::shared_ptr<VfsDirectory> VectorVfsFile::GetContainingDirectory() const { 28VirtualDir VectorVfsFile::GetContainingDirectory() const {
29 return parent; 29 return parent;
30} 30}
31 31
@@ -68,11 +68,11 @@ VectorVfsDirectory::VectorVfsDirectory(std::vector<VirtualFile> files_,
68 68
69VectorVfsDirectory::~VectorVfsDirectory() = default; 69VectorVfsDirectory::~VectorVfsDirectory() = default;
70 70
71std::vector<std::shared_ptr<VfsFile>> VectorVfsDirectory::GetFiles() const { 71std::vector<VirtualFile> VectorVfsDirectory::GetFiles() const {
72 return files; 72 return files;
73} 73}
74 74
75std::vector<std::shared_ptr<VfsDirectory>> VectorVfsDirectory::GetSubdirectories() const { 75std::vector<VirtualDir> VectorVfsDirectory::GetSubdirectories() const {
76 return dirs; 76 return dirs;
77} 77}
78 78
@@ -88,7 +88,7 @@ std::string VectorVfsDirectory::GetName() const {
88 return name; 88 return name;
89} 89}
90 90
91std::shared_ptr<VfsDirectory> VectorVfsDirectory::GetParentDirectory() const { 91VirtualDir VectorVfsDirectory::GetParentDirectory() const {
92 return parent; 92 return parent;
93} 93}
94 94
@@ -116,11 +116,11 @@ bool VectorVfsDirectory::Rename(std::string_view name_) {
116 return true; 116 return true;
117} 117}
118 118
119std::shared_ptr<VfsDirectory> VectorVfsDirectory::CreateSubdirectory(std::string_view name) { 119VirtualDir VectorVfsDirectory::CreateSubdirectory(std::string_view name) {
120 return nullptr; 120 return nullptr;
121} 121}
122 122
123std::shared_ptr<VfsFile> VectorVfsDirectory::CreateFile(std::string_view name) { 123VirtualFile VectorVfsDirectory::CreateFile(std::string_view name) {
124 return nullptr; 124 return nullptr;
125} 125}
126 126
diff --git a/src/core/file_sys/vfs_vector.h b/src/core/file_sys/vfs_vector.h
index 95d3da2f2..2aff9ca34 100644
--- a/src/core/file_sys/vfs_vector.h
+++ b/src/core/file_sys/vfs_vector.h
@@ -17,9 +17,9 @@ namespace FileSys {
17template <std::size_t size> 17template <std::size_t size>
18class ArrayVfsFile : public VfsFile { 18class ArrayVfsFile : public VfsFile {
19public: 19public:
20 explicit ArrayVfsFile(const std::array<u8, size>& data, std::string name = "", 20 explicit ArrayVfsFile(const std::array<u8, size>& data_, std::string name_ = "",
21 VirtualDir parent = nullptr) 21 VirtualDir parent_ = nullptr)
22 : data(data), name(std::move(name)), parent(std::move(parent)) {} 22 : data(data_), name(std::move(name_)), parent(std::move(parent_)) {}
23 23
24 std::string GetName() const override { 24 std::string GetName() const override {
25 return name; 25 return name;
@@ -33,7 +33,7 @@ public:
33 return false; 33 return false;
34 } 34 }
35 35
36 std::shared_ptr<VfsDirectory> GetContainingDirectory() const override { 36 VirtualDir GetContainingDirectory() const override {
37 return parent; 37 return parent;
38 } 38 }
39 39
@@ -51,12 +51,12 @@ public:
51 return read; 51 return read;
52 } 52 }
53 53
54 std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override { 54 std::size_t Write(const u8* data_, std::size_t length, std::size_t offset) override {
55 return 0; 55 return 0;
56 } 56 }
57 57
58 bool Rename(std::string_view name) override { 58 bool Rename(std::string_view new_name) override {
59 this->name = name; 59 name = new_name;
60 return true; 60 return true;
61 } 61 }
62 62
@@ -82,7 +82,7 @@ public:
82 std::string GetName() const override; 82 std::string GetName() const override;
83 std::size_t GetSize() const override; 83 std::size_t GetSize() const override;
84 bool Resize(std::size_t new_size) override; 84 bool Resize(std::size_t new_size) override;
85 std::shared_ptr<VfsDirectory> GetContainingDirectory() const override; 85 VirtualDir GetContainingDirectory() const override;
86 bool IsWritable() const override; 86 bool IsWritable() const override;
87 bool IsReadable() const override; 87 bool IsReadable() const override;
88 std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override; 88 std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
@@ -106,17 +106,17 @@ public:
106 VirtualDir parent = nullptr); 106 VirtualDir parent = nullptr);
107 ~VectorVfsDirectory() override; 107 ~VectorVfsDirectory() override;
108 108
109 std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; 109 std::vector<VirtualFile> GetFiles() const override;
110 std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; 110 std::vector<VirtualDir> GetSubdirectories() const override;
111 bool IsWritable() const override; 111 bool IsWritable() const override;
112 bool IsReadable() const override; 112 bool IsReadable() const override;
113 std::string GetName() const override; 113 std::string GetName() const override;
114 std::shared_ptr<VfsDirectory> GetParentDirectory() const override; 114 VirtualDir GetParentDirectory() const override;
115 bool DeleteSubdirectory(std::string_view name) override; 115 bool DeleteSubdirectory(std::string_view name) override;
116 bool DeleteFile(std::string_view name) override; 116 bool DeleteFile(std::string_view name) override;
117 bool Rename(std::string_view name) override; 117 bool Rename(std::string_view name) override;
118 std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; 118 VirtualDir CreateSubdirectory(std::string_view name) override;
119 std::shared_ptr<VfsFile> CreateFile(std::string_view name) override; 119 VirtualFile CreateFile(std::string_view name) override;
120 120
121 virtual void AddFile(VirtualFile file); 121 virtual void AddFile(VirtualFile file);
122 virtual void AddDirectory(VirtualDir dir); 122 virtual void AddDirectory(VirtualDir dir);
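ArrayVfsFile, shown in context above, is a fixed-size, read-only, in-memory file: Read copies at most size - offset bytes, while Write and Resize refuse. The same idea in a self-contained miniature:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstring>

    // Minimal read-only in-memory file over a std::array, modeled on ArrayVfsFile.
    template <std::size_t N>
    class ArrayFile {
    public:
        explicit ArrayFile(const std::array<unsigned char, N>& data_) : data{data_} {}

        // Copies up to `length` bytes starting at `offset`; returns bytes copied.
        std::size_t Read(unsigned char* out, std::size_t length, std::size_t offset) const {
            if (offset >= N) {
                return 0;
            }
            const std::size_t read = std::min(length, N - offset);
            std::memcpy(out, data.data() + offset, read);
            return read;
        }

    private:
        std::array<unsigned char, N> data;
    };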
diff --git a/src/core/file_sys/xts_archive.cpp b/src/core/file_sys/xts_archive.cpp
index 24c58e7ae..814fd5680 100644
--- a/src/core/file_sys/xts_archive.cpp
+++ b/src/core/file_sys/xts_archive.cpp
@@ -152,11 +152,11 @@ NAXContentType NAX::GetContentType() const {
152 return type; 152 return type;
153} 153}
154 154
155std::vector<std::shared_ptr<VfsFile>> NAX::GetFiles() const { 155std::vector<VirtualFile> NAX::GetFiles() const {
156 return {dec_file}; 156 return {dec_file};
157} 157}
158 158
159std::vector<std::shared_ptr<VfsDirectory>> NAX::GetSubdirectories() const { 159std::vector<VirtualDir> NAX::GetSubdirectories() const {
160 return {}; 160 return {};
161} 161}
162 162
@@ -164,7 +164,7 @@ std::string NAX::GetName() const {
164 return file->GetName(); 164 return file->GetName();
165} 165}
166 166
167std::shared_ptr<VfsDirectory> NAX::GetParentDirectory() const { 167VirtualDir NAX::GetParentDirectory() const {
168 return file->GetContainingDirectory(); 168 return file->GetContainingDirectory();
169} 169}
170 170
diff --git a/src/core/file_sys/xts_archive.h b/src/core/file_sys/xts_archive.h
index c472e226e..63a032b68 100644
--- a/src/core/file_sys/xts_archive.h
+++ b/src/core/file_sys/xts_archive.h
@@ -47,13 +47,13 @@ public:
47 47
48 NAXContentType GetContentType() const; 48 NAXContentType GetContentType() const;
49 49
50 std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; 50 std::vector<VirtualFile> GetFiles() const override;
51 51
52 std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; 52 std::vector<VirtualDir> GetSubdirectories() const override;
53 53
54 std::string GetName() const override; 54 std::string GetName() const override;
55 55
56 std::shared_ptr<VfsDirectory> GetParentDirectory() const override; 56 VirtualDir GetParentDirectory() const override;
57 57
58private: 58private:
59 Loader::ResultStatus Parse(std::string_view path); 59 Loader::ResultStatus Parse(std::string_view path);
diff --git a/src/core/frontend/applets/error.cpp b/src/core/frontend/applets/error.cpp
index 4002a9211..dceb20ff8 100644
--- a/src/core/frontend/applets/error.cpp
+++ b/src/core/frontend/applets/error.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
5#include "core/frontend/applets/error.h" 6#include "core/frontend/applets/error.h"
6 7
7namespace Core::Frontend { 8namespace Core::Frontend {
@@ -10,7 +11,7 @@ ErrorApplet::~ErrorApplet() = default;
10 11
11void DefaultErrorApplet::ShowError(ResultCode error, std::function<void()> finished) const { 12void DefaultErrorApplet::ShowError(ResultCode error, std::function<void()> finished) const {
12 LOG_CRITICAL(Service_Fatal, "Application requested error display: {:04}-{:04} (raw={:08X})", 13 LOG_CRITICAL(Service_Fatal, "Application requested error display: {:04}-{:04} (raw={:08X})",
13 static_cast<u32>(error.module.Value()), error.description.Value(), error.raw); 14 error.module.Value(), error.description.Value(), error.raw);
14} 15}
15 16
16void DefaultErrorApplet::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time, 17void DefaultErrorApplet::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time,
@@ -18,7 +19,7 @@ void DefaultErrorApplet::ShowErrorWithTimestamp(ResultCode error, std::chrono::s
18 LOG_CRITICAL( 19 LOG_CRITICAL(
19 Service_Fatal, 20 Service_Fatal,
20 "Application requested error display: {:04X}-{:04X} (raw={:08X}) with timestamp={:016X}", 21 "Application requested error display: {:04X}-{:04X} (raw={:08X}) with timestamp={:016X}",
21 static_cast<u32>(error.module.Value()), error.description.Value(), error.raw, time.count()); 22 error.module.Value(), error.description.Value(), error.raw, time.count());
22} 23}
23 24
24void DefaultErrorApplet::ShowCustomErrorText(ResultCode error, std::string main_text, 25void DefaultErrorApplet::ShowCustomErrorText(ResultCode error, std::string main_text,
@@ -26,7 +27,7 @@ void DefaultErrorApplet::ShowCustomErrorText(ResultCode error, std::string main_
26 std::function<void()> finished) const { 27 std::function<void()> finished) const {
27 LOG_CRITICAL(Service_Fatal, 28 LOG_CRITICAL(Service_Fatal,
28 "Application requested custom error with error_code={:04X}-{:04X} (raw={:08X})", 29 "Application requested custom error with error_code={:04X}-{:04X} (raw={:08X})",
29 static_cast<u32>(error.module.Value()), error.description.Value(), error.raw); 30 error.module.Value(), error.description.Value(), error.raw);
30 LOG_CRITICAL(Service_Fatal, " Main Text: {}", main_text); 31 LOG_CRITICAL(Service_Fatal, " Main Text: {}", main_text);
31 LOG_CRITICAL(Service_Fatal, " Detail Text: {}", detail_text); 32 LOG_CRITICAL(Service_Fatal, " Detail Text: {}", detail_text);
32} 33}
diff --git a/src/core/frontend/applets/general_frontend.cpp b/src/core/frontend/applets/general_frontend.cpp
index c30b36de7..7483ffb76 100644
--- a/src/core/frontend/applets/general_frontend.cpp
+++ b/src/core/frontend/applets/general_frontend.cpp
@@ -53,72 +53,4 @@ void DefaultPhotoViewerApplet::ShowAllPhotos(std::function<void()> finished) con
53 finished(); 53 finished();
54} 54}
55 55
56ECommerceApplet::~ECommerceApplet() = default;
57
58DefaultECommerceApplet::~DefaultECommerceApplet() = default;
59
60void DefaultECommerceApplet::ShowApplicationInformation(
61 std::function<void()> finished, u64 title_id, std::optional<u128> user_id,
62 std::optional<bool> full_display, std::optional<std::string> extra_parameter) {
63 const auto value = user_id.value_or(u128{});
64 LOG_INFO(Service_AM,
65 "Application requested frontend show application information for EShop, "
66 "title_id={:016X}, user_id={:016X}{:016X}, full_display={}, extra_parameter={}",
67 title_id, value[1], value[0],
68 full_display.has_value() ? fmt::format("{}", *full_display) : "null",
69 extra_parameter.value_or("null"));
70 finished();
71}
72
73void DefaultECommerceApplet::ShowAddOnContentList(std::function<void()> finished, u64 title_id,
74 std::optional<u128> user_id,
75 std::optional<bool> full_display) {
76 const auto value = user_id.value_or(u128{});
77 LOG_INFO(Service_AM,
78 "Application requested frontend show add on content list for EShop, "
79 "title_id={:016X}, user_id={:016X}{:016X}, full_display={}",
80 title_id, value[1], value[0],
81 full_display.has_value() ? fmt::format("{}", *full_display) : "null");
82 finished();
83}
84
85void DefaultECommerceApplet::ShowSubscriptionList(std::function<void()> finished, u64 title_id,
86 std::optional<u128> user_id) {
87 const auto value = user_id.value_or(u128{});
88 LOG_INFO(Service_AM,
89 "Application requested frontend show subscription list for EShop, title_id={:016X}, "
90 "user_id={:016X}{:016X}",
91 title_id, value[1], value[0]);
92 finished();
93}
94
95void DefaultECommerceApplet::ShowConsumableItemList(std::function<void()> finished, u64 title_id,
96 std::optional<u128> user_id) {
97 const auto value = user_id.value_or(u128{});
98 LOG_INFO(
99 Service_AM,
100 "Application requested frontend show consumable item list for EShop, title_id={:016X}, "
101 "user_id={:016X}{:016X}",
102 title_id, value[1], value[0]);
103 finished();
104}
105
106void DefaultECommerceApplet::ShowShopHome(std::function<void()> finished, u128 user_id,
107 bool full_display) {
108 LOG_INFO(Service_AM,
109 "Application requested frontend show home menu for EShop, user_id={:016X}{:016X}, "
110 "full_display={}",
111 user_id[1], user_id[0], full_display);
112 finished();
113}
114
115void DefaultECommerceApplet::ShowSettings(std::function<void()> finished, u128 user_id,
116 bool full_display) {
117 LOG_INFO(Service_AM,
118 "Application requested frontend show settings menu for EShop, user_id={:016X}{:016X}, "
119 "full_display={}",
120 user_id[1], user_id[0], full_display);
121 finished();
122}
123
124} // namespace Core::Frontend 56} // namespace Core::Frontend
diff --git a/src/core/frontend/applets/general_frontend.h b/src/core/frontend/applets/general_frontend.h
index 4b63f828e..b713b14ee 100644
--- a/src/core/frontend/applets/general_frontend.h
+++ b/src/core/frontend/applets/general_frontend.h
@@ -58,55 +58,4 @@ public:
58 void ShowAllPhotos(std::function<void()> finished) const override; 58 void ShowAllPhotos(std::function<void()> finished) const override;
59}; 59};
60 60
61class ECommerceApplet {
62public:
63 virtual ~ECommerceApplet();
64
65 // Shows a page with application icons, description, name, and price.
66 virtual void ShowApplicationInformation(std::function<void()> finished, u64 title_id,
67 std::optional<u128> user_id = {},
68 std::optional<bool> full_display = {},
69 std::optional<std::string> extra_parameter = {}) = 0;
70
71 // Shows a page with all of the add on content available for a game, with name, description, and
72 // price.
73 virtual void ShowAddOnContentList(std::function<void()> finished, u64 title_id,
74 std::optional<u128> user_id = {},
75 std::optional<bool> full_display = {}) = 0;
76
77 // Shows a page with all of the subscriptions (recurring payments) for a game, with name,
78 // description, price, and renewal period.
79 virtual void ShowSubscriptionList(std::function<void()> finished, u64 title_id,
80 std::optional<u128> user_id = {}) = 0;
81
82 // Shows a page with a list of any additional game related purchasable items (DLC,
83 // subscriptions, etc) for a particular game, with name, description, type, and price.
84 virtual void ShowConsumableItemList(std::function<void()> finished, u64 title_id,
85 std::optional<u128> user_id = {}) = 0;
86
87 // Shows the home page of the shop.
88 virtual void ShowShopHome(std::function<void()> finished, u128 user_id, bool full_display) = 0;
89
90 // Shows the user settings page of the shop.
91 virtual void ShowSettings(std::function<void()> finished, u128 user_id, bool full_display) = 0;
92};
93
94class DefaultECommerceApplet : public ECommerceApplet {
95public:
96 ~DefaultECommerceApplet() override;
97
98 void ShowApplicationInformation(std::function<void()> finished, u64 title_id,
99 std::optional<u128> user_id, std::optional<bool> full_display,
100 std::optional<std::string> extra_parameter) override;
101 void ShowAddOnContentList(std::function<void()> finished, u64 title_id,
102 std::optional<u128> user_id,
103 std::optional<bool> full_display) override;
104 void ShowSubscriptionList(std::function<void()> finished, u64 title_id,
105 std::optional<u128> user_id) override;
106 void ShowConsumableItemList(std::function<void()> finished, u64 title_id,
107 std::optional<u128> user_id) override;
108 void ShowShopHome(std::function<void()> finished, u128 user_id, bool full_display) override;
109 void ShowSettings(std::function<void()> finished, u128 user_id, bool full_display) override;
110};
111
112} // namespace Core::Frontend 61} // namespace Core::Frontend
diff --git a/src/core/frontend/applets/web_browser.cpp b/src/core/frontend/applets/web_browser.cpp
index 528295ffc..50db6a654 100644
--- a/src/core/frontend/applets/web_browser.cpp
+++ b/src/core/frontend/applets/web_browser.cpp
@@ -11,14 +11,22 @@ WebBrowserApplet::~WebBrowserApplet() = default;
11 11
12DefaultWebBrowserApplet::~DefaultWebBrowserApplet() = default; 12DefaultWebBrowserApplet::~DefaultWebBrowserApplet() = default;
13 13
14void DefaultWebBrowserApplet::OpenPageLocal(std::string_view filename, 14void DefaultWebBrowserApplet::OpenLocalWebPage(
15 std::function<void()> unpack_romfs_callback, 15 std::string_view local_url, std::function<void()> extract_romfs_callback,
16 std::function<void()> finished_callback) { 16 std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback) const {
17 LOG_INFO(Service_AM, 17 LOG_WARNING(Service_AM, "(STUBBED) called, backend requested to open local web page at {}",
18 "(STUBBED) called - No suitable web browser implementation found to open website page " 18 local_url);
19 "at '{}'!", 19
20 filename); 20 callback(Service::AM::Applets::WebExitReason::WindowClosed, "http://localhost/");
21 finished_callback(); 21}
22
23void DefaultWebBrowserApplet::OpenExternalWebPage(
24 std::string_view external_url,
25 std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback) const {
26 LOG_WARNING(Service_AM, "(STUBBED) called, backend requested to open external web page at {}",
27 external_url);
28
29 callback(Service::AM::Applets::WebExitReason::WindowClosed, "http://localhost/");
22} 30}
23 31
24} // namespace Core::Frontend 32} // namespace Core::Frontend
diff --git a/src/core/frontend/applets/web_browser.h b/src/core/frontend/applets/web_browser.h
index 110e33bc4..1c5ef19a9 100644
--- a/src/core/frontend/applets/web_browser.h
+++ b/src/core/frontend/applets/web_browser.h
@@ -7,22 +7,34 @@
7#include <functional> 7#include <functional>
8#include <string_view> 8#include <string_view>
9 9
10#include "core/hle/service/am/applets/web_types.h"
11
10namespace Core::Frontend { 12namespace Core::Frontend {
11 13
12class WebBrowserApplet { 14class WebBrowserApplet {
13public: 15public:
14 virtual ~WebBrowserApplet(); 16 virtual ~WebBrowserApplet();
15 17
16 virtual void OpenPageLocal(std::string_view url, std::function<void()> unpack_romfs_callback, 18 virtual void OpenLocalWebPage(
17 std::function<void()> finished_callback) = 0; 19 std::string_view local_url, std::function<void()> extract_romfs_callback,
20 std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback) const = 0;
21
22 virtual void OpenExternalWebPage(
23 std::string_view external_url,
24 std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback) const = 0;
18}; 25};
19 26
20class DefaultWebBrowserApplet final : public WebBrowserApplet { 27class DefaultWebBrowserApplet final : public WebBrowserApplet {
21public: 28public:
22 ~DefaultWebBrowserApplet() override; 29 ~DefaultWebBrowserApplet() override;
23 30
24 void OpenPageLocal(std::string_view url, std::function<void()> unpack_romfs_callback, 31 void OpenLocalWebPage(std::string_view local_url, std::function<void()> extract_romfs_callback,
25 std::function<void()> finished_callback) override; 32 std::function<void(Service::AM::Applets::WebExitReason, std::string)>
33 callback) const override;
34
35 void OpenExternalWebPage(std::string_view external_url,
36 std::function<void(Service::AM::Applets::WebExitReason, std::string)>
37 callback) const override;
26}; 38};
27 39
28} // namespace Core::Frontend 40} // namespace Core::Frontend
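The new interface reports how browsing ended: instead of a bare finished callback, each entry point takes a callback receiving a Service::AM::Applets::WebExitReason plus the last visited URL, which the stub default applet invokes with WindowClosed. A standalone sketch of that contract (WindowClosed is the only enumerator confirmed by this diff; the rest of the enum is omitted):

    #include <cstdio>
    #include <functional>
    #include <string>

    // Stand-in for Service::AM::Applets::WebExitReason; only WindowClosed is
    // visible in the diff above.
    enum class WebExitReason { WindowClosed };

    using WebCallback = std::function<void(WebExitReason, std::string)>;

    // Stub backend in the spirit of DefaultWebBrowserApplet: no browser is
    // shown, it immediately reports that the (imaginary) window was closed.
    void OpenExternalWebPage(const std::string& url, const WebCallback& callback) {
        std::printf("stub: would open %s\n", url.c_str());
        callback(WebExitReason::WindowClosed, "http://localhost/");
    }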
diff --git a/src/core/frontend/input_interpreter.cpp b/src/core/frontend/input_interpreter.cpp
new file mode 100644
index 000000000..66ae506cd
--- /dev/null
+++ b/src/core/frontend/input_interpreter.cpp
@@ -0,0 +1,45 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/frontend/input_interpreter.h"
7#include "core/hle/service/hid/controllers/npad.h"
8#include "core/hle/service/hid/hid.h"
9#include "core/hle/service/sm/sm.h"
10
11InputInterpreter::InputInterpreter(Core::System& system)
12 : npad{system.ServiceManager()
13 .GetService<Service::HID::Hid>("hid")
14 ->GetAppletResource()
15 ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad)} {}
16
17InputInterpreter::~InputInterpreter() = default;
18
19void InputInterpreter::PollInput() {
20 const u32 button_state = npad.GetAndResetPressState();
21
22 previous_index = current_index;
23 current_index = (current_index + 1) % button_states.size();
24
25 button_states[current_index] = button_state;
26}
27
28bool InputInterpreter::IsButtonPressedOnce(HIDButton button) const {
29 const bool current_press =
30 (button_states[current_index] & (1U << static_cast<u8>(button))) != 0;
31 const bool previous_press =
32 (button_states[previous_index] & (1U << static_cast<u8>(button))) != 0;
33
34 return current_press && !previous_press;
35}
36
37bool InputInterpreter::IsButtonHeld(HIDButton button) const {
38 u32 held_buttons{button_states[0]};
39
40 for (std::size_t i = 1; i < button_states.size(); ++i) {
41 held_buttons &= button_states[i];
42 }
43
44 return (held_buttons & (1U << static_cast<u8>(button))) != 0;
45}
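Both queries above are plain bitmask operations over the polled states: pressed-once is an edge detect between the newest sample and the one before it, and held is the AND of every buffered sample. The logic isolated into a standalone sketch:

    #include <array>
    #include <cstdint>

    // Edge detect: the button bit is set in the current sample and clear in
    // the previous one.
    constexpr bool PressedOnce(std::uint32_t current, std::uint32_t previous, std::uint8_t button) {
        const std::uint32_t mask = 1U << button;
        return (current & mask) != 0 && (previous & mask) == 0;
    }

    // Held: ANDing all samples keeps only the buttons that were down for the
    // entire window covered by the buffer.
    template <std::size_t N>
    constexpr bool Held(const std::array<std::uint32_t, N>& states, std::uint8_t button) {
        std::uint32_t held = ~std::uint32_t{0};
        for (const auto state : states) {
            held &= state;
        }
        return (held & (1U << button)) != 0;
    }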
diff --git a/src/core/frontend/input_interpreter.h b/src/core/frontend/input_interpreter.h
new file mode 100644
index 000000000..fea9aebe6
--- /dev/null
+++ b/src/core/frontend/input_interpreter.h
@@ -0,0 +1,120 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/common_types.h"
10
11namespace Core {
12class System;
13}
14
15namespace Service::HID {
16class Controller_NPad;
17}
18
19enum class HIDButton : u8 {
20 A,
21 B,
22 X,
23 Y,
24 LStick,
25 RStick,
26 L,
27 R,
28 ZL,
29 ZR,
30 Plus,
31 Minus,
32
33 DLeft,
34 DUp,
35 DRight,
36 DDown,
37
38 LStickLeft,
39 LStickUp,
40 LStickRight,
41 LStickDown,
42
43 RStickLeft,
44 RStickUp,
45 RStickRight,
46 RStickDown,
47
48 LeftSL,
49 LeftSR,
50
51 RightSL,
52 RightSR,
53};
54
55/**
56 * The InputInterpreter class interfaces with HID to retrieve button press states.
 57 * Input is intended to be polled every 50ms; a button is considered to be held
 58 * down once 400ms have elapsed since the initial button press, and subsequent
 59 * repeated presses then register every 50ms.
60 */
61class InputInterpreter {
62public:
63 explicit InputInterpreter(Core::System& system);
64 virtual ~InputInterpreter();
65
66 /// Gets a button state from HID and inserts it into the array of button states.
67 void PollInput();
68
69 /**
 70 * The specified button is considered to be pressed once if it is pressed in the
 71 * current state but was not pressed in the previous state.
72 *
73 * @param button The button to check.
74 *
75 * @returns True when the button is pressed once.
76 */
77 [[nodiscard]] bool IsButtonPressedOnce(HIDButton button) const;
78
79 /**
80 * Checks whether any of the buttons in the parameter list is pressed once.
81 *
82 * @tparam HIDButton The buttons to check.
83 *
84 * @returns True when at least one of the buttons is pressed once.
85 */
86 template <HIDButton... T>
87 [[nodiscard]] bool IsAnyButtonPressedOnce() {
88 return (IsButtonPressedOnce(T) || ...);
89 }
90
91 /**
92 * The specified button is considered to be held down if it is pressed in all 9 button states.
93 *
94 * @param button The button to check.
95 *
96 * @returns True when the button is held down.
97 */
98 [[nodiscard]] bool IsButtonHeld(HIDButton button) const;
99
100 /**
101 * Checks whether any of the buttons in the parameter list is held down.
102 *
103 * @tparam HIDButton The buttons to check.
104 *
105 * @returns True when at least one of the buttons is held down.
106 */
107 template <HIDButton... T>
108 [[nodiscard]] bool IsAnyButtonHeld() {
109 return (IsButtonHeld(T) || ...);
110 }
111
112private:
113 Service::HID::Controller_NPad& npad;
114
115 /// Stores 9 consecutive button states polled from HID.
116 std::array<u32, 9> button_states{};
117
118 std::size_t previous_index{};
119 std::size_t current_index{};
120};
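IsAnyButtonPressedOnce and IsAnyButtonHeld lean on a C++17 unary right fold over || applied to a non-type template parameter pack, which short-circuits exactly like a hand-written chain. A self-contained illustration of the construct:

    #include <cstdio>

    enum class HIDButton : unsigned char { A, B, X, Y };

    // Stub predicate so the example runs standalone: pretend only A is fresh.
    bool IsButtonPressedOnce(HIDButton button) {
        return button == HIDButton::A;
    }

    // Expands to IsButtonPressedOnce(B) || IsButtonPressedOnce(A) || ...
    template <HIDButton... T>
    bool IsAnyButtonPressedOnce() {
        return (IsButtonPressedOnce(T) || ...);
    }

    int main() {
        std::printf("%d\n", IsAnyButtonPressedOnce<HIDButton::B, HIDButton::A>()); // 1
        std::printf("%d\n", IsAnyButtonPressedOnce<HIDButton::X, HIDButton::Y>()); // 0
    }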
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index d57776ce9..56cc911d1 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -166,8 +166,23 @@ public:
166 ValidateHeader(); 166 ValidateHeader();
167 } 167 }
168 168
169 void PushImpl(s8 value);
170 void PushImpl(s16 value);
171 void PushImpl(s32 value);
172 void PushImpl(s64 value);
173 void PushImpl(u8 value);
174 void PushImpl(u16 value);
175 void PushImpl(u32 value);
176 void PushImpl(u64 value);
177 void PushImpl(float value);
178 void PushImpl(double value);
179 void PushImpl(bool value);
180 void PushImpl(ResultCode value);
181
169 template <typename T> 182 template <typename T>
170 void Push(T value); 183 void Push(T value) {
184 return PushImpl(value);
185 }
171 186
172 template <typename First, typename... Other> 187 template <typename First, typename... Other>
173 void Push(const First& first_value, const Other&... other_values); 188 void Push(const First& first_value, const Other&... other_values);
@@ -215,13 +230,11 @@ private:
215 230
216/// Push /// 231/// Push ///
217 232
218template <> 233inline void ResponseBuilder::PushImpl(s32 value) {
219inline void ResponseBuilder::Push(s32 value) {
220 cmdbuf[index++] = static_cast<u32>(value); 234 cmdbuf[index++] = static_cast<u32>(value);
221} 235}
222 236
223template <> 237inline void ResponseBuilder::PushImpl(u32 value) {
224inline void ResponseBuilder::Push(u32 value) {
225 cmdbuf[index++] = value; 238 cmdbuf[index++] = value;
226} 239}
227 240
@@ -233,62 +246,52 @@ void ResponseBuilder::PushRaw(const T& value) {
233 index += (sizeof(T) + 3) / 4; // round up to word length 246 index += (sizeof(T) + 3) / 4; // round up to word length
234} 247}
235 248
236template <> 249inline void ResponseBuilder::PushImpl(ResultCode value) {
237inline void ResponseBuilder::Push(ResultCode value) {
 238 // Result codes are 64-bit in the IPC buffer; the high word is unused, so a zero is pushed after the raw value. 250 // Result codes are 64-bit in the IPC buffer; the high word is unused, so a zero is pushed after the raw value.
239 Push(value.raw); 251 Push(value.raw);
240 Push<u32>(0); 252 Push<u32>(0);
241} 253}
242 254
243template <> 255inline void ResponseBuilder::PushImpl(s8 value) {
244inline void ResponseBuilder::Push(s8 value) {
245 PushRaw(value); 256 PushRaw(value);
246} 257}
247 258
248template <> 259inline void ResponseBuilder::PushImpl(s16 value) {
249inline void ResponseBuilder::Push(s16 value) {
250 PushRaw(value); 260 PushRaw(value);
251} 261}
252 262
253template <> 263inline void ResponseBuilder::PushImpl(s64 value) {
254inline void ResponseBuilder::Push(s64 value) { 264 PushImpl(static_cast<u32>(value));
255 Push(static_cast<u32>(value)); 265 PushImpl(static_cast<u32>(value >> 32));
256 Push(static_cast<u32>(value >> 32));
257} 266}
258 267
259template <> 268inline void ResponseBuilder::PushImpl(u8 value) {
260inline void ResponseBuilder::Push(u8 value) {
261 PushRaw(value); 269 PushRaw(value);
262} 270}
263 271
264template <> 272inline void ResponseBuilder::PushImpl(u16 value) {
265inline void ResponseBuilder::Push(u16 value) {
266 PushRaw(value); 273 PushRaw(value);
267} 274}
268 275
269template <> 276inline void ResponseBuilder::PushImpl(u64 value) {
270inline void ResponseBuilder::Push(u64 value) { 277 PushImpl(static_cast<u32>(value));
271 Push(static_cast<u32>(value)); 278 PushImpl(static_cast<u32>(value >> 32));
272 Push(static_cast<u32>(value >> 32));
273} 279}
274 280
275template <> 281inline void ResponseBuilder::PushImpl(float value) {
276inline void ResponseBuilder::Push(float value) {
277 u32 integral; 282 u32 integral;
278 std::memcpy(&integral, &value, sizeof(u32)); 283 std::memcpy(&integral, &value, sizeof(u32));
279 Push(integral); 284 PushImpl(integral);
280} 285}
281 286
282template <> 287inline void ResponseBuilder::PushImpl(double value) {
283inline void ResponseBuilder::Push(double value) {
284 u64 integral; 288 u64 integral;
285 std::memcpy(&integral, &value, sizeof(u64)); 289 std::memcpy(&integral, &value, sizeof(u64));
286 Push(integral); 290 PushImpl(integral);
287} 291}
288 292
289template <> 293inline void ResponseBuilder::PushImpl(bool value) {
290inline void ResponseBuilder::Push(bool value) { 294 PushImpl(static_cast<u8>(value));
291 Push(static_cast<u8>(value));
292} 295}
293 296
294template <typename First, typename... Other> 297template <typename First, typename... Other>
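The ipc_helpers rewrite trades a family of template<> specializations of Push for ordinary PushImpl overloads behind one generic Push<T>: overload resolution picks the encoding, and the 64-bit overloads decompose into two 32-bit word pushes. The shape of the pattern, reduced to a standalone sketch:

    #include <cstdint>
    #include <vector>

    // A generic entry point forwarding to concrete overloads, mirroring how
    // ResponseBuilder::Push<T> now dispatches to PushImpl.
    class WordBuilder {
    public:
        template <typename T>
        void Push(T value) {
            PushImpl(value);
        }

        const std::vector<std::uint32_t>& Words() const {
            return words;
        }

    private:
        void PushImpl(std::uint32_t value) {
            words.push_back(value);
        }
        void PushImpl(std::uint64_t value) {
            PushImpl(static_cast<std::uint32_t>(value));       // low word first
            PushImpl(static_cast<std::uint32_t>(value >> 32)); // then high word
        }

        std::vector<std::uint32_t> words;
    };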
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 048acd30e..20ffa7d47 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -12,8 +12,9 @@
12#include "core/hle/kernel/address_arbiter.h" 12#include "core/hle/kernel/address_arbiter.h"
13#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
14#include "core/hle/kernel/handle_table.h" 14#include "core/hle/kernel/handle_table.h"
15#include "core/hle/kernel/k_scheduler.h"
16#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
15#include "core/hle/kernel/kernel.h" 17#include "core/hle/kernel/kernel.h"
16#include "core/hle/kernel/scheduler.h"
17#include "core/hle/kernel/thread.h" 18#include "core/hle/kernel/thread.h"
18#include "core/hle/kernel/time_manager.h" 19#include "core/hle/kernel/time_manager.h"
19#include "core/hle/result.h" 20#include "core/hle/result.h"
@@ -58,7 +59,7 @@ ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 v
58} 59}
59 60
60ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) { 61ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
61 SchedulerLock lock(system.Kernel()); 62 KScopedSchedulerLock lock(system.Kernel());
62 const std::vector<std::shared_ptr<Thread>> waiting_threads = 63 const std::vector<std::shared_ptr<Thread>> waiting_threads =
63 GetThreadsWaitingOnAddress(address); 64 GetThreadsWaitingOnAddress(address);
64 WakeThreads(waiting_threads, num_to_wake); 65 WakeThreads(waiting_threads, num_to_wake);
@@ -67,7 +68,7 @@ ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
67 68
68ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, 69ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
69 s32 num_to_wake) { 70 s32 num_to_wake) {
70 SchedulerLock lock(system.Kernel()); 71 KScopedSchedulerLock lock(system.Kernel());
71 auto& memory = system.Memory(); 72 auto& memory = system.Memory();
72 73
73 // Ensure that we can write to the address. 74 // Ensure that we can write to the address.
@@ -92,7 +93,7 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32
92 93
93ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, 94ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
94 s32 num_to_wake) { 95 s32 num_to_wake) {
95 SchedulerLock lock(system.Kernel()); 96 KScopedSchedulerLock lock(system.Kernel());
96 auto& memory = system.Memory(); 97 auto& memory = system.Memory();
97 98
98 // Ensure that we can write to the address. 99 // Ensure that we can write to the address.
@@ -153,11 +154,11 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
153 bool should_decrement) { 154 bool should_decrement) {
154 auto& memory = system.Memory(); 155 auto& memory = system.Memory();
155 auto& kernel = system.Kernel(); 156 auto& kernel = system.Kernel();
156 Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); 157 Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
157 158
158 Handle event_handle = InvalidHandle; 159 Handle event_handle = InvalidHandle;
159 { 160 {
160 SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); 161 KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
161 162
162 if (current_thread->IsPendingTermination()) { 163 if (current_thread->IsPendingTermination()) {
163 lock.CancelSleep(); 164 lock.CancelSleep();
@@ -210,7 +211,7 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
210 } 211 }
211 212
212 { 213 {
213 SchedulerLock lock(kernel); 214 KScopedSchedulerLock lock(kernel);
214 if (current_thread->IsWaitingForArbitration()) { 215 if (current_thread->IsWaitingForArbitration()) {
215 RemoveThread(SharedFrom(current_thread)); 216 RemoveThread(SharedFrom(current_thread));
216 current_thread->WaitForArbitration(false); 217 current_thread->WaitForArbitration(false);
@@ -223,11 +224,11 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
223ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) { 224ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
224 auto& memory = system.Memory(); 225 auto& memory = system.Memory();
225 auto& kernel = system.Kernel(); 226 auto& kernel = system.Kernel();
226 Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); 227 Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
227 228
228 Handle event_handle = InvalidHandle; 229 Handle event_handle = InvalidHandle;
229 { 230 {
230 SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); 231 KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
231 232
232 if (current_thread->IsPendingTermination()) { 233 if (current_thread->IsPendingTermination()) {
233 lock.CancelSleep(); 234 lock.CancelSleep();
@@ -265,7 +266,7 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t
265 } 266 }
266 267
267 { 268 {
268 SchedulerLock lock(kernel); 269 KScopedSchedulerLock lock(kernel);
269 if (current_thread->IsWaitingForArbitration()) { 270 if (current_thread->IsWaitingForArbitration()) {
270 RemoveThread(SharedFrom(current_thread)); 271 RemoveThread(SharedFrom(current_thread));
271 current_thread->WaitForArbitration(false); 272 current_thread->WaitForArbitration(false);
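Every SchedulerLock in this file becomes a KScopedSchedulerLock, an RAII guard over the kernel's scheduler lock (k_scoped_lock.h in the diffstat). The idiom itself, reconstructed as an assumption-level sketch rather than the yuzu implementation:

    // RAII guard: Lock() on construction, Unlock() when the scope ends, even
    // on early return, matching how the KScopedSchedulerLock blocks are used.
    template <typename T>
    class ScopedLock {
    public:
        explicit ScopedLock(T& lock_) : lock{lock_} {
            lock.Lock();
        }
        ~ScopedLock() {
            lock.Unlock();
        }

        ScopedLock(const ScopedLock&) = delete;
        ScopedLock& operator=(const ScopedLock&) = delete;

    private:
        T& lock;
    };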
diff --git a/src/core/hle/kernel/global_scheduler_context.cpp b/src/core/hle/kernel/global_scheduler_context.cpp
new file mode 100644
index 000000000..a133e8ed0
--- /dev/null
+++ b/src/core/hle/kernel/global_scheduler_context.cpp
@@ -0,0 +1,52 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <mutex>
6
7#include "common/assert.h"
8#include "core/core.h"
9#include "core/hle/kernel/global_scheduler_context.h"
10#include "core/hle/kernel/k_scheduler.h"
11#include "core/hle/kernel/kernel.h"
12
13namespace Kernel {
14
15GlobalSchedulerContext::GlobalSchedulerContext(KernelCore& kernel)
16 : kernel{kernel}, scheduler_lock{kernel} {}
17
18GlobalSchedulerContext::~GlobalSchedulerContext() = default;
19
20void GlobalSchedulerContext::AddThread(std::shared_ptr<Thread> thread) {
21 std::scoped_lock lock{global_list_guard};
22 thread_list.push_back(std::move(thread));
23}
24
25void GlobalSchedulerContext::RemoveThread(std::shared_ptr<Thread> thread) {
26 std::scoped_lock lock{global_list_guard};
27 thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
28 thread_list.end());
29}
30
31void GlobalSchedulerContext::PreemptThreads() {
32 // The priority levels at which the global scheduler preempts threads every 10 ms. They are
33 // ordered from Core 0 to Core 3.
34 static constexpr std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities{
35 59,
36 59,
37 59,
38 63,
39 };
40
41 ASSERT(IsLocked());
42 for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
43 const u32 priority = preemption_priorities[core_id];
44 kernel.Scheduler(core_id).RotateScheduledQueue(core_id, priority);
45 }
46}
47
48bool GlobalSchedulerContext::IsLocked() const {
49 return scheduler_lock.IsLockedByCurrentThread();
50}
51
52} // namespace Kernel
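
PreemptThreads() asserts that the scheduler lock is held and, per its documentation, runs on a 10 ms cadence. A hedged sketch of how a periodic tick might drive it; the callback wiring is an assumption, only the lock-then-preempt sequence mirrors the code above:

// Hypothetical 10 ms tick handler; the event registration mechanism is not shown.
void OnPreemptionTick(Kernel::KernelCore& kernel) {
    Kernel::KScopedSchedulerLock lock{kernel}; // satisfies the IsLocked() assert
    kernel.GlobalSchedulerContext().PreemptThreads();
}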
diff --git a/src/core/hle/kernel/global_scheduler_context.h b/src/core/hle/kernel/global_scheduler_context.h
new file mode 100644
index 000000000..5c7b89290
--- /dev/null
+++ b/src/core/hle/kernel/global_scheduler_context.h
@@ -0,0 +1,81 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <vector>
9
10#include "common/common_types.h"
11#include "common/spin_lock.h"
12#include "core/hardware_properties.h"
13#include "core/hle/kernel/k_priority_queue.h"
14#include "core/hle/kernel/k_scheduler_lock.h"
15#include "core/hle/kernel/thread.h"
16
17namespace Kernel {
18
19class KernelCore;
20class SchedulerLock;
21
22using KSchedulerPriorityQueue =
23 KPriorityQueue<Thread, Core::Hardware::NUM_CPU_CORES, THREADPRIO_LOWEST, THREADPRIO_HIGHEST>;
24constexpr s32 HighestCoreMigrationAllowedPriority = 2;
25
26class GlobalSchedulerContext final {
27 friend class KScheduler;
28
29public:
30 using LockType = KAbstractSchedulerLock<KScheduler>;
31
32 explicit GlobalSchedulerContext(KernelCore& kernel);
33 ~GlobalSchedulerContext();
34
35 /// Adds a new thread to the scheduler
36 void AddThread(std::shared_ptr<Thread> thread);
37
38 /// Removes a thread from the scheduler
39 void RemoveThread(std::shared_ptr<Thread> thread);
40
41 /// Returns a list of all threads managed by the scheduler
42 [[nodiscard]] const std::vector<std::shared_ptr<Thread>>& GetThreadList() const {
43 return thread_list;
44 }
45
46 /**
47 * Rotates the scheduling queues of threads at a preemption priority and then does
48 * some core rebalancing. Preemption priorities can be found in the array
49 * 'preemption_priorities'.
50 *
51 * @note This operation happens every 10ms.
52 */
53 void PreemptThreads();
54
55 /// Returns true if the global scheduler lock is acquired
56 bool IsLocked() const;
57
58 [[nodiscard]] LockType& SchedulerLock() {
59 return scheduler_lock;
60 }
61
62 [[nodiscard]] const LockType& SchedulerLock() const {
63 return scheduler_lock;
64 }
65
66private:
67 friend class KScopedSchedulerLock;
68 friend class KScopedSchedulerLockAndSleep;
69
70 KernelCore& kernel;
71
72 std::atomic_bool scheduler_update_needed{};
73 KSchedulerPriorityQueue priority_queue;
74 LockType scheduler_lock;
75
 76 /// Lists all threads that are alive (i.e., not yet destroyed).
77 std::vector<std::shared_ptr<Thread>> thread_list;
78 Common::SpinLock global_list_guard{};
79};
80
81} // namespace Kernel
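
For scale: assuming the conventional HOS thread-priority range (THREADPRIO_HIGHEST = 0 through THREADPRIO_LOWEST = 63) and the 4-core configuration, the KSchedulerPriorityQueue alias above spans 64 priority levels per core. A hedged illustration of those bounds:

// Illustrative only; the concrete values assume THREADPRIO_HIGHEST == 0,
// THREADPRIO_LOWEST == 63, and Core::Hardware::NUM_CPU_CORES == 4.
static_assert(Kernel::KSchedulerPriorityQueue::NumCores == 4);
static_assert(Kernel::KSchedulerPriorityQueue::NumPriority == 64); // 63 - 0 + 1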
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 3e745c18b..40988b0fd 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -8,9 +8,9 @@
8#include "core/core.h" 8#include "core/core.h"
9#include "core/hle/kernel/errors.h" 9#include "core/hle/kernel/errors.h"
10#include "core/hle/kernel/handle_table.h" 10#include "core/hle/kernel/handle_table.h"
11#include "core/hle/kernel/k_scheduler.h"
11#include "core/hle/kernel/kernel.h" 12#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/process.h" 13#include "core/hle/kernel/process.h"
13#include "core/hle/kernel/scheduler.h"
14#include "core/hle/kernel/thread.h" 14#include "core/hle/kernel/thread.h"
15 15
16namespace Kernel { 16namespace Kernel {
@@ -105,7 +105,7 @@ bool HandleTable::IsValid(Handle handle) const {
105 105
106std::shared_ptr<Object> HandleTable::GetGeneric(Handle handle) const { 106std::shared_ptr<Object> HandleTable::GetGeneric(Handle handle) const {
107 if (handle == CurrentThread) { 107 if (handle == CurrentThread) {
108 return SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); 108 return SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
109 } else if (handle == CurrentProcess) { 109 } else if (handle == CurrentProcess) {
110 return SharedFrom(kernel.CurrentProcess()); 110 return SharedFrom(kernel.CurrentProcess());
111 } 111 }
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 81f85643b..83decf6cf 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -17,11 +17,12 @@
17#include "core/hle/kernel/errors.h" 17#include "core/hle/kernel/errors.h"
18#include "core/hle/kernel/handle_table.h" 18#include "core/hle/kernel/handle_table.h"
19#include "core/hle/kernel/hle_ipc.h" 19#include "core/hle/kernel/hle_ipc.h"
20#include "core/hle/kernel/k_scheduler.h"
21#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
20#include "core/hle/kernel/kernel.h" 22#include "core/hle/kernel/kernel.h"
21#include "core/hle/kernel/object.h" 23#include "core/hle/kernel/object.h"
22#include "core/hle/kernel/process.h" 24#include "core/hle/kernel/process.h"
23#include "core/hle/kernel/readable_event.h" 25#include "core/hle/kernel/readable_event.h"
24#include "core/hle/kernel/scheduler.h"
25#include "core/hle/kernel/server_session.h" 26#include "core/hle/kernel/server_session.h"
26#include "core/hle/kernel/thread.h" 27#include "core/hle/kernel/thread.h"
27#include "core/hle/kernel/time_manager.h" 28#include "core/hle/kernel/time_manager.h"
@@ -45,44 +46,6 @@ void SessionRequestHandler::ClientDisconnected(
45 boost::range::remove_erase(connected_sessions, server_session); 46 boost::range::remove_erase(connected_sessions, server_session);
46} 47}
47 48
48std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
49 const std::string& reason, u64 timeout, WakeupCallback&& callback,
50 std::shared_ptr<WritableEvent> writable_event) {
51 // Put the client thread to sleep until the wait event is signaled or the timeout expires.
52
53 if (!writable_event) {
54 // Create event if not provided
55 const auto pair = WritableEvent::CreateEventPair(kernel, "HLE Pause Event: " + reason);
56 writable_event = pair.writable;
57 }
58
59 {
60 Handle event_handle = InvalidHandle;
61 SchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout);
62 thread->SetHLECallback(
63 [context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool {
64 ThreadWakeupReason reason = thread->GetSignalingResult() == RESULT_TIMEOUT
65 ? ThreadWakeupReason::Timeout
66 : ThreadWakeupReason::Signal;
67 callback(thread, context, reason);
68 context.WriteToOutgoingCommandBuffer(*thread);
69 return true;
70 });
71 const auto readable_event{writable_event->GetReadableEvent()};
72 writable_event->Clear();
73 thread->SetHLESyncObject(readable_event.get());
74 thread->SetStatus(ThreadStatus::WaitHLEEvent);
75 thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
76 readable_event->AddWaitingThread(thread);
77 lock.Release();
78 thread->SetHLETimeEvent(event_handle);
79 }
80
81 is_thread_waiting = true;
82
83 return writable_event;
84}
85
86HLERequestContext::HLERequestContext(KernelCore& kernel, Core::Memory::Memory& memory, 49HLERequestContext::HLERequestContext(KernelCore& kernel, Core::Memory::Memory& memory,
87 std::shared_ptr<ServerSession> server_session, 50 std::shared_ptr<ServerSession> server_session,
88 std::shared_ptr<Thread> thread) 51 std::shared_ptr<Thread> thread)
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index c31a65476..b112e1ebd 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -129,23 +129,6 @@ public:
129 using WakeupCallback = std::function<void( 129 using WakeupCallback = std::function<void(
130 std::shared_ptr<Thread> thread, HLERequestContext& context, ThreadWakeupReason reason)>; 130 std::shared_ptr<Thread> thread, HLERequestContext& context, ThreadWakeupReason reason)>;
131 131
132 /**
133 * Puts the specified guest thread to sleep until the returned event is signaled or until the
134 * specified timeout expires.
135 * @param reason Reason for pausing the thread, to be used for debugging purposes.
136 * @param timeout Timeout in nanoseconds after which the thread will be awoken and the callback
137 * invoked with a Timeout reason.
138 * @param callback Callback to be invoked when the thread is resumed. This callback must write
139 * the entire command response once again, regardless of the state of it before this function
140 * was called.
141 * @param writable_event Event to use to wake up the thread. If unspecified, an event will be
142 * created.
143 * @returns Event that when signaled will resume the thread and call the callback function.
144 */
145 std::shared_ptr<WritableEvent> SleepClientThread(
146 const std::string& reason, u64 timeout, WakeupCallback&& callback,
147 std::shared_ptr<WritableEvent> writable_event = nullptr);
148
149 /// Populates this context with data from the requesting process/thread. 132 /// Populates this context with data from the requesting process/thread.
150 ResultCode PopulateFromIncomingCommandBuffer(const HandleTable& handle_table, 133 ResultCode PopulateFromIncomingCommandBuffer(const HandleTable& handle_table,
151 u32_le* src_cmdbuf); 134 u32_le* src_cmdbuf);
diff --git a/src/core/hle/kernel/k_affinity_mask.h b/src/core/hle/kernel/k_affinity_mask.h
new file mode 100644
index 000000000..dd73781cd
--- /dev/null
+++ b/src/core/hle/kernel/k_affinity_mask.h
@@ -0,0 +1,58 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file references various implementation details from Atmosphere, an open-source firmware for
6// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
7
8#pragma once
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "core/hardware_properties.h"
13
14namespace Kernel {
15
16class KAffinityMask {
17public:
18 constexpr KAffinityMask() = default;
19
20 [[nodiscard]] constexpr u64 GetAffinityMask() const {
21 return this->mask;
22 }
23
24 constexpr void SetAffinityMask(u64 new_mask) {
25 ASSERT((new_mask & ~AllowedAffinityMask) == 0);
26 this->mask = new_mask;
27 }
28
29 [[nodiscard]] constexpr bool GetAffinity(s32 core) const {
30 return this->mask & GetCoreBit(core);
31 }
32
33 constexpr void SetAffinity(s32 core, bool set) {
34 ASSERT(0 <= core && core < static_cast<s32>(Core::Hardware::NUM_CPU_CORES));
35
36 if (set) {
37 this->mask |= GetCoreBit(core);
38 } else {
39 this->mask &= ~GetCoreBit(core);
40 }
41 }
42
43 constexpr void SetAll() {
44 this->mask = AllowedAffinityMask;
45 }
46
47private:
48 [[nodiscard]] static constexpr u64 GetCoreBit(s32 core) {
49 ASSERT(0 <= core && core < static_cast<s32>(Core::Hardware::NUM_CPU_CORES));
50 return (1ULL << core);
51 }
52
53 static constexpr u64 AllowedAffinityMask = (1ULL << Core::Hardware::NUM_CPU_CORES) - 1;
54
55 u64 mask{};
56};
57
58} // namespace Kernel
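
A short usage sketch of the mask above; the expected values assume the 4-core configuration:

// Pin a hypothetical thread to cores 0 and 2, then inspect the result.
Kernel::KAffinityMask mask;
mask.SetAffinity(0, true);
mask.SetAffinity(2, true);
ASSERT(mask.GetAffinityMask() == 0b0101); // bits 0 and 2 set
ASSERT(mask.GetAffinity(2) && !mask.GetAffinity(1));
mask.SetAll(); // every valid core bit, i.e. (1ULL << NUM_CPU_CORES) - 1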
diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h
new file mode 100644
index 000000000..99fb8fe93
--- /dev/null
+++ b/src/core/hle/kernel/k_priority_queue.h
@@ -0,0 +1,451 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file references various implementation details from Atmosphere, an open-source firmware for
6// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
7
8#pragma once
9
10#include <array>
11#include <concepts>
12
13#include "common/assert.h"
14#include "common/bit_set.h"
15#include "common/bit_util.h"
16#include "common/common_types.h"
17#include "common/concepts.h"
18
19namespace Kernel {
20
21class Thread;
22
23template <typename T>
24concept KPriorityQueueAffinityMask = !std::is_reference_v<T> && requires(T & t) {
25 { t.GetAffinityMask() }
26 ->Common::ConvertibleTo<u64>;
27 {t.SetAffinityMask(std::declval<u64>())};
28
29 { t.GetAffinity(std::declval<int32_t>()) }
30 ->std::same_as<bool>;
31 {t.SetAffinity(std::declval<int32_t>(), std::declval<bool>())};
32 {t.SetAll()};
33};
34
35template <typename T>
36concept KPriorityQueueMember = !std::is_reference_v<T> && requires(T & t) {
37 {typename T::QueueEntry()};
38 {(typename T::QueueEntry()).Initialize()};
39 {(typename T::QueueEntry()).SetPrev(std::addressof(t))};
40 {(typename T::QueueEntry()).SetNext(std::addressof(t))};
41 { (typename T::QueueEntry()).GetNext() }
42 ->std::same_as<T*>;
43 { (typename T::QueueEntry()).GetPrev() }
44 ->std::same_as<T*>;
45 { t.GetPriorityQueueEntry(std::declval<s32>()) }
46 ->std::same_as<typename T::QueueEntry&>;
47
48 {t.GetAffinityMask()};
49 { typename std::remove_cvref<decltype(t.GetAffinityMask())>::type() }
50 ->KPriorityQueueAffinityMask;
51
52 { t.GetActiveCore() }
53 ->Common::ConvertibleTo<s32>;
54 { t.GetPriority() }
55 ->Common::ConvertibleTo<s32>;
56};
57
58template <typename Member, size_t _NumCores, int LowestPriority, int HighestPriority>
59requires KPriorityQueueMember<Member> class KPriorityQueue {
60public:
61 using AffinityMaskType = typename std::remove_cv_t<
62 typename std::remove_reference<decltype(std::declval<Member>().GetAffinityMask())>::type>;
63
64 static_assert(LowestPriority >= 0);
65 static_assert(HighestPriority >= 0);
66 static_assert(LowestPriority >= HighestPriority);
67 static constexpr size_t NumPriority = LowestPriority - HighestPriority + 1;
68 static constexpr size_t NumCores = _NumCores;
69
70 static constexpr bool IsValidCore(s32 core) {
71 return 0 <= core && core < static_cast<s32>(NumCores);
72 }
73
74 static constexpr bool IsValidPriority(s32 priority) {
75 return HighestPriority <= priority && priority <= LowestPriority + 1;
76 }
77
78private:
79 using Entry = typename Member::QueueEntry;
80
81public:
82 class KPerCoreQueue {
83 private:
84 std::array<Entry, NumCores> root{};
85
86 public:
87 constexpr KPerCoreQueue() {
88 for (auto& per_core_root : root) {
89 per_core_root.Initialize();
90 }
91 }
92
93 constexpr bool PushBack(s32 core, Member* member) {
94 // Get the entry associated with the member.
95 Entry& member_entry = member->GetPriorityQueueEntry(core);
96
97 // Get the entry associated with the end of the queue.
98 Member* tail = this->root[core].GetPrev();
99 Entry& tail_entry =
100 (tail != nullptr) ? tail->GetPriorityQueueEntry(core) : this->root[core];
101
102 // Link the entries.
103 member_entry.SetPrev(tail);
104 member_entry.SetNext(nullptr);
105 tail_entry.SetNext(member);
106 this->root[core].SetPrev(member);
107
108 return tail == nullptr;
109 }
110
111 constexpr bool PushFront(s32 core, Member* member) {
112 // Get the entry associated with the member.
113 Entry& member_entry = member->GetPriorityQueueEntry(core);
114
115 // Get the entry associated with the front of the queue.
116 Member* head = this->root[core].GetNext();
117 Entry& head_entry =
118 (head != nullptr) ? head->GetPriorityQueueEntry(core) : this->root[core];
119
120 // Link the entries.
121 member_entry.SetPrev(nullptr);
122 member_entry.SetNext(head);
123 head_entry.SetPrev(member);
124 this->root[core].SetNext(member);
125
126 return (head == nullptr);
127 }
128
129 constexpr bool Remove(s32 core, Member* member) {
130 // Get the entry associated with the member.
131 Entry& member_entry = member->GetPriorityQueueEntry(core);
132
133 // Get the entries associated with next and prev.
134 Member* prev = member_entry.GetPrev();
135 Member* next = member_entry.GetNext();
136 Entry& prev_entry =
137 (prev != nullptr) ? prev->GetPriorityQueueEntry(core) : this->root[core];
138 Entry& next_entry =
139 (next != nullptr) ? next->GetPriorityQueueEntry(core) : this->root[core];
140
141 // Unlink.
142 prev_entry.SetNext(next);
143 next_entry.SetPrev(prev);
144
145 return (this->GetFront(core) == nullptr);
146 }
147
148 constexpr Member* GetFront(s32 core) const {
149 return this->root[core].GetNext();
150 }
151 };
152
153 class KPriorityQueueImpl {
154 public:
155 constexpr KPriorityQueueImpl() = default;
156
157 constexpr void PushBack(s32 priority, s32 core, Member* member) {
158 ASSERT(IsValidCore(core));
159 ASSERT(IsValidPriority(priority));
160
161 if (priority > LowestPriority) {
162 return;
163 }
164
165 if (this->queues[priority].PushBack(core, member)) {
166 this->available_priorities[core].SetBit(priority);
167 }
168 }
169
170 constexpr void PushFront(s32 priority, s32 core, Member* member) {
171 ASSERT(IsValidCore(core));
172 ASSERT(IsValidPriority(priority));
173
174 if (priority > LowestPriority) {
175 return;
176 }
177
178 if (this->queues[priority].PushFront(core, member)) {
179 this->available_priorities[core].SetBit(priority);
180 }
181 }
182
183 constexpr void Remove(s32 priority, s32 core, Member* member) {
184 ASSERT(IsValidCore(core));
185 ASSERT(IsValidPriority(priority));
186
187 if (priority > LowestPriority) {
188 return;
189 }
190
191 if (this->queues[priority].Remove(core, member)) {
192 this->available_priorities[core].ClearBit(priority);
193 }
194 }
195
196 constexpr Member* GetFront(s32 core) const {
197 ASSERT(IsValidCore(core));
198
199 const s32 priority =
200 static_cast<s32>(this->available_priorities[core].CountLeadingZero());
201 if (priority <= LowestPriority) {
202 return this->queues[priority].GetFront(core);
203 } else {
204 return nullptr;
205 }
206 }
207
208 constexpr Member* GetFront(s32 priority, s32 core) const {
209 ASSERT(IsValidCore(core));
210 ASSERT(IsValidPriority(priority));
211
212 if (priority <= LowestPriority) {
213 return this->queues[priority].GetFront(core);
214 } else {
215 return nullptr;
216 }
217 }
218
219 constexpr Member* GetNext(s32 core, const Member* member) const {
220 ASSERT(IsValidCore(core));
221
222 Member* next = member->GetPriorityQueueEntry(core).GetNext();
223 if (next == nullptr) {
224 const s32 priority = static_cast<s32>(
225 this->available_priorities[core].GetNextSet(member->GetPriority()));
226 if (priority <= LowestPriority) {
227 next = this->queues[priority].GetFront(core);
228 }
229 }
230 return next;
231 }
232
233 constexpr void MoveToFront(s32 priority, s32 core, Member* member) {
234 ASSERT(IsValidCore(core));
235 ASSERT(IsValidPriority(priority));
236
237 if (priority <= LowestPriority) {
238 this->queues[priority].Remove(core, member);
239 this->queues[priority].PushFront(core, member);
240 }
241 }
242
243 constexpr Member* MoveToBack(s32 priority, s32 core, Member* member) {
244 ASSERT(IsValidCore(core));
245 ASSERT(IsValidPriority(priority));
246
247 if (priority <= LowestPriority) {
248 this->queues[priority].Remove(core, member);
249 this->queues[priority].PushBack(core, member);
250 return this->queues[priority].GetFront(core);
251 } else {
252 return nullptr;
253 }
254 }
255
256 private:
257 std::array<KPerCoreQueue, NumPriority> queues{};
258 std::array<Common::BitSet64<NumPriority>, NumCores> available_priorities{};
259 };
260
261private:
262 KPriorityQueueImpl scheduled_queue;
263 KPriorityQueueImpl suggested_queue;
264
265private:
266 constexpr void ClearAffinityBit(u64& affinity, s32 core) {
267 affinity &= ~(u64(1) << core);
268 }
269
270 constexpr s32 GetNextCore(u64& affinity) {
271 const s32 core = Common::CountTrailingZeroes64(affinity);
272 ClearAffinityBit(affinity, core);
273 return core;
274 }
275
276 constexpr void PushBack(s32 priority, Member* member) {
277 ASSERT(IsValidPriority(priority));
278
279 // Push onto the scheduled queue for its core, if we can.
280 u64 affinity = member->GetAffinityMask().GetAffinityMask();
281 if (const s32 core = member->GetActiveCore(); core >= 0) {
282 this->scheduled_queue.PushBack(priority, core, member);
283 ClearAffinityBit(affinity, core);
284 }
285
286 // And suggest the thread for all other cores.
287 while (affinity) {
288 this->suggested_queue.PushBack(priority, GetNextCore(affinity), member);
289 }
290 }
291
292 constexpr void PushFront(s32 priority, Member* member) {
293 ASSERT(IsValidPriority(priority));
294
295 // Push onto the scheduled queue for its core, if we can.
296 u64 affinity = member->GetAffinityMask().GetAffinityMask();
297 if (const s32 core = member->GetActiveCore(); core >= 0) {
298 this->scheduled_queue.PushFront(priority, core, member);
299 ClearAffinityBit(affinity, core);
300 }
301
302 // And suggest the thread for all other cores.
303 // Note: Nintendo pushes onto the back of the suggested queue, not the front.
304 while (affinity) {
305 this->suggested_queue.PushBack(priority, GetNextCore(affinity), member);
306 }
307 }
308
309 constexpr void Remove(s32 priority, Member* member) {
310 ASSERT(IsValidPriority(priority));
311
312 // Remove from the scheduled queue for its core.
313 u64 affinity = member->GetAffinityMask().GetAffinityMask();
314 if (const s32 core = member->GetActiveCore(); core >= 0) {
315 this->scheduled_queue.Remove(priority, core, member);
316 ClearAffinityBit(affinity, core);
317 }
318
319 // Remove from the suggested queue for all other cores.
320 while (affinity) {
321 this->suggested_queue.Remove(priority, GetNextCore(affinity), member);
322 }
323 }
324
325public:
326 constexpr KPriorityQueue() = default;
327
328 // Getters.
329 constexpr Member* GetScheduledFront(s32 core) const {
330 return this->scheduled_queue.GetFront(core);
331 }
332
333 constexpr Member* GetScheduledFront(s32 core, s32 priority) const {
334 return this->scheduled_queue.GetFront(priority, core);
335 }
336
337 constexpr Member* GetSuggestedFront(s32 core) const {
338 return this->suggested_queue.GetFront(core);
339 }
340
341 constexpr Member* GetSuggestedFront(s32 core, s32 priority) const {
342 return this->suggested_queue.GetFront(priority, core);
343 }
344
345 constexpr Member* GetScheduledNext(s32 core, const Member* member) const {
346 return this->scheduled_queue.GetNext(core, member);
347 }
348
349 constexpr Member* GetSuggestedNext(s32 core, const Member* member) const {
350 return this->suggested_queue.GetNext(core, member);
351 }
352
353 constexpr Member* GetSamePriorityNext(s32 core, const Member* member) const {
354 return member->GetPriorityQueueEntry(core).GetNext();
355 }
356
357 // Mutators.
358 constexpr void PushBack(Member* member) {
359 this->PushBack(member->GetPriority(), member);
360 }
361
362 constexpr void Remove(Member* member) {
363 this->Remove(member->GetPriority(), member);
364 }
365
366 constexpr void MoveToScheduledFront(Member* member) {
367 this->scheduled_queue.MoveToFront(member->GetPriority(), member->GetActiveCore(), member);
368 }
369
370 constexpr Thread* MoveToScheduledBack(Member* member) {
371 return this->scheduled_queue.MoveToBack(member->GetPriority(), member->GetActiveCore(),
372 member);
373 }
374
375 // First class fancy operations.
376 constexpr void ChangePriority(s32 prev_priority, bool is_running, Member* member) {
377 ASSERT(IsValidPriority(prev_priority));
378
379 // Remove the member from the queues.
380 const s32 new_priority = member->GetPriority();
381 this->Remove(prev_priority, member);
382
383 // And enqueue. If the member is running, we want to keep it running.
384 if (is_running) {
385 this->PushFront(new_priority, member);
386 } else {
387 this->PushBack(new_priority, member);
388 }
389 }
390
391 constexpr void ChangeAffinityMask(s32 prev_core, const AffinityMaskType& prev_affinity,
392 Member* member) {
393 // Get the new information.
394 const s32 priority = member->GetPriority();
395 const AffinityMaskType& new_affinity = member->GetAffinityMask();
396 const s32 new_core = member->GetActiveCore();
397
398 // Remove the member from all queues it was in before.
399 for (s32 core = 0; core < static_cast<s32>(NumCores); core++) {
400 if (prev_affinity.GetAffinity(core)) {
401 if (core == prev_core) {
402 this->scheduled_queue.Remove(priority, core, member);
403 } else {
404 this->suggested_queue.Remove(priority, core, member);
405 }
406 }
407 }
408
409 // And add the member to all queues it should be in now.
410 for (s32 core = 0; core < static_cast<s32>(NumCores); core++) {
411 if (new_affinity.GetAffinity(core)) {
412 if (core == new_core) {
413 this->scheduled_queue.PushBack(priority, core, member);
414 } else {
415 this->suggested_queue.PushBack(priority, core, member);
416 }
417 }
418 }
419 }
420
421 constexpr void ChangeCore(s32 prev_core, Member* member, bool to_front = false) {
422 // Get the new information.
423 const s32 new_core = member->GetActiveCore();
424 const s32 priority = member->GetPriority();
425
426 // We don't need to do anything if the core is the same.
427 if (prev_core != new_core) {
428 // Remove from the scheduled queue for the previous core.
429 if (prev_core >= 0) {
430 this->scheduled_queue.Remove(priority, prev_core, member);
431 }
432
433 // Remove from the suggested queue and add to the scheduled queue for the new core.
434 if (new_core >= 0) {
435 this->suggested_queue.Remove(priority, new_core, member);
436 if (to_front) {
437 this->scheduled_queue.PushFront(priority, new_core, member);
438 } else {
439 this->scheduled_queue.PushBack(priority, new_core, member);
440 }
441 }
442
443 // Add to the suggested queue for the previous core.
444 if (prev_core >= 0) {
445 this->suggested_queue.PushBack(priority, prev_core, member);
446 }
447 }
448 }
449};
450
451} // namespace Kernel
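
The PushBack/PushFront/Remove helpers above all iterate the affinity mask by peeling off set bits with count-trailing-zeroes. A standalone, runnable sketch of that loop shape (C++20, independent of the kernel types):

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
    std::uint64_t affinity = 0b1011; // cores 0, 1 and 3 allowed
    const int active_core = 0;       // scheduled on its active core...
    affinity &= ~(std::uint64_t{1} << active_core);
    while (affinity != 0) {          // ...and suggested on every other core
        const int core = std::countr_zero(affinity);
        affinity &= ~(std::uint64_t{1} << core);
        std::printf("suggest on core %d\n", core);
    }
}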
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp
new file mode 100644
index 000000000..c5fd82a6b
--- /dev/null
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -0,0 +1,784 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file references various implementation details from Atmosphere, an open-source firmware for
6// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
7
8#include "common/assert.h"
9#include "common/bit_util.h"
10#include "common/fiber.h"
11#include "common/logging/log.h"
12#include "core/arm/arm_interface.h"
13#include "core/core.h"
14#include "core/core_timing.h"
15#include "core/cpu_manager.h"
16#include "core/hle/kernel/k_scheduler.h"
17#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
18#include "core/hle/kernel/kernel.h"
19#include "core/hle/kernel/physical_core.h"
20#include "core/hle/kernel/process.h"
21#include "core/hle/kernel/thread.h"
22#include "core/hle/kernel/time_manager.h"
23
24namespace Kernel {
25
26static void IncrementScheduledCount(Kernel::Thread* thread) {
27 if (auto process = thread->GetOwnerProcess(); process) {
28 process->IncrementScheduledCount();
29 }
30}
31
32void KScheduler::RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedule,
33 Core::EmuThreadHandle global_thread) {
34 u32 current_core = global_thread.host_handle;
35 bool must_context_switch = global_thread.guest_handle != InvalidHandle &&
36 (current_core < Core::Hardware::NUM_CPU_CORES);
37
38 while (cores_pending_reschedule != 0) {
39 u32 core = Common::CountTrailingZeroes64(cores_pending_reschedule);
40 ASSERT(core < Core::Hardware::NUM_CPU_CORES);
41 if (!must_context_switch || core != current_core) {
42 auto& phys_core = kernel.PhysicalCore(core);
43 phys_core.Interrupt();
44 } else {
45 must_context_switch = true;
46 }
47 cores_pending_reschedule &= ~(1ULL << core);
48 }
49 if (must_context_switch) {
50 auto core_scheduler = kernel.CurrentScheduler();
51 kernel.ExitSVCProfile();
52 core_scheduler->RescheduleCurrentCore();
53 kernel.EnterSVCProfile();
54 }
55}
56
57u64 KScheduler::UpdateHighestPriorityThread(Thread* highest_thread) {
58 std::scoped_lock lock{guard};
59 if (Thread* prev_highest_thread = this->state.highest_priority_thread;
60 prev_highest_thread != highest_thread) {
61 if (prev_highest_thread != nullptr) {
62 IncrementScheduledCount(prev_highest_thread);
63 prev_highest_thread->SetLastScheduledTick(system.CoreTiming().GetCPUTicks());
64 }
65 if (this->state.should_count_idle) {
66 if (highest_thread != nullptr) {
67 // if (Process* process = highest_thread->GetOwnerProcess(); process != nullptr) {
68 // process->SetRunningThread(this->core_id, highest_thread,
69 // this->state.idle_count);
70 //}
71 } else {
72 this->state.idle_count++;
73 }
74 }
75
76 this->state.highest_priority_thread = highest_thread;
77 this->state.needs_scheduling = true;
78 return (1ULL << this->core_id);
79 } else {
80 return 0;
81 }
82}
83
84u64 KScheduler::UpdateHighestPriorityThreadsImpl(KernelCore& kernel) {
85 ASSERT(kernel.GlobalSchedulerContext().IsLocked());
86
87 // Clear that we need to update.
88 ClearSchedulerUpdateNeeded(kernel);
89
90 u64 cores_needing_scheduling = 0, idle_cores = 0;
91 Thread* top_threads[Core::Hardware::NUM_CPU_CORES];
92 auto& priority_queue = GetPriorityQueue(kernel);
93
 94 // We want to go over all cores, finding the highest priority thread and determining if
 95 // scheduling is needed for that core.
96 for (size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
97 Thread* top_thread = priority_queue.GetScheduledFront(static_cast<s32>(core_id));
98 if (top_thread != nullptr) {
99 // If the thread has no waiters, we need to check if the process has a thread pinned.
100 // TODO(bunnei): Implement thread pinning
101 } else {
102 idle_cores |= (1ULL << core_id);
103 }
104
105 top_threads[core_id] = top_thread;
106 cores_needing_scheduling |=
107 kernel.Scheduler(core_id).UpdateHighestPriorityThread(top_threads[core_id]);
108 }
109
110 // Idle cores are bad. We're going to try to migrate threads to each idle core in turn.
111 while (idle_cores != 0) {
112 u32 core_id = Common::CountTrailingZeroes64(idle_cores);
113 if (Thread* suggested = priority_queue.GetSuggestedFront(core_id); suggested != nullptr) {
114 s32 migration_candidates[Core::Hardware::NUM_CPU_CORES];
115 size_t num_candidates = 0;
116
117 // While we have a suggested thread, try to migrate it!
118 while (suggested != nullptr) {
119 // Check if the suggested thread is the top thread on its core.
120 const s32 suggested_core = suggested->GetActiveCore();
121 if (Thread* top_thread =
122 (suggested_core >= 0) ? top_threads[suggested_core] : nullptr;
123 top_thread != suggested) {
124 // Make sure we're not dealing with threads too high priority for migration.
125 if (top_thread != nullptr &&
126 top_thread->GetPriority() < HighestCoreMigrationAllowedPriority) {
127 break;
128 }
129
130 // The suggested thread isn't bound to its core, so we can migrate it!
131 suggested->SetActiveCore(core_id);
132 priority_queue.ChangeCore(suggested_core, suggested);
133
134 top_threads[core_id] = suggested;
135 cores_needing_scheduling |=
136 kernel.Scheduler(core_id).UpdateHighestPriorityThread(top_threads[core_id]);
137 break;
138 }
139
140 // Note this core as a candidate for migration.
141 ASSERT(num_candidates < Core::Hardware::NUM_CPU_CORES);
142 migration_candidates[num_candidates++] = suggested_core;
143 suggested = priority_queue.GetSuggestedNext(core_id, suggested);
144 }
145
146 // If suggested is nullptr, we failed to migrate a specific thread. So let's try all our
147 // candidate cores' top threads.
148 if (suggested == nullptr) {
149 for (size_t i = 0; i < num_candidates; i++) {
150 // Check if there's some other thread that can run on the candidate core.
151 const s32 candidate_core = migration_candidates[i];
152 suggested = top_threads[candidate_core];
153 if (Thread* next_on_candidate_core =
154 priority_queue.GetScheduledNext(candidate_core, suggested);
155 next_on_candidate_core != nullptr) {
156 // The candidate core can run some other thread! We'll migrate its current
157 // top thread to us.
158 top_threads[candidate_core] = next_on_candidate_core;
159 cores_needing_scheduling |=
160 kernel.Scheduler(candidate_core)
161 .UpdateHighestPriorityThread(top_threads[candidate_core]);
162
163 // Perform the migration.
164 suggested->SetActiveCore(core_id);
165 priority_queue.ChangeCore(candidate_core, suggested);
166
167 top_threads[core_id] = suggested;
168 cores_needing_scheduling |=
169 kernel.Scheduler(core_id).UpdateHighestPriorityThread(
170 top_threads[core_id]);
171 break;
172 }
173 }
174 }
175 }
176
177 idle_cores &= ~(1ULL << core_id);
178 }
179
180 return cores_needing_scheduling;
181}
182
183void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state) {
184 ASSERT(kernel.GlobalSchedulerContext().IsLocked());
185
186 // Check if the state has changed, because if it hasn't there's nothing to do.
187 const auto cur_state = thread->scheduling_state;
188 if (cur_state == old_state) {
189 return;
190 }
191
192 // Update the priority queues.
193 if (old_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
194 // If we were previously runnable, then we're not runnable now, and we should remove.
195 GetPriorityQueue(kernel).Remove(thread);
196 IncrementScheduledCount(thread);
197 SetSchedulerUpdateNeeded(kernel);
198 } else if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
199 // If we're now runnable, then we weren't previously, and we should add.
200 GetPriorityQueue(kernel).PushBack(thread);
201 IncrementScheduledCount(thread);
202 SetSchedulerUpdateNeeded(kernel);
203 }
204}
205
206void KScheduler::OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread,
207 u32 old_priority) {
208
209 ASSERT(kernel.GlobalSchedulerContext().IsLocked());
210
211 // If the thread is runnable, we want to change its priority in the queue.
212 if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
213 GetPriorityQueue(kernel).ChangePriority(
214 old_priority, thread == kernel.CurrentScheduler()->GetCurrentThread(), thread);
215 IncrementScheduledCount(thread);
216 SetSchedulerUpdateNeeded(kernel);
217 }
218}
219
220void KScheduler::OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread,
221 const KAffinityMask& old_affinity, s32 old_core) {
222 ASSERT(kernel.GlobalSchedulerContext().IsLocked());
223
224 // If the thread is runnable, we want to change its affinity in the queue.
225 if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
226 GetPriorityQueue(kernel).ChangeAffinityMask(old_core, old_affinity, thread);
227 IncrementScheduledCount(thread);
228 SetSchedulerUpdateNeeded(kernel);
229 }
230}
231
232void KScheduler::RotateScheduledQueue(s32 core_id, s32 priority) {
233 ASSERT(system.GlobalSchedulerContext().IsLocked());
234
235 // Get a reference to the priority queue.
236 auto& kernel = system.Kernel();
237 auto& priority_queue = GetPriorityQueue(kernel);
238
239 // Rotate the front of the queue to the end.
240 Thread* top_thread = priority_queue.GetScheduledFront(core_id, priority);
241 Thread* next_thread = nullptr;
242 if (top_thread != nullptr) {
243 next_thread = priority_queue.MoveToScheduledBack(top_thread);
244 if (next_thread != top_thread) {
245 IncrementScheduledCount(top_thread);
246 IncrementScheduledCount(next_thread);
247 }
248 }
249
250 // While we have a suggested thread, try to migrate it!
251 {
252 Thread* suggested = priority_queue.GetSuggestedFront(core_id, priority);
253 while (suggested != nullptr) {
254 // Check if the suggested thread is the top thread on its core.
255 const s32 suggested_core = suggested->GetActiveCore();
256 if (Thread* top_on_suggested_core =
257 (suggested_core >= 0) ? priority_queue.GetScheduledFront(suggested_core)
258 : nullptr;
259 top_on_suggested_core != suggested) {
260 // If the next thread is a new thread that has been waiting longer than our
261 // suggestion, we prefer it to our suggestion.
262 if (top_thread != next_thread && next_thread != nullptr &&
263 next_thread->GetLastScheduledTick() < suggested->GetLastScheduledTick()) {
264 suggested = nullptr;
265 break;
266 }
267
268 // If we're allowed to do a migration, do one.
269 // NOTE: Unlike migrations in UpdateHighestPriorityThread, this moves the suggestion
270 // to the front of the queue.
271 if (top_on_suggested_core == nullptr ||
272 top_on_suggested_core->GetPriority() >= HighestCoreMigrationAllowedPriority) {
273 suggested->SetActiveCore(core_id);
274 priority_queue.ChangeCore(suggested_core, suggested, true);
275 IncrementScheduledCount(suggested);
276 break;
277 }
278 }
279
280 // Get the next suggestion.
281 suggested = priority_queue.GetSamePriorityNext(core_id, suggested);
282 }
283 }
284
285 // Now that we might have migrated a thread with the same priority, check if we can do better.
286
287 {
288 Thread* best_thread = priority_queue.GetScheduledFront(core_id);
289 if (best_thread == GetCurrentThread()) {
290 best_thread = priority_queue.GetScheduledNext(core_id, best_thread);
291 }
292
293 // If the best thread we can choose has a priority the same or worse than ours, try to
294 // migrate a higher priority thread.
295 if (best_thread != nullptr && best_thread->GetPriority() >= static_cast<u32>(priority)) {
296 Thread* suggested = priority_queue.GetSuggestedFront(core_id);
297 while (suggested != nullptr) {
298 // If the suggestion's priority is the same as ours, don't bother.
299 if (suggested->GetPriority() >= best_thread->GetPriority()) {
300 break;
301 }
302
303 // Check if the suggested thread is the top thread on its core.
304 const s32 suggested_core = suggested->GetActiveCore();
305 if (Thread* top_on_suggested_core =
306 (suggested_core >= 0) ? priority_queue.GetScheduledFront(suggested_core)
307 : nullptr;
308 top_on_suggested_core != suggested) {
309 // If we're allowed to do a migration, do one.
310 // NOTE: Unlike migrations in UpdateHighestPriorityThread, this moves the
311 // suggestion to the front of the queue.
312 if (top_on_suggested_core == nullptr ||
313 top_on_suggested_core->GetPriority() >=
314 HighestCoreMigrationAllowedPriority) {
315 suggested->SetActiveCore(core_id);
316 priority_queue.ChangeCore(suggested_core, suggested, true);
317 IncrementScheduledCount(suggested);
318 break;
319 }
320 }
321
322 // Get the next suggestion.
323 suggested = priority_queue.GetSuggestedNext(core_id, suggested);
324 }
325 }
326 }
327
328 // After a rotation, we need a scheduler update.
329 SetSchedulerUpdateNeeded(kernel);
330}
331
332bool KScheduler::CanSchedule(KernelCore& kernel) {
333 return kernel.CurrentScheduler()->GetCurrentThread()->GetDisableDispatchCount() <= 1;
334}
335
336bool KScheduler::IsSchedulerUpdateNeeded(const KernelCore& kernel) {
337 return kernel.GlobalSchedulerContext().scheduler_update_needed.load(std::memory_order_acquire);
338}
339
340void KScheduler::SetSchedulerUpdateNeeded(KernelCore& kernel) {
341 kernel.GlobalSchedulerContext().scheduler_update_needed.store(true, std::memory_order_release);
342}
343
344void KScheduler::ClearSchedulerUpdateNeeded(KernelCore& kernel) {
345 kernel.GlobalSchedulerContext().scheduler_update_needed.store(false, std::memory_order_release);
346}
347
348void KScheduler::DisableScheduling(KernelCore& kernel) {
349 if (auto* scheduler = kernel.CurrentScheduler(); scheduler) {
350 ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 0);
351 scheduler->GetCurrentThread()->DisableDispatch();
352 }
353}
354
355void KScheduler::EnableScheduling(KernelCore& kernel, u64 cores_needing_scheduling,
356 Core::EmuThreadHandle global_thread) {
357 if (auto* scheduler = kernel.CurrentScheduler(); scheduler) {
358 scheduler->GetCurrentThread()->EnableDispatch();
359 }
360 RescheduleCores(kernel, cores_needing_scheduling, global_thread);
361}
362
363u64 KScheduler::UpdateHighestPriorityThreads(KernelCore& kernel) {
364 if (IsSchedulerUpdateNeeded(kernel)) {
365 return UpdateHighestPriorityThreadsImpl(kernel);
366 } else {
367 return 0;
368 }
369}
370
371KSchedulerPriorityQueue& KScheduler::GetPriorityQueue(KernelCore& kernel) {
372 return kernel.GlobalSchedulerContext().priority_queue;
373}
374
375void KScheduler::YieldWithoutCoreMigration() {
376 auto& kernel = system.Kernel();
377
378 // Validate preconditions.
379 ASSERT(CanSchedule(kernel));
380 ASSERT(kernel.CurrentProcess() != nullptr);
381
382 // Get the current thread and process.
383 Thread& cur_thread = *GetCurrentThread();
384 Process& cur_process = *kernel.CurrentProcess();
385
386 // If the thread's yield count matches, there's nothing for us to do.
387 if (cur_thread.GetYieldScheduleCount() == cur_process.GetScheduledCount()) {
388 return;
389 }
390
391 // Get a reference to the priority queue.
392 auto& priority_queue = GetPriorityQueue(kernel);
393
394 // Perform the yield.
395 {
396 KScopedSchedulerLock lock(kernel);
397
398 const auto cur_state = cur_thread.scheduling_state;
399 if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
400 // Put the current thread at the back of the queue.
401 Thread* next_thread = priority_queue.MoveToScheduledBack(std::addressof(cur_thread));
402 IncrementScheduledCount(std::addressof(cur_thread));
403
404 // If the next thread is different, we have an update to perform.
405 if (next_thread != std::addressof(cur_thread)) {
406 SetSchedulerUpdateNeeded(kernel);
407 } else {
408 // Otherwise, set the thread's yield count so that we won't waste work until the
409 // process is scheduled again.
410 cur_thread.SetYieldScheduleCount(cur_process.GetScheduledCount());
411 }
412 }
413 }
414}
415
416void KScheduler::YieldWithCoreMigration() {
417 auto& kernel = system.Kernel();
418
419 // Validate preconditions.
420 ASSERT(CanSchedule(kernel));
421 ASSERT(kernel.CurrentProcess() != nullptr);
422
423 // Get the current thread and process.
424 Thread& cur_thread = *GetCurrentThread();
425 Process& cur_process = *kernel.CurrentProcess();
426
427 // If the thread's yield count matches, there's nothing for us to do.
428 if (cur_thread.GetYieldScheduleCount() == cur_process.GetScheduledCount()) {
429 return;
430 }
431
432 // Get a reference to the priority queue.
433 auto& priority_queue = GetPriorityQueue(kernel);
434
435 // Perform the yield.
436 {
437 KScopedSchedulerLock lock(kernel);
438
439 const auto cur_state = cur_thread.scheduling_state;
440 if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
441 // Get the current active core.
442 const s32 core_id = cur_thread.GetActiveCore();
443
444 // Put the current thread at the back of the queue.
445 Thread* next_thread = priority_queue.MoveToScheduledBack(std::addressof(cur_thread));
446 IncrementScheduledCount(std::addressof(cur_thread));
447
448 // While we have a suggested thread, try to migrate it!
449 bool recheck = false;
450 Thread* suggested = priority_queue.GetSuggestedFront(core_id);
451 while (suggested != nullptr) {
452 // Check if the suggested thread is the thread running on its core.
453 const s32 suggested_core = suggested->GetActiveCore();
454
455 if (Thread* running_on_suggested_core =
456 (suggested_core >= 0)
457 ? kernel.Scheduler(suggested_core).state.highest_priority_thread
458 : nullptr;
459 running_on_suggested_core != suggested) {
 460 // If the current thread's priority is higher than our suggestion's, we prefer
 461 // the next thread to the suggestion. We also prefer the next thread when the
 462 // current thread's priority is equal to the suggestion's, but the next thread
463 // has been waiting longer.
464 if ((suggested->GetPriority() > cur_thread.GetPriority()) ||
465 (suggested->GetPriority() == cur_thread.GetPriority() &&
466 next_thread != std::addressof(cur_thread) &&
467 next_thread->GetLastScheduledTick() < suggested->GetLastScheduledTick())) {
468 suggested = nullptr;
469 break;
470 }
471
472 // If we're allowed to do a migration, do one.
473 // NOTE: Unlike migrations in UpdateHighestPriorityThread, this moves the
474 // suggestion to the front of the queue.
475 if (running_on_suggested_core == nullptr ||
476 running_on_suggested_core->GetPriority() >=
477 HighestCoreMigrationAllowedPriority) {
478 suggested->SetActiveCore(core_id);
479 priority_queue.ChangeCore(suggested_core, suggested, true);
480 IncrementScheduledCount(suggested);
481 break;
482 } else {
483 // We couldn't perform a migration, but we should check again on a future
484 // yield.
485 recheck = true;
486 }
487 }
488
489 // Get the next suggestion.
490 suggested = priority_queue.GetSuggestedNext(core_id, suggested);
491 }
492
493 // If we still have a suggestion or the next thread is different, we have an update to
494 // perform.
495 if (suggested != nullptr || next_thread != std::addressof(cur_thread)) {
496 SetSchedulerUpdateNeeded(kernel);
497 } else if (!recheck) {
498 // Otherwise if we don't need to re-check, set the thread's yield count so that we
499 // won't waste work until the process is scheduled again.
500 cur_thread.SetYieldScheduleCount(cur_process.GetScheduledCount());
501 }
502 }
503 }
504}
505
506void KScheduler::YieldToAnyThread() {
507 auto& kernel = system.Kernel();
508
509 // Validate preconditions.
510 ASSERT(CanSchedule(kernel));
511 ASSERT(kernel.CurrentProcess() != nullptr);
512
513 // Get the current thread and process.
514 Thread& cur_thread = *GetCurrentThread();
515 Process& cur_process = *kernel.CurrentProcess();
516
517 // If the thread's yield count matches, there's nothing for us to do.
518 if (cur_thread.GetYieldScheduleCount() == cur_process.GetScheduledCount()) {
519 return;
520 }
521
522 // Get a reference to the priority queue.
523 auto& priority_queue = GetPriorityQueue(kernel);
524
525 // Perform the yield.
526 {
527 KScopedSchedulerLock lock(kernel);
528
529 const auto cur_state = cur_thread.scheduling_state;
530 if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
531 // Get the current active core.
532 const s32 core_id = cur_thread.GetActiveCore();
533
534 // Migrate the current thread to core -1.
535 cur_thread.SetActiveCore(-1);
536 priority_queue.ChangeCore(core_id, std::addressof(cur_thread));
537 IncrementScheduledCount(std::addressof(cur_thread));
538
539 // If there's nothing scheduled, we can try to perform a migration.
540 if (priority_queue.GetScheduledFront(core_id) == nullptr) {
541 // While we have a suggested thread, try to migrate it!
542 Thread* suggested = priority_queue.GetSuggestedFront(core_id);
543 while (suggested != nullptr) {
544 // Check if the suggested thread is the top thread on its core.
545 const s32 suggested_core = suggested->GetActiveCore();
546 if (Thread* top_on_suggested_core =
547 (suggested_core >= 0) ? priority_queue.GetScheduledFront(suggested_core)
548 : nullptr;
549 top_on_suggested_core != suggested) {
550 // If we're allowed to do a migration, do one.
551 if (top_on_suggested_core == nullptr ||
552 top_on_suggested_core->GetPriority() >=
553 HighestCoreMigrationAllowedPriority) {
554 suggested->SetActiveCore(core_id);
555 priority_queue.ChangeCore(suggested_core, suggested);
556 IncrementScheduledCount(suggested);
557 }
558
559 // Regardless of whether we migrated, we had a candidate, so we're done.
560 break;
561 }
562
563 // Get the next suggestion.
564 suggested = priority_queue.GetSuggestedNext(core_id, suggested);
565 }
566
567 // If the suggestion is different from the current thread, we need to perform an
568 // update.
569 if (suggested != std::addressof(cur_thread)) {
570 SetSchedulerUpdateNeeded(kernel);
571 } else {
572 // Otherwise, set the thread's yield count so that we won't waste work until the
573 // process is scheduled again.
574 cur_thread.SetYieldScheduleCount(cur_process.GetScheduledCount());
575 }
576 } else {
577 // Otherwise, we have an update to perform.
578 SetSchedulerUpdateNeeded(kernel);
579 }
580 }
581 }
582}
583
584KScheduler::KScheduler(Core::System& system, std::size_t core_id)
585 : system(system), core_id(core_id) {
586 switch_fiber = std::make_shared<Common::Fiber>(OnSwitch, this);
587 this->state.needs_scheduling = true;
588 this->state.interrupt_task_thread_runnable = false;
589 this->state.should_count_idle = false;
590 this->state.idle_count = 0;
591 this->state.idle_thread_stack = nullptr;
592 this->state.highest_priority_thread = nullptr;
593}
594
595KScheduler::~KScheduler() = default;
596
597Thread* KScheduler::GetCurrentThread() const {
598 if (current_thread) {
599 return current_thread;
600 }
601 return idle_thread;
602}
603
604u64 KScheduler::GetLastContextSwitchTicks() const {
605 return last_context_switch_time;
606}
607
608void KScheduler::RescheduleCurrentCore() {
609 ASSERT(GetCurrentThread()->GetDisableDispatchCount() == 1);
610
611 auto& phys_core = system.Kernel().PhysicalCore(core_id);
612 if (phys_core.IsInterrupted()) {
613 phys_core.ClearInterrupt();
614 }
615 guard.lock();
616 if (this->state.needs_scheduling) {
617 Schedule();
618 } else {
619 guard.unlock();
620 }
621}
622
623void KScheduler::OnThreadStart() {
624 SwitchContextStep2();
625}
626
627void KScheduler::Unload(Thread* thread) {
628 if (thread) {
629 thread->SetIsRunning(false);
630 if (thread->IsContinuousOnSVC() && !thread->IsHLEThread()) {
631 system.ArmInterface(core_id).ExceptionalExit();
632 thread->SetContinuousOnSVC(false);
633 }
634 if (!thread->IsHLEThread() && !thread->HasExited()) {
635 Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
636 cpu_core.SaveContext(thread->GetContext32());
637 cpu_core.SaveContext(thread->GetContext64());
638 // Save the TPIDR_EL0 system register in case it was modified.
639 thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
640 cpu_core.ClearExclusiveState();
641 }
642 thread->context_guard.unlock();
643 }
644}
645
646void KScheduler::Reload(Thread* thread) {
647 if (thread) {
648 ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
649 "Thread must be runnable.");
650
651 // Cancel any outstanding wakeup events for this thread
652 thread->SetIsRunning(true);
653 thread->SetWasRunning(false);
654
655 auto* const thread_owner_process = thread->GetOwnerProcess();
656 if (thread_owner_process != nullptr) {
657 system.Kernel().MakeCurrentProcess(thread_owner_process);
658 }
659 if (!thread->IsHLEThread()) {
660 Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
661 cpu_core.LoadContext(thread->GetContext32());
662 cpu_core.LoadContext(thread->GetContext64());
663 cpu_core.SetTlsAddress(thread->GetTLSAddress());
664 cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
665 cpu_core.ClearExclusiveState();
666 }
667 }
668}
669
670void KScheduler::SwitchContextStep2() {
671 // Load context of new thread
672 Reload(current_thread);
673
674 RescheduleCurrentCore();
675}
676
677void KScheduler::ScheduleImpl() {
678 Thread* previous_thread = current_thread;
679 current_thread = state.highest_priority_thread;
680
681 this->state.needs_scheduling = false;
682
683 if (current_thread == previous_thread) {
684 guard.unlock();
685 return;
686 }
687
688 Process* const previous_process = system.Kernel().CurrentProcess();
689
690 UpdateLastContextSwitchTime(previous_thread, previous_process);
691
692 // Save context for previous thread
693 Unload(previous_thread);
694
695 std::shared_ptr<Common::Fiber>* old_context;
696 if (previous_thread != nullptr) {
697 old_context = &previous_thread->GetHostContext();
698 } else {
699 old_context = &idle_thread->GetHostContext();
700 }
701 guard.unlock();
702
703 Common::Fiber::YieldTo(*old_context, switch_fiber);
 704 // When a thread wakes up, execution may resume on a different core's scheduler.
705 auto& next_scheduler = *system.Kernel().CurrentScheduler();
706 next_scheduler.SwitchContextStep2();
707}
708
709void KScheduler::OnSwitch(void* this_scheduler) {
710 KScheduler* sched = static_cast<KScheduler*>(this_scheduler);
711 sched->SwitchToCurrent();
712}
713
714void KScheduler::SwitchToCurrent() {
715 while (true) {
716 {
717 std::scoped_lock lock{guard};
718 current_thread = state.highest_priority_thread;
719 this->state.needs_scheduling = false;
720 }
721 const auto is_switch_pending = [this] {
722 std::scoped_lock lock{guard};
723 return state.needs_scheduling.load(std::memory_order_relaxed);
724 };
725 do {
726 if (current_thread != nullptr && !current_thread->IsHLEThread()) {
727 current_thread->context_guard.lock();
728 if (!current_thread->IsRunnable()) {
729 current_thread->context_guard.unlock();
730 break;
731 }
732 if (static_cast<u32>(current_thread->GetProcessorID()) != core_id) {
733 current_thread->context_guard.unlock();
734 break;
735 }
736 }
737 std::shared_ptr<Common::Fiber>* next_context;
738 if (current_thread != nullptr) {
739 next_context = &current_thread->GetHostContext();
740 } else {
741 next_context = &idle_thread->GetHostContext();
742 }
743 Common::Fiber::YieldTo(switch_fiber, *next_context);
744 } while (!is_switch_pending());
745 }
746}
747
748void KScheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
749 const u64 prev_switch_ticks = last_context_switch_time;
750 const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks();
751 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
752
753 if (thread != nullptr) {
754 thread->UpdateCPUTimeTicks(update_ticks);
755 }
756
757 if (process != nullptr) {
758 process->UpdateCPUTimeTicks(update_ticks);
759 }
760
761 last_context_switch_time = most_recent_switch_ticks;
762}
763
764void KScheduler::Initialize() {
765 std::string name = "Idle Thread Id:" + std::to_string(core_id);
766 std::function<void(void*)> init_func = Core::CpuManager::GetIdleThreadStartFunc();
767 void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
768 ThreadType type = static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE);
769 auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0,
770 nullptr, std::move(init_func), init_func_parameter);
771 idle_thread = thread_res.Unwrap().get();
772
773 {
774 KScopedSchedulerLock lock{system.Kernel()};
775 idle_thread->SetStatus(ThreadStatus::Ready);
776 }
777}
778
779KScopedSchedulerLock::KScopedSchedulerLock(KernelCore& kernel)
780 : KScopedLock(kernel.GlobalSchedulerContext().SchedulerLock()) {}
781
782KScopedSchedulerLock::~KScopedSchedulerLock() = default;
783
784} // namespace Kernel
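
All three Yield* functions above share one early-out: after a yield that turned out to be redundant, the thread caches the process' scheduled-count, so further yields cost nothing until real scheduling activity bumps that counter. A self-contained sketch of the memoization (the struct names are illustrative stand-ins, not the real types):

#include <cstdint>

struct ProcessSketch { std::uint64_t scheduled_count{}; };
struct ThreadSketch { std::uint64_t yield_schedule_count{}; };

// Mirrors the "yield count matches, nothing to do" check at the top of Yield*.
bool ShouldAttemptYield(const ThreadSketch& t, const ProcessSketch& p) {
    return t.yield_schedule_count != p.scheduled_count;
}

// Mirrors SetYieldScheduleCount() after a redundant yield: the next yield is a
// no-op until something increments scheduled_count again.
void OnRedundantYield(ThreadSketch& t, const ProcessSketch& p) {
    t.yield_schedule_count = p.scheduled_count;
}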
diff --git a/src/core/hle/kernel/k_scheduler.h b/src/core/hle/kernel/k_scheduler.h
new file mode 100644
index 000000000..e84abc84c
--- /dev/null
+++ b/src/core/hle/kernel/k_scheduler.h
@@ -0,0 +1,201 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file references various implementation details from Atmosphere, an open-source firmware for
6// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
7
8#pragma once
9
10#include <atomic>
11
12#include "common/common_types.h"
13#include "common/spin_lock.h"
14#include "core/hle/kernel/global_scheduler_context.h"
15#include "core/hle/kernel/k_priority_queue.h"
16#include "core/hle/kernel/k_scheduler_lock.h"
17#include "core/hle/kernel/k_scoped_lock.h"
18
19namespace Common {
20class Fiber;
21}
22
23namespace Core {
24class System;
25}
26
27namespace Kernel {
28
29class KernelCore;
30class Process;
31class SchedulerLock;
32class Thread;
33
34class KScheduler final {
35public:
36 explicit KScheduler(Core::System& system, std::size_t core_id);
37 ~KScheduler();
38
39 /// Reschedules to the next available thread (call after current thread is suspended)
40 void RescheduleCurrentCore();
41
42 /// Reschedules cores pending reschedule, to be called on EnableScheduling.
43 static void RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedule,
44 Core::EmuThreadHandle global_thread);
45
 46 /// The next two functions are used by single-core mode only.
 47 /// Unloads the current thread before preempting the core.
48 void Unload(Thread* thread);
49
 50 /// Reloads the current thread after core preemption.
51 void Reload(Thread* thread);
52
53 /// Gets the current running thread
54 [[nodiscard]] Thread* GetCurrentThread() const;
55
56 /// Gets the timestamp for the last context switch in ticks.
57 [[nodiscard]] u64 GetLastContextSwitchTicks() const;
58
59 [[nodiscard]] bool ContextSwitchPending() const {
60 return state.needs_scheduling.load(std::memory_order_relaxed);
61 }
62
63 void Initialize();
64
65 void OnThreadStart();
66
67 [[nodiscard]] std::shared_ptr<Common::Fiber>& ControlContext() {
68 return switch_fiber;
69 }
70
71 [[nodiscard]] const std::shared_ptr<Common::Fiber>& ControlContext() const {
72 return switch_fiber;
73 }
74
75 [[nodiscard]] u64 UpdateHighestPriorityThread(Thread* highest_thread);
76
77 /**
 78 * Takes a thread and moves it to the back of its priority list.
79 *
 80 * @note This operation can be redundant, in which case no scheduling change occurs.
81 */
82 void YieldWithoutCoreMigration();
83
84 /**
 85 * Takes a thread and moves it to the back of its priority list.
 86 * Afterwards, tries to pick a suggested thread from the suggested queue that has been
 87 * waiting longer, or has a better priority, than the next thread on the core.
88 *
 89 * @note This operation can be redundant, in which case no scheduling change occurs.
90 */
91 void YieldWithCoreMigration();
92
93 /**
 94 * Takes a thread and moves it out of the scheduling queue
 95 * and into the suggested queue. If no thread can then be scheduled on that core,
 96 * a suggested thread is migrated in instead.
97 *
 98 * @note This operation can be redundant, in which case no scheduling change occurs.
99 */
100 void YieldToAnyThread();
101
102 /// Notify the scheduler a thread's status has changed.
103 static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state);
104
105 /// Notify the scheduler a thread's priority has changed.
106 static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread,
107 u32 old_priority);
108
109 /// Notify the scheduler a thread's core and/or affinity mask has changed.
110 static void OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread,
111 const KAffinityMask& old_affinity, s32 old_core);
112
113 static bool CanSchedule(KernelCore& kernel);
114 static bool IsSchedulerUpdateNeeded(const KernelCore& kernel);
115 static void SetSchedulerUpdateNeeded(KernelCore& kernel);
116 static void ClearSchedulerUpdateNeeded(KernelCore& kernel);
117 static void DisableScheduling(KernelCore& kernel);
118 static void EnableScheduling(KernelCore& kernel, u64 cores_needing_scheduling,
119 Core::EmuThreadHandle global_thread);
120 [[nodiscard]] static u64 UpdateHighestPriorityThreads(KernelCore& kernel);
121
122private:
123 friend class GlobalSchedulerContext;
124
125 /**
126 * Takes care of selecting the new scheduled threads in three steps:
127 *
128 * 1. First, a thread is selected from the top of the priority queue. If no thread
129 * is obtained, we move on to step two; otherwise we are done.
130 *
131 * 2. Second, we try to get a suggested thread that is not assigned to any core or
132 * that is not the top thread on its core.
133 *
134 * 3. Third, if no suggested thread is found, we do a second pass, pick a running
135 * thread on another core, and swap it with that core's current thread.
136 *
137 * @return The cores needing scheduling.
138 */
139 [[nodiscard]] static u64 UpdateHighestPriorityThreadsImpl(KernelCore& kernel);
140
141 [[nodiscard]] static KSchedulerPriorityQueue& GetPriorityQueue(KernelCore& kernel);
142
143 void RotateScheduledQueue(s32 core_id, s32 priority);
144
145 void Schedule() {
146 ASSERT(GetCurrentThread()->GetDisableDispatchCount() == 1);
147 this->ScheduleImpl();
148 }
149
150 /// Switches the CPU's active thread context to that of the specified thread
151 void ScheduleImpl();
152
153 /// When a thread wakes up, it must run this through its new scheduler
154 void SwitchContextStep2();
155
156 /**
157 * Called on every context switch to update the internal timestamp.
158 * This also updates the running time ticks for the given thread and
159 * process using the following difference:
160 *
161 * ticks += most_recent_ticks - last_context_switch_ticks
162 *
163 * The internal tick timestamp for the scheduler is simply the
164 * most recent tick count retrieved. No special arithmetic is
165 * applied to it.
166 */
167 void UpdateLastContextSwitchTime(Thread* thread, Process* process);
168
169 static void OnSwitch(void* this_scheduler);
170 void SwitchToCurrent();
171
172 Thread* current_thread{};
173 Thread* idle_thread{};
174
175 std::shared_ptr<Common::Fiber> switch_fiber{};
176
177 struct SchedulingState {
178 std::atomic<bool> needs_scheduling;
179 bool interrupt_task_thread_runnable{};
180 bool should_count_idle{};
181 u64 idle_count{};
182 Thread* highest_priority_thread{};
183 void* idle_thread_stack{};
184 };
185
186 SchedulingState state;
187
188 Core::System& system;
189 u64 last_context_switch_time{};
190 const std::size_t core_id;
191
192 Common::SpinLock guard{};
193};
194
195class KScopedSchedulerLock : KScopedLock<GlobalSchedulerContext::LockType> {
196public:
197 explicit KScopedSchedulerLock(KernelCore& kernel);
198 ~KScopedSchedulerLock();
199};
200
201} // namespace Kernel
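
For context, the three yield flavours declared above are what the special svcSleepThread timeout values conventionally map onto in Horizon (0, -1 and -2 nanoseconds); a hedged sketch of such a dispatcher, with the actual svc wiring outside this diff:

    #include "common/common_types.h"
    #include "core/hle/kernel/k_scheduler.h"

    // Hypothetical dispatcher illustrating the three yield flavours.
    void HandleYield(Kernel::KScheduler& scheduler, s64 nanoseconds) {
        switch (nanoseconds) {
        case 0: // yield within the current core's priority list
            scheduler.YieldWithoutCoreMigration();
            break;
        case -1: // yield, allowing migration to another core
            scheduler.YieldWithCoreMigration();
            break;
        case -2: // give up the core to any runnable thread
            scheduler.YieldToAnyThread();
            break;
        default: // positive values are real sleeps, handled elsewhere
            break;
        }
    }
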
diff --git a/src/core/hle/kernel/k_scheduler_lock.h b/src/core/hle/kernel/k_scheduler_lock.h
new file mode 100644
index 000000000..2f1c1f691
--- /dev/null
+++ b/src/core/hle/kernel/k_scheduler_lock.h
@@ -0,0 +1,75 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file references various implementation details from Atmosphere, an open-source firmware for
6// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
7
8#pragma once
9
10#include "common/assert.h"
11#include "common/spin_lock.h"
12#include "core/hardware_properties.h"
13#include "core/hle/kernel/kernel.h"
14
15namespace Kernel {
16
17class KernelCore;
18
19template <typename SchedulerType>
20class KAbstractSchedulerLock {
21public:
22 explicit KAbstractSchedulerLock(KernelCore& kernel) : kernel{kernel} {}
23
24 bool IsLockedByCurrentThread() const {
25 return this->owner_thread == kernel.GetCurrentEmuThreadID();
26 }
27
28 void Lock() {
29 if (this->IsLockedByCurrentThread()) {
30 // If we already own the lock, we can just increment the count.
31 ASSERT(this->lock_count > 0);
32 this->lock_count++;
33 } else {
34 // Otherwise, we want to disable scheduling and acquire the spinlock.
35 SchedulerType::DisableScheduling(kernel);
36 this->spin_lock.lock();
37
38 // For debug, ensure that our state is valid.
39 ASSERT(this->lock_count == 0);
40 ASSERT(this->owner_thread == Core::EmuThreadHandle::InvalidHandle());
41
42 // Increment count, take ownership.
43 this->lock_count = 1;
44 this->owner_thread = kernel.GetCurrentEmuThreadID();
45 }
46 }
47
48 void Unlock() {
49 ASSERT(this->IsLockedByCurrentThread());
50 ASSERT(this->lock_count > 0);
51
52 // Release an instance of the lock.
53 if ((--this->lock_count) == 0) {
54 // We're no longer going to hold the lock. Take note of what cores need scheduling.
55 const u64 cores_needing_scheduling =
56 SchedulerType::UpdateHighestPriorityThreads(kernel);
57 Core::EmuThreadHandle leaving_thread = owner_thread;
58
59 // Note that we no longer hold the lock, and unlock the spinlock.
60 this->owner_thread = Core::EmuThreadHandle::InvalidHandle();
61 this->spin_lock.unlock();
62
63 // Enable scheduling, and perform a rescheduling operation.
64 SchedulerType::EnableScheduling(kernel, cores_needing_scheduling, leaving_thread);
65 }
66 }
67
68private:
69 KernelCore& kernel;
70 Common::SpinLock spin_lock{};
71 s32 lock_count{};
72 Core::EmuThreadHandle owner_thread{Core::EmuThreadHandle::InvalidHandle()};
73};
74
75} // namespace Kernel
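
The lock above is reentrant per emulated thread: nested Lock()/Unlock() pairs only adjust the counter, while the spinlock and the DisableScheduling/EnableScheduling machinery fire on the outermost pair. A sketch of the nesting behaviour, with hypothetical helper functions:

    #include "core/hle/kernel/global_scheduler_context.h"
    #include "core/hle/kernel/kernel.h"

    void Inner(Kernel::KernelCore& kernel) {
        auto& lock = kernel.GlobalSchedulerContext().SchedulerLock();
        lock.Lock();   // already owned by this thread: lock_count 1 -> 2
        lock.Unlock(); // lock_count 2 -> 1; no rescheduling happens yet
    }

    void Outer(Kernel::KernelCore& kernel) {
        auto& lock = kernel.GlobalSchedulerContext().SchedulerLock();
        lock.Lock();   // outermost: disables scheduling, takes the spinlock
        Inner(kernel); // nested pair only touches the counter
        lock.Unlock(); // outermost: updates highest-priority threads, re-enables scheduling
    }
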
diff --git a/src/core/hle/kernel/k_scoped_lock.h b/src/core/hle/kernel/k_scoped_lock.h
new file mode 100644
index 000000000..d7cc557b2
--- /dev/null
+++ b/src/core/hle/kernel/k_scoped_lock.h
@@ -0,0 +1,41 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file references various implementation details from Atmosphere, an open-source firmware for
6// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
7
8#pragma once
9
10#include "common/common_types.h"
11
12namespace Kernel {
13
14template <typename T>
15concept KLockable = !std::is_reference_v<T> && requires(T & t) {
16 { t.Lock() }
17 ->std::same_as<void>;
18 { t.Unlock() }
19 ->std::same_as<void>;
20};
21
22template <typename T>
23requires KLockable<T> class KScopedLock {
24public:
25 explicit KScopedLock(T* l) : lock_ptr(l) {
26 this->lock_ptr->Lock();
27 }
28 explicit KScopedLock(T& l) : KScopedLock(std::addressof(l)) { /* ... */
29 }
30 ~KScopedLock() {
31 this->lock_ptr->Unlock();
32 }
33
34 KScopedLock(const KScopedLock&) = delete;
35 KScopedLock(KScopedLock&&) = delete;
36
37private:
38 T* lock_ptr;
39};
40
41} // namespace Kernel
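
Any non-reference type exposing void Lock()/Unlock() models KLockable, so KScopedLock also works with simple test doubles. A self-contained sketch (TestLock is hypothetical, not part of the codebase):

    #include <cassert>
    #include "core/hle/kernel/k_scoped_lock.h"

    class TestLock {
    public:
        void Lock() { ++depth; }
        void Unlock() { --depth; }
        int depth{};
    };

    void Example() {
        TestLock test_lock;
        {
            Kernel::KScopedLock lk{test_lock}; // calls test_lock.Lock()
            assert(test_lock.depth == 1);
        } // destructor calls test_lock.Unlock()
        assert(test_lock.depth == 0);
    }
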
diff --git a/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h
new file mode 100644
index 000000000..2bb3817fa
--- /dev/null
+++ b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h
@@ -0,0 +1,50 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file references various implementation details from Atmosphere, an open-source firmware for
6// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
7
8#pragma once
9
10#include "common/common_types.h"
11#include "core/hle/kernel/handle_table.h"
12#include "core/hle/kernel/kernel.h"
13#include "core/hle/kernel/thread.h"
14#include "core/hle/kernel/time_manager.h"
15
16namespace Kernel {
17
18class KScopedSchedulerLockAndSleep {
19public:
20 explicit KScopedSchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, Thread* t,
21 s64 timeout)
22 : kernel(kernel), event_handle(event_handle), thread(t), timeout_tick(timeout) {
23 event_handle = InvalidHandle;
24
25 // Lock the scheduler.
26 kernel.GlobalSchedulerContext().scheduler_lock.Lock();
27 }
28
29 ~KScopedSchedulerLockAndSleep() {
30 // Register the sleep.
31 if (this->timeout_tick > 0) {
32 kernel.TimeManager().ScheduleTimeEvent(event_handle, this->thread, this->timeout_tick);
33 }
34
35 // Unlock the scheduler.
36 kernel.GlobalSchedulerContext().scheduler_lock.Unlock();
37 }
38
39 void CancelSleep() {
40 this->timeout_tick = 0;
41 }
42
43private:
44 KernelCore& kernel;
45 Handle& event_handle;
46 Thread* thread{};
47 s64 timeout_tick{};
48};
49
50} // namespace Kernel
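
A sketch of the intended wait pattern (SleepExample is hypothetical): the destructor registers the time event only when CancelSleep() has not zeroed the timeout first:

    #include "common/common_types.h"
    #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
    #include "core/hle/kernel/thread.h"
    #include "core/hle/result.h"

    ResultCode SleepExample(Kernel::KernelCore& kernel, Kernel::Thread* current, s64 timeout) {
        Handle event_handle{};
        {
            // Locks the scheduler for this scope and arms a wakeup `timeout` ticks from now.
            Kernel::KScopedSchedulerLockAndSleep wait(kernel, event_handle, current, timeout);
            if (timeout == 0) {
                // Nothing to wait for; suppress the time event entirely.
                wait.CancelSleep();
                return RESULT_SUCCESS;
            }
            current->SetStatus(Kernel::ThreadStatus::WaitSleep);
        } // Scheduler unlocked here; the wakeup fires after `timeout` ticks.
        return RESULT_SUCCESS;
    }
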
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 929db696d..e8ece8164 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -8,13 +8,14 @@
8#include <functional> 8#include <functional>
9#include <memory> 9#include <memory>
10#include <thread> 10#include <thread>
11#include <unordered_map> 11#include <unordered_set>
12#include <utility> 12#include <utility>
13 13
14#include "common/assert.h" 14#include "common/assert.h"
15#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "common/microprofile.h" 16#include "common/microprofile.h"
17#include "common/thread.h" 17#include "common/thread.h"
18#include "common/thread_worker.h"
18#include "core/arm/arm_interface.h" 19#include "core/arm/arm_interface.h"
19#include "core/arm/cpu_interrupt_handler.h" 20#include "core/arm/cpu_interrupt_handler.h"
20#include "core/arm/exclusive_monitor.h" 21#include "core/arm/exclusive_monitor.h"
@@ -27,6 +28,7 @@
27#include "core/hle/kernel/client_port.h" 28#include "core/hle/kernel/client_port.h"
28#include "core/hle/kernel/errors.h" 29#include "core/hle/kernel/errors.h"
29#include "core/hle/kernel/handle_table.h" 30#include "core/hle/kernel/handle_table.h"
31#include "core/hle/kernel/k_scheduler.h"
30#include "core/hle/kernel/kernel.h" 32#include "core/hle/kernel/kernel.h"
31#include "core/hle/kernel/memory/memory_layout.h" 33#include "core/hle/kernel/memory/memory_layout.h"
32#include "core/hle/kernel/memory/memory_manager.h" 34#include "core/hle/kernel/memory/memory_manager.h"
@@ -34,7 +36,7 @@
34#include "core/hle/kernel/physical_core.h" 36#include "core/hle/kernel/physical_core.h"
35#include "core/hle/kernel/process.h" 37#include "core/hle/kernel/process.h"
36#include "core/hle/kernel/resource_limit.h" 38#include "core/hle/kernel/resource_limit.h"
37#include "core/hle/kernel/scheduler.h" 39#include "core/hle/kernel/service_thread.h"
38#include "core/hle/kernel/shared_memory.h" 40#include "core/hle/kernel/shared_memory.h"
39#include "core/hle/kernel/synchronization.h" 41#include "core/hle/kernel/synchronization.h"
40#include "core/hle/kernel/thread.h" 42#include "core/hle/kernel/thread.h"
@@ -49,17 +51,20 @@ namespace Kernel {
49 51
50struct KernelCore::Impl { 52struct KernelCore::Impl {
51 explicit Impl(Core::System& system, KernelCore& kernel) 53 explicit Impl(Core::System& system, KernelCore& kernel)
52 : global_scheduler{kernel}, synchronization{system}, time_manager{system}, 54 : synchronization{system}, time_manager{system}, global_handle_table{kernel}, system{
53 global_handle_table{kernel}, system{system} {} 55 system} {}
54 56
55 void SetMulticore(bool is_multicore) { 57 void SetMulticore(bool is_multicore) {
56 this->is_multicore = is_multicore; 58 this->is_multicore = is_multicore;
57 } 59 }
58 60
59 void Initialize(KernelCore& kernel) { 61 void Initialize(KernelCore& kernel) {
60 Shutdown();
61 RegisterHostThread(); 62 RegisterHostThread();
62 63
64 global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
65 service_thread_manager =
66 std::make_unique<Common::ThreadWorker>(1, "yuzu:ServiceThreadManager");
67
63 InitializePhysicalCores(); 68 InitializePhysicalCores();
64 InitializeSystemResourceLimit(kernel); 69 InitializeSystemResourceLimit(kernel);
65 InitializeMemoryLayout(); 70 InitializeMemoryLayout();
@@ -75,6 +80,12 @@ struct KernelCore::Impl {
75 } 80 }
76 81
77 void Shutdown() { 82 void Shutdown() {
83 process_list.clear();
84
85 // Ensures all service threads shut down gracefully
86 service_thread_manager.reset();
87 service_threads.clear();
88
78 next_object_id = 0; 89 next_object_id = 0;
79 next_kernel_process_id = Process::InitialKIPIDMin; 90 next_kernel_process_id = Process::InitialKIPIDMin;
80 next_user_process_id = Process::ProcessIDMin; 91 next_user_process_id = Process::ProcessIDMin;
@@ -86,42 +97,29 @@ struct KernelCore::Impl {
86 } 97 }
87 } 98 }
88 99
89 for (std::size_t i = 0; i < cores.size(); i++) {
90 cores[i].Shutdown();
91 schedulers[i].reset();
92 }
93 cores.clear(); 100 cores.clear();
94 101
95 process_list.clear();
96 current_process = nullptr; 102 current_process = nullptr;
97 103
98 system_resource_limit = nullptr; 104 system_resource_limit = nullptr;
99 105
100 global_handle_table.Clear(); 106 global_handle_table.Clear();
101 preemption_event = nullptr;
102 107
103 global_scheduler.Shutdown(); 108 preemption_event = nullptr;
104 109
105 named_ports.clear(); 110 named_ports.clear();
106 111
107 for (auto& core : cores) {
108 core.Shutdown();
109 }
110 cores.clear();
111
112 exclusive_monitor.reset(); 112 exclusive_monitor.reset();
113 113
114 num_host_threads = 0; 114 // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others
115 std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(), 115 next_host_thread_id = Core::Hardware::NUM_CPU_CORES;
116 std::thread::id{});
117 std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0);
118 } 116 }
119 117
120 void InitializePhysicalCores() { 118 void InitializePhysicalCores() {
121 exclusive_monitor = 119 exclusive_monitor =
122 Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES); 120 Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES);
123 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { 121 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
124 schedulers[i] = std::make_unique<Kernel::Scheduler>(system, i); 122 schedulers[i] = std::make_unique<Kernel::KScheduler>(system, i);
125 cores.emplace_back(i, system, *schedulers[i], interrupts); 123 cores.emplace_back(i, system, *schedulers[i], interrupts);
126 } 124 }
127 } 125 }
@@ -154,8 +152,8 @@ struct KernelCore::Impl {
154 preemption_event = Core::Timing::CreateEvent( 152 preemption_event = Core::Timing::CreateEvent(
155 "PreemptionCallback", [this, &kernel](std::uintptr_t, std::chrono::nanoseconds) { 153 "PreemptionCallback", [this, &kernel](std::uintptr_t, std::chrono::nanoseconds) {
156 { 154 {
157 SchedulerLock lock(kernel); 155 KScopedSchedulerLock lock(kernel);
158 global_scheduler.PreemptThreads(); 156 global_scheduler_context->PreemptThreads();
159 } 157 }
160 const auto time_interval = std::chrono::nanoseconds{ 158 const auto time_interval = std::chrono::nanoseconds{
161 Core::Timing::msToCycles(std::chrono::milliseconds(10))}; 159 Core::Timing::msToCycles(std::chrono::milliseconds(10))};
@@ -194,58 +192,52 @@ struct KernelCore::Impl {
194 } 192 }
195 } 193 }
196 194
195 /// Creates a new host thread ID; should only be called by GetHostThreadId
196 u32 AllocateHostThreadId(std::optional<std::size_t> core_id) {
197 if (core_id) {
198 // The first four slots are reserved for CPU core threads
199 ASSERT(*core_id < Core::Hardware::NUM_CPU_CORES);
200 return static_cast<u32>(*core_id);
201 } else {
202 return next_host_thread_id++;
203 }
204 }
205
206 /// Gets the host thread ID for the caller, allocating a new one if this is the first time
207 u32 GetHostThreadId(std::optional<std::size_t> core_id = std::nullopt) {
208 const thread_local auto host_thread_id{AllocateHostThreadId(core_id)};
209 return host_thread_id;
210 }
211
212 /// Registers a CPU core thread by allocating a host thread ID for it
197 void RegisterCoreThread(std::size_t core_id) { 213 void RegisterCoreThread(std::size_t core_id) {
198 const std::thread::id this_id = std::this_thread::get_id(); 214 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
215 const auto this_id = GetHostThreadId(core_id);
199 if (!is_multicore) { 216 if (!is_multicore) {
200 single_core_thread_id = this_id; 217 single_core_thread_id = this_id;
201 } 218 }
202 const auto end =
203 register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads);
204 const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
205 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
206 ASSERT(it == end);
207 InsertHostThread(static_cast<u32>(core_id));
208 } 219 }
209 220
221 /// Registers a new host thread by allocating a host thread ID for it
210 void RegisterHostThread() { 222 void RegisterHostThread() {
211 const std::thread::id this_id = std::this_thread::get_id(); 223 [[maybe_unused]] const auto this_id = GetHostThreadId();
212 const auto end =
213 register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads);
214 const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
215 if (it == end) {
216 InsertHostThread(registered_thread_ids++);
217 }
218 } 224 }
219 225
220 void InsertHostThread(u32 value) { 226 [[nodiscard]] u32 GetCurrentHostThreadID() {
221 const size_t index = num_host_threads++; 227 const auto this_id = GetHostThreadId();
222 ASSERT_MSG(index < NUM_REGISTRABLE_HOST_THREADS, "Too many host threads");
223 register_host_thread_values[index] = value;
224 register_host_thread_keys[index] = std::this_thread::get_id();
225 }
226
227 [[nodiscard]] u32 GetCurrentHostThreadID() const {
228 const std::thread::id this_id = std::this_thread::get_id();
229 if (!is_multicore && single_core_thread_id == this_id) { 228 if (!is_multicore && single_core_thread_id == this_id) {
230 return static_cast<u32>(system.GetCpuManager().CurrentCore()); 229 return static_cast<u32>(system.GetCpuManager().CurrentCore());
231 } 230 }
232 const auto end = 231 return this_id;
233 register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads);
234 const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
235 if (it == end) {
236 return Core::INVALID_HOST_THREAD_ID;
237 }
238 return register_host_thread_values[static_cast<size_t>(
239 std::distance(register_host_thread_keys.begin(), it))];
240 } 232 }
241 233
242 Core::EmuThreadHandle GetCurrentEmuThreadID() const { 234 [[nodiscard]] Core::EmuThreadHandle GetCurrentEmuThreadID() {
243 Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle(); 235 Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle();
244 result.host_handle = GetCurrentHostThreadID(); 236 result.host_handle = GetCurrentHostThreadID();
245 if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) { 237 if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) {
246 return result; 238 return result;
247 } 239 }
248 const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler(); 240 const Kernel::KScheduler& sched = cores[result.host_handle].Scheduler();
249 const Kernel::Thread* current = sched.GetCurrentThread(); 241 const Kernel::Thread* current = sched.GetCurrentThread();
250 if (current != nullptr && !current->IsPhantomMode()) { 242 if (current != nullptr && !current->IsPhantomMode()) {
251 result.guest_handle = current->GetGlobalHandle(); 243 result.guest_handle = current->GetGlobalHandle();
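
The GetHostThreadId() helper above leans on C++ thread_local initialization: the initializer runs once per host thread, on that thread's first call, making lazy ID assignment race-free without a registration table. The same pattern in a standalone sketch:

    #include <atomic>
    #include <cstdio>
    #include <thread>

    std::atomic<unsigned> next_id{4}; // IDs 0-3 would be reserved for core threads

    unsigned HostThreadId() {
        // Initialized exactly once per thread, on that thread's first call.
        thread_local unsigned id = next_id++;
        return id;
    }

    int main() {
        std::thread a([] { std::printf("a: %u\n", HostThreadId()); });
        std::thread b([] { std::printf("b: %u\n", HostThreadId()); });
        a.join();
        b.join();
    }
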
@@ -314,7 +306,7 @@ struct KernelCore::Impl {
314 // Lists all processes that exist in the current session. 306 // Lists all processes that exist in the current session.
315 std::vector<std::shared_ptr<Process>> process_list; 307 std::vector<std::shared_ptr<Process>> process_list;
316 Process* current_process = nullptr; 308 Process* current_process = nullptr;
317 Kernel::GlobalScheduler global_scheduler; 309 std::unique_ptr<Kernel::GlobalSchedulerContext> global_scheduler_context;
318 Kernel::Synchronization synchronization; 310 Kernel::Synchronization synchronization;
319 Kernel::TimeManager time_manager; 311 Kernel::TimeManager time_manager;
320 312
@@ -333,15 +325,8 @@ struct KernelCore::Impl {
333 std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; 325 std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
334 std::vector<Kernel::PhysicalCore> cores; 326 std::vector<Kernel::PhysicalCore> cores;
335 327
336 // 0-3 IDs represent core threads, >3 represent others 328 // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others
337 std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES}; 329 std::atomic<u32> next_host_thread_id{Core::Hardware::NUM_CPU_CORES};
338
339 // Number of host threads is a relatively high number to avoid overflowing
340 static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64;
341 std::atomic<size_t> num_host_threads{0};
342 std::array<std::atomic<std::thread::id>, NUM_REGISTRABLE_HOST_THREADS>
343 register_host_thread_keys{};
344 std::array<std::atomic<u32>, NUM_REGISTRABLE_HOST_THREADS> register_host_thread_values{};
345 330
346 // Kernel memory management 331 // Kernel memory management
347 std::unique_ptr<Memory::MemoryManager> memory_manager; 332 std::unique_ptr<Memory::MemoryManager> memory_manager;
@@ -353,12 +338,19 @@ struct KernelCore::Impl {
353 std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; 338 std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;
354 std::shared_ptr<Kernel::SharedMemory> time_shared_mem; 339 std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
355 340
341 // Threads used for services
342 std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
343
344 // Service threads are managed by a worker thread, so that a calling service thread can queue up
345 // its own release
346 std::unique_ptr<Common::ThreadWorker> service_thread_manager;
347
356 std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; 348 std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
357 std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{}; 349 std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
358 std::array<std::unique_ptr<Kernel::Scheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; 350 std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
359 351
360 bool is_multicore{}; 352 bool is_multicore{};
361 std::thread::id single_core_thread_id{}; 353 u32 single_core_thread_id{};
362 354
363 std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{}; 355 std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{};
364 356
@@ -415,19 +407,19 @@ const std::vector<std::shared_ptr<Process>>& KernelCore::GetProcessList() const
415 return impl->process_list; 407 return impl->process_list;
416} 408}
417 409
418Kernel::GlobalScheduler& KernelCore::GlobalScheduler() { 410Kernel::GlobalSchedulerContext& KernelCore::GlobalSchedulerContext() {
419 return impl->global_scheduler; 411 return *impl->global_scheduler_context;
420} 412}
421 413
422const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { 414const Kernel::GlobalSchedulerContext& KernelCore::GlobalSchedulerContext() const {
423 return impl->global_scheduler; 415 return *impl->global_scheduler_context;
424} 416}
425 417
426Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) { 418Kernel::KScheduler& KernelCore::Scheduler(std::size_t id) {
427 return *impl->schedulers[id]; 419 return *impl->schedulers[id];
428} 420}
429 421
430const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const { 422const Kernel::KScheduler& KernelCore::Scheduler(std::size_t id) const {
431 return *impl->schedulers[id]; 423 return *impl->schedulers[id];
432} 424}
433 425
@@ -451,16 +443,13 @@ const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const {
451 return impl->cores[core_id]; 443 return impl->cores[core_id];
452} 444}
453 445
454Kernel::Scheduler& KernelCore::CurrentScheduler() { 446Kernel::KScheduler* KernelCore::CurrentScheduler() {
455 u32 core_id = impl->GetCurrentHostThreadID(); 447 u32 core_id = impl->GetCurrentHostThreadID();
456 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); 448 if (core_id >= Core::Hardware::NUM_CPU_CORES) {
457 return *impl->schedulers[core_id]; 449 // This is expected when not called from a guest thread
458} 450 return {};
459 451 }
460const Kernel::Scheduler& KernelCore::CurrentScheduler() const { 452 return impl->schedulers[core_id].get();
461 u32 core_id = impl->GetCurrentHostThreadID();
462 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
463 return *impl->schedulers[core_id];
464} 453}
465 454
466std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() { 455std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() {
@@ -623,7 +612,7 @@ const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const {
623void KernelCore::Suspend(bool in_suspention) { 612void KernelCore::Suspend(bool in_suspention) {
624 const bool should_suspend = exception_exited || in_suspention; 613 const bool should_suspend = exception_exited || in_suspention;
625 { 614 {
626 SchedulerLock lock(*this); 615 KScopedSchedulerLock lock(*this);
627 ThreadStatus status = should_suspend ? ThreadStatus::Ready : ThreadStatus::WaitSleep; 616 ThreadStatus status = should_suspend ? ThreadStatus::Ready : ThreadStatus::WaitSleep;
628 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { 617 for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
629 impl->suspend_threads[i]->SetStatus(status); 618 impl->suspend_threads[i]->SetStatus(status);
@@ -650,4 +639,19 @@ void KernelCore::ExitSVCProfile() {
650 MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); 639 MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);
651} 640}
652 641
642std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) {
643 auto service_thread = std::make_shared<Kernel::ServiceThread>(*this, 1, name);
644 impl->service_thread_manager->QueueWork(
645 [this, service_thread] { impl->service_threads.emplace(service_thread); });
646 return service_thread;
647}
648
649void KernelCore::ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread) {
650 impl->service_thread_manager->QueueWork([this, service_thread] {
651 if (auto strong_ptr = service_thread.lock()) {
652 impl->service_threads.erase(strong_ptr);
653 }
654 });
655}
656
653} // namespace Kernel 657} // namespace Kernel
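
The Create/ReleaseServiceThread pair above funnels all mutations of the owning set through a single worker, so a service thread can queue its own release without destroying itself mid-call. A minimal standalone sketch of that deferred-release idea (Resource and Owner are hypothetical stand-ins for ServiceThread and Common::ThreadWorker, with tasks run inline here rather than on a worker thread):

    #include <functional>
    #include <memory>
    #include <unordered_set>

    struct Resource {};

    class Owner {
    public:
        std::weak_ptr<Resource> Create() {
            auto resource = std::make_shared<Resource>();
            QueueWork([this, resource] { owned.emplace(resource); });
            return resource;
        }

        void Release(std::weak_ptr<Resource> weak) {
            // Queued, so a resource can request its own release without being
            // destroyed in the middle of one of its own calls.
            QueueWork([this, weak] {
                if (auto strong = weak.lock()) {
                    owned.erase(strong); // last owner: destruction happens here
                }
            });
        }

    private:
        // Stand-in for Common::ThreadWorker::QueueWork; runs inline for brevity.
        void QueueWork(std::function<void()> task) { task(); }
        std::unordered_set<std::shared_ptr<Resource>> owned;
    };
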
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index a73a93039..e3169f5a7 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -35,13 +35,14 @@ class SlabHeap;
35 35
36class AddressArbiter; 36class AddressArbiter;
37class ClientPort; 37class ClientPort;
38class GlobalScheduler; 38class GlobalSchedulerContext;
39class HandleTable; 39class HandleTable;
40class PhysicalCore; 40class PhysicalCore;
41class Process; 41class Process;
42class ResourceLimit; 42class ResourceLimit;
43class Scheduler; 43class KScheduler;
44class SharedMemory; 44class SharedMemory;
45class ServiceThread;
45class Synchronization; 46class Synchronization;
46class Thread; 47class Thread;
47class TimeManager; 48class TimeManager;
@@ -102,16 +103,16 @@ public:
102 const std::vector<std::shared_ptr<Process>>& GetProcessList() const; 103 const std::vector<std::shared_ptr<Process>>& GetProcessList() const;
103 104
104 /// Gets the sole instance of the global scheduler 105 /// Gets the sole instance of the global scheduler
105 Kernel::GlobalScheduler& GlobalScheduler(); 106 Kernel::GlobalSchedulerContext& GlobalSchedulerContext();
106 107
107 /// Gets the sole instance of the global scheduler 108 /// Gets the sole instance of the global scheduler
108 const Kernel::GlobalScheduler& GlobalScheduler() const; 109 const Kernel::GlobalSchedulerContext& GlobalSchedulerContext() const;
109 110
110 /// Gets the sole instance of the Scheduler assoviated with cpu core 'id' 111 /// Gets the sole instance of the Scheduler assoviated with cpu core 'id'
111 Kernel::Scheduler& Scheduler(std::size_t id); 112 Kernel::KScheduler& Scheduler(std::size_t id);
112 113
113 /// Gets the sole instance of the Scheduler assoviated with cpu core 'id' 114 /// Gets the sole instance of the Scheduler assoviated with cpu core 'id'
114 const Kernel::Scheduler& Scheduler(std::size_t id) const; 115 const Kernel::KScheduler& Scheduler(std::size_t id) const;
115 116
116 /// Gets the an instance of the respective physical CPU core. 117 /// Gets the an instance of the respective physical CPU core.
117 Kernel::PhysicalCore& PhysicalCore(std::size_t id); 118 Kernel::PhysicalCore& PhysicalCore(std::size_t id);
@@ -120,10 +121,7 @@ public:
120 const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; 121 const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;
121 122
122 /// Gets the sole instance of the Scheduler at the current running core. 123 /// Gets the sole instance of the Scheduler at the current running core.
123 Kernel::Scheduler& CurrentScheduler(); 124 Kernel::KScheduler* CurrentScheduler();
124
125 /// Gets the sole instance of the Scheduler at the current running core.
126 const Kernel::Scheduler& CurrentScheduler() const;
127 125
128 /// Gets the an instance of the current physical CPU core. 126 /// Gets the an instance of the current physical CPU core.
129 Kernel::PhysicalCore& CurrentPhysicalCore(); 127 Kernel::PhysicalCore& CurrentPhysicalCore();
@@ -230,6 +228,22 @@ public:
230 228
231 void ExitSVCProfile(); 229 void ExitSVCProfile();
232 230
231 /**
232 * Creates an HLE service thread, which is used to execute service routines asynchronously.
233 * While these are allocated per ServerSession, they need to be owned and managed outside of
234 * ServerSession to avoid a circular dependency.
235 * @param name String name for the ServerSession creating this thread, used for debug purposes.
236 * @returns A weak pointer to the newly created service thread.
237 */
238 std::weak_ptr<Kernel::ServiceThread> CreateServiceThread(const std::string& name);
239
240 /**
241 * Releases an HLE service thread, instructing KernelCore to free it. This should be called when
242 * the ServerSession associated with the thread is destroyed.
243 * @param service_thread Service thread to release.
244 */
245 void ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread);
246
233private: 247private:
234 friend class Object; 248 friend class Object;
235 friend class Process; 249 friend class Process;
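
Since CurrentScheduler() now returns a pointer that is null on host threads which are not CPU core threads, callers need a branch; a small hypothetical helper illustrating the expected pattern:

    #include "core/hle/kernel/k_scheduler.h"
    #include "core/hle/kernel/kernel.h"

    Kernel::Thread* CurrentGuestThread(Kernel::KernelCore& kernel) {
        // Null when invoked from, e.g., a service thread rather than a core thread.
        if (auto* scheduler = kernel.CurrentScheduler()) {
            return scheduler->GetCurrentThread();
        }
        return nullptr;
    }
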
diff --git a/src/core/hle/kernel/memory/memory_block.h b/src/core/hle/kernel/memory/memory_block.h
index 9d7839d08..37fe19916 100644
--- a/src/core/hle/kernel/memory/memory_block.h
+++ b/src/core/hle/kernel/memory/memory_block.h
@@ -222,9 +222,9 @@ public:
222 222
223public: 223public:
224 constexpr MemoryBlock() = default; 224 constexpr MemoryBlock() = default;
225 constexpr MemoryBlock(VAddr addr, std::size_t num_pages, MemoryState state, 225 constexpr MemoryBlock(VAddr addr_, std::size_t num_pages_, MemoryState state_,
226 MemoryPermission perm, MemoryAttribute attribute) 226 MemoryPermission perm_, MemoryAttribute attribute_)
227 : addr{addr}, num_pages(num_pages), state{state}, perm{perm}, attribute{attribute} {} 227 : addr{addr_}, num_pages(num_pages_), state{state_}, perm{perm_}, attribute{attribute_} {}
228 228
229 constexpr VAddr GetAddress() const { 229 constexpr VAddr GetAddress() const {
230 return addr; 230 return addr;
diff --git a/src/core/hle/kernel/memory/memory_block_manager.h b/src/core/hle/kernel/memory/memory_block_manager.h
index 6e1d41075..f57d1bbcc 100644
--- a/src/core/hle/kernel/memory/memory_block_manager.h
+++ b/src/core/hle/kernel/memory/memory_block_manager.h
@@ -57,8 +57,8 @@ public:
57private: 57private:
58 void MergeAdjacent(iterator it, iterator& next_it); 58 void MergeAdjacent(iterator it, iterator& next_it);
59 59
60 const VAddr start_addr; 60 [[maybe_unused]] const VAddr start_addr;
61 const VAddr end_addr; 61 [[maybe_unused]] const VAddr end_addr;
62 62
63 MemoryBlockTree memory_block_tree; 63 MemoryBlockTree memory_block_tree;
64}; 64};
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 8f6c944d1..4f8075e0e 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -11,11 +11,11 @@
11#include "core/core.h" 11#include "core/core.h"
12#include "core/hle/kernel/errors.h" 12#include "core/hle/kernel/errors.h"
13#include "core/hle/kernel/handle_table.h" 13#include "core/hle/kernel/handle_table.h"
14#include "core/hle/kernel/k_scheduler.h"
14#include "core/hle/kernel/kernel.h" 15#include "core/hle/kernel/kernel.h"
15#include "core/hle/kernel/mutex.h" 16#include "core/hle/kernel/mutex.h"
16#include "core/hle/kernel/object.h" 17#include "core/hle/kernel/object.h"
17#include "core/hle/kernel/process.h" 18#include "core/hle/kernel/process.h"
18#include "core/hle/kernel/scheduler.h"
19#include "core/hle/kernel/thread.h" 19#include "core/hle/kernel/thread.h"
20#include "core/hle/result.h" 20#include "core/hle/result.h"
21#include "core/memory.h" 21#include "core/memory.h"
@@ -73,9 +73,9 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
73 73
74 auto& kernel = system.Kernel(); 74 auto& kernel = system.Kernel();
75 std::shared_ptr<Thread> current_thread = 75 std::shared_ptr<Thread> current_thread =
76 SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); 76 SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
77 { 77 {
78 SchedulerLock lock(kernel); 78 KScopedSchedulerLock lock(kernel);
79 // The mutex address must be 4-byte aligned 79 // The mutex address must be 4-byte aligned
80 if ((address % sizeof(u32)) != 0) { 80 if ((address % sizeof(u32)) != 0) {
81 return ERR_INVALID_ADDRESS; 81 return ERR_INVALID_ADDRESS;
@@ -114,7 +114,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
114 } 114 }
115 115
116 { 116 {
117 SchedulerLock lock(kernel); 117 KScopedSchedulerLock lock(kernel);
118 auto* owner = current_thread->GetLockOwner(); 118 auto* owner = current_thread->GetLockOwner();
119 if (owner != nullptr) { 119 if (owner != nullptr) {
120 owner->RemoveMutexWaiter(current_thread); 120 owner->RemoveMutexWaiter(current_thread);
@@ -153,10 +153,10 @@ std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thr
153 153
154ResultCode Mutex::Release(VAddr address) { 154ResultCode Mutex::Release(VAddr address) {
155 auto& kernel = system.Kernel(); 155 auto& kernel = system.Kernel();
156 SchedulerLock lock(kernel); 156 KScopedSchedulerLock lock(kernel);
157 157
158 std::shared_ptr<Thread> current_thread = 158 std::shared_ptr<Thread> current_thread =
159 SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); 159 SharedFrom(kernel.CurrentScheduler()->GetCurrentThread());
160 160
161 auto [result, new_owner] = Unlock(current_thread, address); 161 auto [result, new_owner] = Unlock(current_thread, address);
162 162
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 50aca5752..7fea45f96 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -7,14 +7,14 @@
7#include "core/arm/dynarmic/arm_dynarmic_32.h" 7#include "core/arm/dynarmic/arm_dynarmic_32.h"
8#include "core/arm/dynarmic/arm_dynarmic_64.h" 8#include "core/arm/dynarmic/arm_dynarmic_64.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/hle/kernel/k_scheduler.h"
10#include "core/hle/kernel/kernel.h" 11#include "core/hle/kernel/kernel.h"
11#include "core/hle/kernel/physical_core.h" 12#include "core/hle/kernel/physical_core.h"
12#include "core/hle/kernel/scheduler.h"
13 13
14namespace Kernel { 14namespace Kernel {
15 15
16PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, 16PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system,
17 Kernel::Scheduler& scheduler, Core::CPUInterrupts& interrupts) 17 Kernel::KScheduler& scheduler, Core::CPUInterrupts& interrupts)
18 : core_index{core_index}, system{system}, scheduler{scheduler}, 18 : core_index{core_index}, system{system}, scheduler{scheduler},
19 interrupts{interrupts}, guard{std::make_unique<Common::SpinLock>()} {} 19 interrupts{interrupts}, guard{std::make_unique<Common::SpinLock>()} {}
20 20
@@ -43,10 +43,6 @@ void PhysicalCore::Idle() {
43 interrupts[core_index].AwaitInterrupt(); 43 interrupts[core_index].AwaitInterrupt();
44} 44}
45 45
46void PhysicalCore::Shutdown() {
47 scheduler.Shutdown();
48}
49
50bool PhysicalCore::IsInterrupted() const { 46bool PhysicalCore::IsInterrupted() const {
51 return interrupts[core_index].IsInterrupted(); 47 return interrupts[core_index].IsInterrupted();
52} 48}
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index 37513130a..f2b0911aa 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -15,7 +15,7 @@ class SpinLock;
15} 15}
16 16
17namespace Kernel { 17namespace Kernel {
18class Scheduler; 18class KScheduler;
19} // namespace Kernel 19} // namespace Kernel
20 20
21namespace Core { 21namespace Core {
@@ -28,7 +28,7 @@ namespace Kernel {
28 28
29class PhysicalCore { 29class PhysicalCore {
30public: 30public:
31 PhysicalCore(std::size_t core_index, Core::System& system, Kernel::Scheduler& scheduler, 31 PhysicalCore(std::size_t core_index, Core::System& system, Kernel::KScheduler& scheduler,
32 Core::CPUInterrupts& interrupts); 32 Core::CPUInterrupts& interrupts);
33 ~PhysicalCore(); 33 ~PhysicalCore();
34 34
@@ -36,7 +36,7 @@ public:
36 PhysicalCore& operator=(const PhysicalCore&) = delete; 36 PhysicalCore& operator=(const PhysicalCore&) = delete;
37 37
38 PhysicalCore(PhysicalCore&&) = default; 38 PhysicalCore(PhysicalCore&&) = default;
39 PhysicalCore& operator=(PhysicalCore&&) = default; 39 PhysicalCore& operator=(PhysicalCore&&) = delete;
40 40
41 /// Initialize the core for the specified parameters. 41 /// Initialize the core for the specified parameters.
42 void Initialize(bool is_64_bit); 42 void Initialize(bool is_64_bit);
@@ -55,9 +55,6 @@ public:
55 /// Check if this core is interrupted 55 /// Check if this core is interrupted
56 bool IsInterrupted() const; 56 bool IsInterrupted() const;
57 57
58 // Shutdown this physical core.
59 void Shutdown();
60
61 bool IsInitialized() const { 58 bool IsInitialized() const {
62 return arm_interface != nullptr; 59 return arm_interface != nullptr;
63 } 60 }
@@ -82,18 +79,18 @@ public:
82 return core_index; 79 return core_index;
83 } 80 }
84 81
85 Kernel::Scheduler& Scheduler() { 82 Kernel::KScheduler& Scheduler() {
86 return scheduler; 83 return scheduler;
87 } 84 }
88 85
89 const Kernel::Scheduler& Scheduler() const { 86 const Kernel::KScheduler& Scheduler() const {
90 return scheduler; 87 return scheduler;
91 } 88 }
92 89
93private: 90private:
94 const std::size_t core_index; 91 const std::size_t core_index;
95 Core::System& system; 92 Core::System& system;
96 Kernel::Scheduler& scheduler; 93 Kernel::KScheduler& scheduler;
97 Core::CPUInterrupts& interrupts; 94 Core::CPUInterrupts& interrupts;
98 std::unique_ptr<Common::SpinLock> guard; 95 std::unique_ptr<Common::SpinLock> guard;
99 std::unique_ptr<Core::ARM_Interface> arm_interface; 96 std::unique_ptr<Core::ARM_Interface> arm_interface;
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index b17529dee..b905b486a 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -15,13 +15,13 @@
15#include "core/file_sys/program_metadata.h" 15#include "core/file_sys/program_metadata.h"
16#include "core/hle/kernel/code_set.h" 16#include "core/hle/kernel/code_set.h"
17#include "core/hle/kernel/errors.h" 17#include "core/hle/kernel/errors.h"
18#include "core/hle/kernel/k_scheduler.h"
18#include "core/hle/kernel/kernel.h" 19#include "core/hle/kernel/kernel.h"
19#include "core/hle/kernel/memory/memory_block_manager.h" 20#include "core/hle/kernel/memory/memory_block_manager.h"
20#include "core/hle/kernel/memory/page_table.h" 21#include "core/hle/kernel/memory/page_table.h"
21#include "core/hle/kernel/memory/slab_heap.h" 22#include "core/hle/kernel/memory/slab_heap.h"
22#include "core/hle/kernel/process.h" 23#include "core/hle/kernel/process.h"
23#include "core/hle/kernel/resource_limit.h" 24#include "core/hle/kernel/resource_limit.h"
24#include "core/hle/kernel/scheduler.h"
25#include "core/hle/kernel/thread.h" 25#include "core/hle/kernel/thread.h"
26#include "core/hle/lock.h" 26#include "core/hle/lock.h"
27#include "core/memory.h" 27#include "core/memory.h"
@@ -54,7 +54,7 @@ void SetupMainThread(Core::System& system, Process& owner_process, u32 priority,
54 auto& kernel = system.Kernel(); 54 auto& kernel = system.Kernel();
55 // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires 55 // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
56 { 56 {
57 SchedulerLock lock{kernel}; 57 KScopedSchedulerLock lock{kernel};
58 thread->SetStatus(ThreadStatus::Ready); 58 thread->SetStatus(ThreadStatus::Ready);
59 } 59 }
60} 60}
@@ -213,7 +213,7 @@ void Process::UnregisterThread(const Thread* thread) {
213} 213}
214 214
215ResultCode Process::ClearSignalState() { 215ResultCode Process::ClearSignalState() {
216 SchedulerLock lock(system.Kernel()); 216 KScopedSchedulerLock lock(system.Kernel());
217 if (status == ProcessStatus::Exited) { 217 if (status == ProcessStatus::Exited) {
218 LOG_ERROR(Kernel, "called on a terminated process instance."); 218 LOG_ERROR(Kernel, "called on a terminated process instance.");
219 return ERR_INVALID_STATE; 219 return ERR_INVALID_STATE;
@@ -314,7 +314,7 @@ void Process::PrepareForTermination() {
314 if (thread->GetOwnerProcess() != this) 314 if (thread->GetOwnerProcess() != this)
315 continue; 315 continue;
316 316
317 if (thread.get() == system.CurrentScheduler().GetCurrentThread()) 317 if (thread.get() == kernel.CurrentScheduler()->GetCurrentThread())
318 continue; 318 continue;
319 319
320 // TODO(Subv): When are the other running/ready threads terminated? 320 // TODO(Subv): When are the other running/ready threads terminated?
@@ -325,7 +325,7 @@ void Process::PrepareForTermination() {
325 } 325 }
326 }; 326 };
327 327
328 stop_threads(system.GlobalScheduler().GetThreadList()); 328 stop_threads(system.GlobalSchedulerContext().GetThreadList());
329 329
330 FreeTLSRegion(tls_region_address); 330 FreeTLSRegion(tls_region_address);
331 tls_region_address = 0; 331 tls_region_address = 0;
@@ -347,7 +347,7 @@ static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
347} 347}
348 348
349VAddr Process::CreateTLSRegion() { 349VAddr Process::CreateTLSRegion() {
350 SchedulerLock lock(system.Kernel()); 350 KScopedSchedulerLock lock(system.Kernel());
351 if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)}; 351 if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)};
352 tls_page_iter != tls_pages.cend()) { 352 tls_page_iter != tls_pages.cend()) {
353 return *tls_page_iter->ReserveSlot(); 353 return *tls_page_iter->ReserveSlot();
@@ -378,7 +378,7 @@ VAddr Process::CreateTLSRegion() {
378} 378}
379 379
380void Process::FreeTLSRegion(VAddr tls_address) { 380void Process::FreeTLSRegion(VAddr tls_address) {
381 SchedulerLock lock(system.Kernel()); 381 KScopedSchedulerLock lock(system.Kernel());
382 const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE); 382 const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE);
383 auto iter = 383 auto iter =
384 std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { 384 std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) {
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index f45cb5674..e412e58aa 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -216,6 +216,16 @@ public:
216 total_process_running_time_ticks += ticks; 216 total_process_running_time_ticks += ticks;
217 } 217 }
218 218
219 /// Gets the process schedule count, used for thread yielding.
220 s64 GetScheduledCount() const {
221 return schedule_count;
222 }
223
224 /// Increments the process schedule count, used for thread yielding.
225 void IncrementScheduledCount() {
226 ++schedule_count;
227 }
228
219 /// Gets 8 bytes of random data for svcGetInfo RandomEntropy 229 /// Gets 8 bytes of random data for svcGetInfo RandomEntropy
220 u64 GetRandomEntropy(std::size_t index) const { 230 u64 GetRandomEntropy(std::size_t index) const {
221 return random_entropy.at(index); 231 return random_entropy.at(index);
@@ -397,6 +407,9 @@ private:
397 /// Name of this process 407 /// Name of this process
398 std::string name; 408 std::string name;
399 409
410 /// Schedule count of this process
411 s64 schedule_count{};
412
400 /// System context 413 /// System context
401 Core::System& system; 414 Core::System& system;
402}; 415};
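
The schedule count added above gives yield operations a cheap redundancy test. An illustrative sketch (the per-thread snapshot is an assumption for illustration, not part of this diff):

    #include "common/common_types.h"
    #include "core/hle/kernel/process.h"

    // Pairs a per-thread snapshot with Process::GetScheduledCount(): if the
    // process has scheduled nothing since the snapshot was taken, repeating
    // the yield would leave the queues unchanged and can be skipped.
    bool IsYieldRedundant(const Kernel::Process& process, s64 thread_snapshot) {
        return thread_snapshot == process.GetScheduledCount();
    }
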
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 63880f13d..0f128c586 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -199,7 +199,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
199 break; 199 break;
200 } 200 }
201 201
202 LOG_ERROR(Kernel, "Invalid capability type! type={}", static_cast<u32>(type)); 202 LOG_ERROR(Kernel, "Invalid capability type! type={}", type);
203 return ERR_INVALID_CAPABILITY_DESCRIPTOR; 203 return ERR_INVALID_CAPABILITY_DESCRIPTOR;
204} 204}
205 205
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 6e286419e..cea262ce0 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -6,10 +6,10 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/hle/kernel/errors.h" 8#include "core/hle/kernel/errors.h"
9#include "core/hle/kernel/k_scheduler.h"
9#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
10#include "core/hle/kernel/object.h" 11#include "core/hle/kernel/object.h"
11#include "core/hle/kernel/readable_event.h" 12#include "core/hle/kernel/readable_event.h"
12#include "core/hle/kernel/scheduler.h"
13#include "core/hle/kernel/thread.h" 13#include "core/hle/kernel/thread.h"
14 14
15namespace Kernel { 15namespace Kernel {
@@ -39,7 +39,7 @@ void ReadableEvent::Clear() {
39} 39}
40 40
41ResultCode ReadableEvent::Reset() { 41ResultCode ReadableEvent::Reset() {
42 SchedulerLock lock(kernel); 42 KScopedSchedulerLock lock(kernel);
43 if (!is_signaled) { 43 if (!is_signaled) {
44 LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}", 44 LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
45 GetObjectId(), GetTypeName(), GetName()); 45 GetObjectId(), GetTypeName(), GetName());
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index 212e442f4..7bf50339d 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -65,8 +65,8 @@ ResultCode ResourceLimit::SetLimitValue(ResourceType resource, s64 value) {
65 limit[index] = value; 65 limit[index] = value;
66 return RESULT_SUCCESS; 66 return RESULT_SUCCESS;
67 } else { 67 } else {
68 LOG_ERROR(Kernel, "Limit value is too large! resource={}, value={}, index={}", 68 LOG_ERROR(Kernel, "Limit value is too large! resource={}, value={}, index={}", resource,
69 static_cast<u32>(resource), value, index); 69 value, index);
70 return ERR_INVALID_STATE; 70 return ERR_INVALID_STATE;
71 } 71 }
72} 72}
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
deleted file mode 100644
index 5c63b0b4a..000000000
--- a/src/core/hle/kernel/scheduler.cpp
+++ /dev/null
@@ -1,819 +0,0 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4//
5// SelectThreads, Yield functions originally by TuxSH.
6// licensed under GPLv2 or later under exception provided by the author.
7
8#include <algorithm>
9#include <mutex>
10#include <set>
11#include <unordered_set>
12#include <utility>
13
14#include "common/assert.h"
15#include "common/bit_util.h"
16#include "common/fiber.h"
17#include "common/logging/log.h"
18#include "core/arm/arm_interface.h"
19#include "core/core.h"
20#include "core/core_timing.h"
21#include "core/cpu_manager.h"
22#include "core/hle/kernel/kernel.h"
23#include "core/hle/kernel/physical_core.h"
24#include "core/hle/kernel/process.h"
25#include "core/hle/kernel/scheduler.h"
26#include "core/hle/kernel/time_manager.h"
27
28namespace Kernel {
29
30GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {}
31
32GlobalScheduler::~GlobalScheduler() = default;
33
34void GlobalScheduler::AddThread(std::shared_ptr<Thread> thread) {
35 std::scoped_lock lock{global_list_guard};
36 thread_list.push_back(std::move(thread));
37}
38
39void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) {
40 std::scoped_lock lock{global_list_guard};
41 thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
42 thread_list.end());
43}
44
45u32 GlobalScheduler::SelectThreads() {
46 ASSERT(is_locked);
47 const auto update_thread = [](Thread* thread, Scheduler& sched) {
48 std::scoped_lock lock{sched.guard};
49 if (thread != sched.selected_thread_set.get()) {
50 if (thread == nullptr) {
51 ++sched.idle_selection_count;
52 }
53 sched.selected_thread_set = SharedFrom(thread);
54 }
55 const bool reschedule_pending =
56 sched.is_context_switch_pending || (sched.selected_thread_set != sched.current_thread);
57 sched.is_context_switch_pending = reschedule_pending;
58 std::atomic_thread_fence(std::memory_order_seq_cst);
59 return reschedule_pending;
60 };
61 if (!is_reselection_pending.load()) {
62 return 0;
63 }
64 std::array<Thread*, Core::Hardware::NUM_CPU_CORES> top_threads{};
65
66 u32 idle_cores{};
67
68 // Step 1: Get top thread in schedule queue.
69 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
70 Thread* top_thread =
71 scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
72 if (top_thread != nullptr) {
73 // TODO(Blinkhawk): Implement Thread Pinning
74 } else {
75 idle_cores |= (1U << core);
76 }
77 top_threads[core] = top_thread;
78 }
79
80 while (idle_cores != 0) {
81 u32 core_id = Common::CountTrailingZeroes32(idle_cores);
82
83 if (!suggested_queue[core_id].empty()) {
84 std::array<s32, Core::Hardware::NUM_CPU_CORES> migration_candidates{};
85 std::size_t num_candidates = 0;
86 auto iter = suggested_queue[core_id].begin();
87 Thread* suggested = nullptr;
88 // Step 2: Try selecting a suggested thread.
89 while (iter != suggested_queue[core_id].end()) {
90 suggested = *iter;
91 iter++;
92 s32 suggested_core_id = suggested->GetProcessorID();
93 Thread* top_thread =
94 suggested_core_id >= 0 ? top_threads[suggested_core_id] : nullptr;
95 if (top_thread != suggested) {
96 if (top_thread != nullptr &&
97 top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) {
98 suggested = nullptr;
99 break;
100 // There's a too high thread to do core migration, cancel
101 }
102 TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), suggested);
103 break;
104 }
105 suggested = nullptr;
106 migration_candidates[num_candidates++] = suggested_core_id;
107 }
108 // Step 3: Select a suggested thread from another core
109 if (suggested == nullptr) {
110 for (std::size_t i = 0; i < num_candidates; i++) {
111 s32 candidate_core = migration_candidates[i];
112 suggested = top_threads[candidate_core];
113 auto it = scheduled_queue[candidate_core].begin();
114 it++;
115 Thread* next = it != scheduled_queue[candidate_core].end() ? *it : nullptr;
116 if (next != nullptr) {
117 TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id),
118 suggested);
119 top_threads[candidate_core] = next;
120 break;
121 } else {
122 suggested = nullptr;
123 }
124 }
125 }
126 top_threads[core_id] = suggested;
127 }
128
129 idle_cores &= ~(1U << core_id);
130 }
131 u32 cores_needing_context_switch{};
132 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
133 Scheduler& sched = kernel.Scheduler(core);
134 ASSERT(top_threads[core] == nullptr ||
135 static_cast<u32>(top_threads[core]->GetProcessorID()) == core);
136 if (update_thread(top_threads[core], sched)) {
137 cores_needing_context_switch |= (1U << core);
138 }
139 }
140 return cores_needing_context_switch;
141}
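
The removed SelectThreads() above iterates idle cores through a bit mask, popping the lowest set bit each round via CountTrailingZeroes32. The same iteration pattern in a standalone sketch:

    #include <bit>
    #include <cstdio>

    // Visit each set bit (idle core) from lowest to highest.
    void ForEachIdleCore(unsigned mask) {
        while (mask != 0) {
            const int core = std::countr_zero(mask); // index of lowest set bit
            std::printf("idle core %d\n", core);
            mask &= mask - 1; // clear the bit just handled
        }
    }
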
142
143bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
144 ASSERT(is_locked);
145 // Note: caller should use critical section, etc.
146 if (!yielding_thread->IsRunnable()) {
147 // Normally this case shouldn't happen except for SetThreadActivity.
148 is_reselection_pending.store(true, std::memory_order_release);
149 return false;
150 }
151 const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
152 const u32 priority = yielding_thread->GetPriority();
153
154 // Yield the thread
155 Reschedule(priority, core_id, yielding_thread);
156 const Thread* const winner = scheduled_queue[core_id].front();
157 if (kernel.GetCurrentHostThreadID() != core_id) {
158 is_reselection_pending.store(true, std::memory_order_release);
159 }
160
161 return AskForReselectionOrMarkRedundant(yielding_thread, winner);
162}
163
164bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
165 ASSERT(is_locked);
166 // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
167 // etc.
168 if (!yielding_thread->IsRunnable()) {
169 // Normally this case shouldn't happen except for SetThreadActivity.
170 is_reselection_pending.store(true, std::memory_order_release);
171 return false;
172 }
173 const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
174 const u32 priority = yielding_thread->GetPriority();
175
176 // Yield the thread
177 Reschedule(priority, core_id, yielding_thread);
178
179 std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
180 for (std::size_t i = 0; i < current_threads.size(); i++) {
181 current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
182 }
183
184 Thread* next_thread = scheduled_queue[core_id].front(priority);
185 Thread* winner = nullptr;
186 for (auto& thread : suggested_queue[core_id]) {
187 const s32 source_core = thread->GetProcessorID();
188 if (source_core >= 0) {
189 if (current_threads[source_core] != nullptr) {
190 if (thread == current_threads[source_core] ||
191 current_threads[source_core]->GetPriority() < min_regular_priority) {
192 continue;
193 }
194 }
195 }
196 if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() ||
197 next_thread->GetPriority() < thread->GetPriority()) {
198 if (thread->GetPriority() <= priority) {
199 winner = thread;
200 break;
201 }
202 }
203 }
204
205 if (winner != nullptr) {
206 if (winner != yielding_thread) {
207 TransferToCore(winner->GetPriority(), s32(core_id), winner);
208 }
209 } else {
210 winner = next_thread;
211 }
212
213 if (kernel.GetCurrentHostThreadID() != core_id) {
214 is_reselection_pending.store(true, std::memory_order_release);
215 }
216
217 return AskForReselectionOrMarkRedundant(yielding_thread, winner);
218}
219
220bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) {
221 ASSERT(is_locked);
222 // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
223 // etc.
224 if (!yielding_thread->IsRunnable()) {
225 // Normally this case shouldn't happen except for SetThreadActivity.
226 is_reselection_pending.store(true, std::memory_order_release);
227 return false;
228 }
229 Thread* winner = nullptr;
230 const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
231
232 // Remove the thread from its scheduled mlq, put it on the corresponding "suggested" one instead
233 TransferToCore(yielding_thread->GetPriority(), -1, yielding_thread);
234
235 // If the core is idle, perform load balancing, excluding the threads that have just used this
236 // function...
237 if (scheduled_queue[core_id].empty()) {
238 // Here, "current_threads" is calculated after the yield, unlike in the -1 yield above
239 std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
240 for (std::size_t i = 0; i < current_threads.size(); i++) {
241 current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
242 }
243 for (auto& thread : suggested_queue[core_id]) {
244 const s32 source_core = thread->GetProcessorID();
245 if (source_core < 0 || thread == current_threads[source_core]) {
246 continue;
247 }
248 if (current_threads[source_core] == nullptr ||
249 current_threads[source_core]->GetPriority() >= min_regular_priority) {
250 winner = thread;
251 }
252 break;
253 }
254 if (winner != nullptr) {
255 if (winner != yielding_thread) {
256 TransferToCore(winner->GetPriority(), static_cast<s32>(core_id), winner);
257 }
258 } else {
259 winner = yielding_thread;
260 }
261 } else {
262 winner = scheduled_queue[core_id].front();
263 }
264
265 if (kernel.GetCurrentHostThreadID() != core_id) {
266 is_reselection_pending.store(true, std::memory_order_release);
267 }
268
269 return AskForReselectionOrMarkRedundant(yielding_thread, winner);
270}
271
272void GlobalScheduler::PreemptThreads() {
273 ASSERT(is_locked);
274 for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
275 const u32 priority = preemption_priorities[core_id];
276
277 if (scheduled_queue[core_id].size(priority) > 0) {
278 if (scheduled_queue[core_id].size(priority) > 1) {
279 scheduled_queue[core_id].front(priority)->IncrementYieldCount();
280 }
281 scheduled_queue[core_id].yield(priority);
282 if (scheduled_queue[core_id].size(priority) > 1) {
283 scheduled_queue[core_id].front(priority)->IncrementYieldCount();
284 }
285 }
286
287 Thread* current_thread =
288 scheduled_queue[core_id].empty() ? nullptr : scheduled_queue[core_id].front();
289 Thread* winner = nullptr;
290 for (auto& thread : suggested_queue[core_id]) {
291 const s32 source_core = thread->GetProcessorID();
292 if (thread->GetPriority() != priority) {
293 continue;
294 }
295 if (source_core >= 0) {
296 Thread* next_thread = scheduled_queue[source_core].empty()
297 ? nullptr
298 : scheduled_queue[source_core].front();
299 if (next_thread != nullptr && next_thread->GetPriority() < 2) {
300 break;
301 }
302 if (next_thread == thread) {
303 continue;
304 }
305 }
306 if (current_thread != nullptr &&
307 current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) {
308 winner = thread;
309 break;
310 }
311 }
312
313 if (winner != nullptr) {
314 TransferToCore(winner->GetPriority(), s32(core_id), winner);
315 current_thread =
316 winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread;
317 }
318
319 if (current_thread != nullptr && current_thread->GetPriority() > priority) {
320 for (auto& thread : suggested_queue[core_id]) {
321 const s32 source_core = thread->GetProcessorID();
322 if (thread->GetPriority() < priority) {
323 continue;
324 }
325 if (source_core >= 0) {
326 Thread* next_thread = scheduled_queue[source_core].empty()
327 ? nullptr
328 : scheduled_queue[source_core].front();
329 if (next_thread != nullptr && next_thread->GetPriority() < 2) {
330 break;
331 }
332 if (next_thread == thread) {
333 continue;
334 }
335 }
336 if (current_thread != nullptr &&
337 current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) {
338 winner = thread;
339 break;
340 }
341 }
342
343 if (winner != nullptr) {
344 TransferToCore(winner->GetPriority(), s32(core_id), winner);
345 current_thread = winner;
346 }
347 }
348
349 is_reselection_pending.store(true, std::memory_order_release);
350 }
351}
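PreemptThreads relies on the multi-level queue's yield(priority) operation, which rotates one priority level so the next thread at that priority runs. A minimal standalone analog, using std::deque in place of Common::MultiLevelQueue (names here are illustrative, not the real container API):

    #include <array>
    #include <cstddef>
    #include <deque>

    constexpr std::size_t kNumPriorities = 64; // illustrative; the real count is THREADPRIO_COUNT
    using ThreadHandle = int;                  // stand-in for Thread*

    // One FIFO per priority level, like one core's scheduled_queue.
    std::array<std::deque<ThreadHandle>, kNumPriorities> levels;

    // Analog of scheduled_queue[core].yield(priority): rotate the level so
    // the next thread at that priority gets to run.
    void YieldLevel(std::size_t priority) {
        auto& level = levels[priority];
        if (level.size() > 1) {
            level.push_back(level.front());
            level.pop_front();
        }
    }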
352
353void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule,
354 Core::EmuThreadHandle global_thread) {
355 u32 current_core = global_thread.host_handle;
356 bool must_context_switch = global_thread.guest_handle != InvalidHandle &&
357 (current_core < Core::Hardware::NUM_CPU_CORES);
358 while (cores_pending_reschedule != 0) {
359 u32 core = Common::CountTrailingZeroes32(cores_pending_reschedule);
360 ASSERT(core < Core::Hardware::NUM_CPU_CORES);
361 if (!must_context_switch || core != current_core) {
362 auto& phys_core = kernel.PhysicalCore(core);
363 phys_core.Interrupt();
364 } else {
365 must_context_switch = true;
366 }
367 cores_pending_reschedule &= ~(1U << core);
368 }
369 if (must_context_switch) {
370 auto& core_scheduler = kernel.CurrentScheduler();
371 kernel.ExitSVCProfile();
372 core_scheduler.TryDoContextSwitch();
373 kernel.EnterSVCProfile();
374 }
375}
376
377void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) {
378 ASSERT(is_locked);
379 suggested_queue[core].add(thread, priority);
380}
381
382void GlobalScheduler::Unsuggest(u32 priority, std::size_t core, Thread* thread) {
383 ASSERT(is_locked);
384 suggested_queue[core].remove(thread, priority);
385}
386
387void GlobalScheduler::Schedule(u32 priority, std::size_t core, Thread* thread) {
388 ASSERT(is_locked);
389 ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
390 scheduled_queue[core].add(thread, priority);
391}
392
393void GlobalScheduler::SchedulePrepend(u32 priority, std::size_t core, Thread* thread) {
394 ASSERT(is_locked);
395 ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
396 scheduled_queue[core].add(thread, priority, false);
397}
398
399void GlobalScheduler::Reschedule(u32 priority, std::size_t core, Thread* thread) {
400 ASSERT(is_locked);
401 scheduled_queue[core].remove(thread, priority);
402 scheduled_queue[core].add(thread, priority);
403}
404
405void GlobalScheduler::Unschedule(u32 priority, std::size_t core, Thread* thread) {
406 ASSERT(is_locked);
407 scheduled_queue[core].remove(thread, priority);
408}
409
410void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) {
411 ASSERT(is_locked);
412 const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT;
413 const s32 source_core = thread->GetProcessorID();
414 if (source_core == destination_core || !schedulable) {
415 return;
416 }
417 thread->SetProcessorID(destination_core);
418 if (source_core >= 0) {
419 Unschedule(priority, static_cast<u32>(source_core), thread);
420 }
421 if (destination_core >= 0) {
422 Unsuggest(priority, static_cast<u32>(destination_core), thread);
423 Schedule(priority, static_cast<u32>(destination_core), thread);
424 }
425 if (source_core >= 0) {
426 Suggest(priority, static_cast<u32>(source_core), thread);
427 }
428}
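For reference, the queue transitions TransferToCore performs at priority p, summarized from the code above:

    source >= 0, dest >= 0 : Unschedule(p, source); Unsuggest(p, dest); Schedule(p, dest); Suggest(p, source)
    source >= 0, dest == -1: Unschedule(p, source); Suggest(p, source)
    source == -1, dest >= 0: Unsuggest(p, dest); Schedule(p, dest)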
429
430bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,
431 const Thread* winner) {
432 if (current_thread == winner) {
433 current_thread->IncrementYieldCount();
434 return true;
435 } else {
436 is_reselection_pending.store(true, std::memory_order_release);
437 return false;
438 }
439}
440
441void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) {
442 if (old_flags == thread->scheduling_state) {
443 return;
444 }
445 ASSERT(is_locked);
446
447 if (old_flags == static_cast<u32>(ThreadSchedStatus::Runnable)) {
448 // In this case the thread was running; now it's pausing or exiting
449 if (thread->processor_id >= 0) {
450 Unschedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
451 }
452
453 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
454 if (core != static_cast<u32>(thread->processor_id) &&
455 ((thread->affinity_mask >> core) & 1) != 0) {
456 Unsuggest(thread->current_priority, core, thread);
457 }
458 }
459 } else if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
460 // The thread is transitioning from stopped to runnable
461 if (thread->processor_id >= 0) {
462 Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
463 }
464
465 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
466 if (core != static_cast<u32>(thread->processor_id) &&
467 ((thread->affinity_mask >> core) & 1) != 0) {
468 Suggest(thread->current_priority, core, thread);
469 }
470 }
471 }
472
473 SetReselectionPending();
474}
475
476void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) {
477 if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable)) {
478 return;
479 }
480 ASSERT(is_locked);
481 if (thread->processor_id >= 0) {
482 Unschedule(old_priority, static_cast<u32>(thread->processor_id), thread);
483 }
484
485 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
486 if (core != static_cast<u32>(thread->processor_id) &&
487 ((thread->affinity_mask >> core) & 1) != 0) {
488 Unsuggest(old_priority, core, thread);
489 }
490 }
491
492 if (thread->processor_id >= 0) {
493 if (thread == kernel.CurrentScheduler().GetCurrentThread()) {
494 SchedulePrepend(thread->current_priority, static_cast<u32>(thread->processor_id),
495 thread);
496 } else {
497 Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
498 }
499 }
500
501 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
502 if (core != static_cast<u32>(thread->processor_id) &&
503 ((thread->affinity_mask >> core) & 1) != 0) {
504 Suggest(thread->current_priority, core, thread);
505 }
506 }
507 thread->IncrementYieldCount();
508 SetReselectionPending();
509}
510
511void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask,
512 s32 old_core) {
513 if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable) ||
514 thread->current_priority >= THREADPRIO_COUNT) {
515 return;
516 }
517 ASSERT(is_locked);
518
519 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
520 if (((old_affinity_mask >> core) & 1) != 0) {
521 if (core == static_cast<u32>(old_core)) {
522 Unschedule(thread->current_priority, core, thread);
523 } else {
524 Unsuggest(thread->current_priority, core, thread);
525 }
526 }
527 }
528
529 for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
530 if (((thread->affinity_mask >> core) & 1) != 0) {
531 if (core == static_cast<u32>(thread->processor_id)) {
532 Schedule(thread->current_priority, core, thread);
533 } else {
534 Suggest(thread->current_priority, core, thread);
535 }
536 }
537 }
538
539 thread->IncrementYieldCount();
540 SetReselectionPending();
541}
542
543void GlobalScheduler::Shutdown() {
544 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
545 scheduled_queue[core].clear();
546 suggested_queue[core].clear();
547 }
548 thread_list.clear();
549}
550
551void GlobalScheduler::Lock() {
552 Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID();
553 ASSERT(!current_thread.IsInvalid());
554 if (current_thread == current_owner) {
555 ++scope_lock;
556 } else {
557 inner_lock.lock();
558 is_locked = true;
559 current_owner = current_thread;
560 ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle());
561 scope_lock = 1;
562 }
563}
564
565void GlobalScheduler::Unlock() {
566 if (--scope_lock != 0) {
567 ASSERT(scope_lock > 0);
568 return;
569 }
570 u32 cores_pending_reschedule = SelectThreads();
571 Core::EmuThreadHandle leaving_thread = current_owner;
572 current_owner = Core::EmuThreadHandle::InvalidHandle();
573 scope_lock = 1;
574 is_locked = false;
575 inner_lock.unlock();
576 EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread);
577}
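Lock() and Unlock() are reference-counted per owning emulated thread, so nested critical sections re-enter cheaply. Callers go through the RAII wrappers declared in the header rather than calling these directly; a sketch of the intended pattern (function and body are illustrative):

    void AdjustSomething(KernelCore& kernel, Thread* thread) {
        SchedulerLock lock(kernel); // GlobalScheduler::Lock() on construction
        // ... mutate scheduling state: priority, affinity, queues ...
    } // GlobalScheduler::Unlock() here: reselects threads and interrupts
      // any cores that now need a context switch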
578
579Scheduler::Scheduler(Core::System& system, std::size_t core_id) : system(system), core_id(core_id) {
580 switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this);
581}
582
583Scheduler::~Scheduler() = default;
584
585bool Scheduler::HaveReadyThreads() const {
586 return system.GlobalScheduler().HaveReadyThreads(core_id);
587}
588
589Thread* Scheduler::GetCurrentThread() const {
590 if (current_thread) {
591 return current_thread.get();
592 }
593 return idle_thread.get();
594}
595
596Thread* Scheduler::GetSelectedThread() const {
597 return selected_thread.get();
598}
599
600u64 Scheduler::GetLastContextSwitchTicks() const {
601 return last_context_switch_time;
602}
603
604void Scheduler::TryDoContextSwitch() {
605 auto& phys_core = system.Kernel().CurrentPhysicalCore();
606 if (phys_core.IsInterrupted()) {
607 phys_core.ClearInterrupt();
608 }
609 guard.lock();
610 if (is_context_switch_pending) {
611 SwitchContext();
612 } else {
613 guard.unlock();
614 }
615}
616
617void Scheduler::OnThreadStart() {
618 SwitchContextStep2();
619}
620
621void Scheduler::Unload(Thread* thread) {
622 if (thread) {
623 thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
624 thread->SetIsRunning(false);
625 if (thread->IsContinuousOnSVC() && !thread->IsHLEThread()) {
626 system.ArmInterface(core_id).ExceptionalExit();
627 thread->SetContinuousOnSVC(false);
628 }
629 if (!thread->IsHLEThread() && !thread->HasExited()) {
630 Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
631 cpu_core.SaveContext(thread->GetContext32());
632 cpu_core.SaveContext(thread->GetContext64());
633 // Save the TPIDR_EL0 system register in case it was modified.
634 thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
635 cpu_core.ClearExclusiveState();
636 }
637 thread->context_guard.unlock();
638 }
639}
640
641void Scheduler::Unload() {
642 Unload(current_thread.get());
643}
644
645void Scheduler::Reload(Thread* thread) {
646 if (thread) {
647 ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
648 "Thread must be runnable.");
649
650 // Mark the thread as running again
651 thread->SetIsRunning(true);
652 thread->SetWasRunning(false);
653 thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
654
655 auto* const thread_owner_process = thread->GetOwnerProcess();
656 if (thread_owner_process != nullptr) {
657 system.Kernel().MakeCurrentProcess(thread_owner_process);
658 }
659 if (!thread->IsHLEThread()) {
660 Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
661 cpu_core.LoadContext(thread->GetContext32());
662 cpu_core.LoadContext(thread->GetContext64());
663 cpu_core.SetTlsAddress(thread->GetTLSAddress());
664 cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
665 cpu_core.ClearExclusiveState();
666 }
667 }
668}
669
670void Scheduler::Reload() {
671 Reload(current_thread.get());
672}
673
674void Scheduler::SwitchContextStep2() {
675 // Load context of new thread
676 Reload(selected_thread.get());
677
678 TryDoContextSwitch();
679}
680
681void Scheduler::SwitchContext() {
682 current_thread_prev = current_thread;
683 selected_thread = selected_thread_set;
684 Thread* previous_thread = current_thread_prev.get();
685 Thread* new_thread = selected_thread.get();
686 current_thread = selected_thread;
687
688 is_context_switch_pending = false;
689
690 if (new_thread == previous_thread) {
691 guard.unlock();
692 return;
693 }
694
695 Process* const previous_process = system.Kernel().CurrentProcess();
696
697 UpdateLastContextSwitchTime(previous_thread, previous_process);
698
699 // Save context for previous thread
700 Unload(previous_thread);
701
702 std::shared_ptr<Common::Fiber>* old_context;
703 if (previous_thread != nullptr) {
704 old_context = &previous_thread->GetHostContext();
705 } else {
706 old_context = &idle_thread->GetHostContext();
707 }
708 guard.unlock();
709
710 Common::Fiber::YieldTo(*old_context, switch_fiber);
711 // When a thread wakes up, it may be resumed by a different scheduler on another core.
712 auto& next_scheduler = system.Kernel().CurrentScheduler();
713 next_scheduler.SwitchContextStep2();
714}
715
716void Scheduler::OnSwitch(void* this_scheduler) {
717 Scheduler* sched = static_cast<Scheduler*>(this_scheduler);
718 sched->SwitchToCurrent();
719}
720
721void Scheduler::SwitchToCurrent() {
722 while (true) {
723 {
724 std::scoped_lock lock{guard};
725 selected_thread = selected_thread_set;
726 current_thread = selected_thread;
727 is_context_switch_pending = false;
728 }
729 const auto is_switch_pending = [this] {
730 std::scoped_lock lock{guard};
731 return is_context_switch_pending;
732 };
733 do {
734 if (current_thread != nullptr && !current_thread->IsHLEThread()) {
735 current_thread->context_guard.lock();
736 if (!current_thread->IsRunnable()) {
737 current_thread->context_guard.unlock();
738 break;
739 }
740 if (static_cast<u32>(current_thread->GetProcessorID()) != core_id) {
741 current_thread->context_guard.unlock();
742 break;
743 }
744 }
745 std::shared_ptr<Common::Fiber>* next_context;
746 if (current_thread != nullptr) {
747 next_context = &current_thread->GetHostContext();
748 } else {
749 next_context = &idle_thread->GetHostContext();
750 }
751 Common::Fiber::YieldTo(switch_fiber, *next_context);
752 } while (!is_switch_pending());
753 }
754}
755
756void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
757 const u64 prev_switch_ticks = last_context_switch_time;
758 const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks();
759 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
760
761 if (thread != nullptr) {
762 thread->UpdateCPUTimeTicks(update_ticks);
763 }
764
765 if (process != nullptr) {
766 process->UpdateCPUTimeTicks(update_ticks);
767 }
768
769 last_context_switch_time = most_recent_switch_ticks;
770}
771
772void Scheduler::Initialize() {
773 std::string name = "Idle Thread Id:" + std::to_string(core_id);
774 std::function<void(void*)> init_func = Core::CpuManager::GetIdleThreadStartFunc();
775 void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
776 ThreadType type = static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE);
777 auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0,
778 nullptr, std::move(init_func), init_func_parameter);
779 idle_thread = std::move(thread_res).Unwrap();
780}
781
782void Scheduler::Shutdown() {
783 current_thread = nullptr;
784 selected_thread = nullptr;
785}
786
787SchedulerLock::SchedulerLock(KernelCore& kernel) : kernel{kernel} {
788 kernel.GlobalScheduler().Lock();
789}
790
791SchedulerLock::~SchedulerLock() {
792 kernel.GlobalScheduler().Unlock();
793}
794
795SchedulerLockAndSleep::SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle,
796 Thread* time_task, s64 nanoseconds)
797 : SchedulerLock{kernel}, event_handle{event_handle}, time_task{time_task}, nanoseconds{
798 nanoseconds} {
799 event_handle = InvalidHandle;
800}
801
802SchedulerLockAndSleep::~SchedulerLockAndSleep() {
803 if (sleep_cancelled) {
804 return;
805 }
806 auto& time_manager = kernel.TimeManager();
807 time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
808}
809
810void SchedulerLockAndSleep::Release() {
811 if (sleep_cancelled) {
812 return;
813 }
814 auto& time_manager = kernel.TimeManager();
815 time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
816 sleep_cancelled = true;
817}
818
819} // namespace Kernel
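A usage sketch for SchedulerLockAndSleep, as declared in the header below: unless CancelSleep() is called, releasing the lock schedules the timeout event for the sleeping thread (timeout_ns and the condition are placeholders):

    Handle event_handle = InvalidHandle;
    {
        SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout_ns);
        if (condition_already_satisfied) {
            lock.CancelSleep(); // no time event will be scheduled
        }
        // Otherwise the destructor schedules a wakeup for current_thread
        // after timeout_ns nanoseconds, then releases the scheduler lock.
    }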
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
deleted file mode 100644
index 68db4a5ef..000000000
--- a/src/core/hle/kernel/scheduler.h
+++ /dev/null
@@ -1,320 +0,0 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <memory>
9#include <mutex>
10#include <vector>
11
12#include "common/common_types.h"
13#include "common/multi_level_queue.h"
14#include "common/spin_lock.h"
15#include "core/hardware_properties.h"
16#include "core/hle/kernel/thread.h"
17
18namespace Common {
19class Fiber;
20}
21
22namespace Core {
23class ARM_Interface;
24class System;
25} // namespace Core
26
27namespace Kernel {
28
29class KernelCore;
30class Process;
31class SchedulerLock;
32
33class GlobalScheduler final {
34public:
35 explicit GlobalScheduler(KernelCore& kernel);
36 ~GlobalScheduler();
37
38 /// Adds a new thread to the scheduler
39 void AddThread(std::shared_ptr<Thread> thread);
40
41 /// Removes a thread from the scheduler
42 void RemoveThread(std::shared_ptr<Thread> thread);
43
44 /// Returns a list of all threads managed by the scheduler
45 const std::vector<std::shared_ptr<Thread>>& GetThreadList() const {
46 return thread_list;
47 }
48
49 /// Notify the scheduler a thread's status has changed.
50 void AdjustSchedulingOnStatus(Thread* thread, u32 old_flags);
51
52 /// Notify the scheduler a thread's priority has changed.
53 void AdjustSchedulingOnPriority(Thread* thread, u32 old_priority);
54
55 /// Notify the scheduler a thread's core and/or affinity mask has changed.
56 void AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, s32 old_core);
57
58 /**
59 * Takes care of selecting the new scheduled threads in three steps:
60 *
61 * 1. First, a thread is selected from the top of the priority queue. If no thread
62 * is obtained, we move to step two; otherwise we are done.
63 *
64 * 2. Second, we try to get a suggested thread that's not assigned to any core or
65 * that is not the top thread on its core.
66 *
67 * 3. Third, if no suggested thread is found, we do a second pass and pick a running
68 * thread on another core and swap it with that core's current thread.
69 *
70 * Returns the cores needing scheduling.
71 */
72 u32 SelectThreads();
73
74 bool HaveReadyThreads(std::size_t core_id) const {
75 return !scheduled_queue[core_id].empty();
76 }
77
78 /**
79 * Takes a thread and moves it to the back of its priority list.
80 *
81 * @note This operation can be redundant; if so, no scheduling is changed.
82 */
83 bool YieldThread(Thread* thread);
84
85 /**
86 * Takes a thread and moves it to the back of its priority list.
87 * Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or
88 * a better priority than the next thread in the core.
89 *
90 * @note This operation can be redundant; if so, no scheduling is changed.
91 */
92 bool YieldThreadAndBalanceLoad(Thread* thread);
93
94 /**
95 * Takes a thread and moves it out of the scheduling queue
96 * and into the suggested queue. If no thread can be scheduled on that core afterwards,
97 * a suggested thread is obtained instead.
98 *
99 * @note This operation can be redundant; if so, no scheduling is changed.
100 */
101 bool YieldThreadAndWaitForLoadBalancing(Thread* thread);
102
103 /**
104 * Rotates the scheduling queues of threads at a preemption priority and then does
105 * some core rebalancing. Preemption priorities can be found in the array
106 * 'preemption_priorities'.
107 *
108 * @note This operation happens every 10ms.
109 */
110 void PreemptThreads();
111
112 u32 CpuCoresCount() const {
113 return Core::Hardware::NUM_CPU_CORES;
114 }
115
116 void SetReselectionPending() {
117 is_reselection_pending.store(true, std::memory_order_release);
118 }
119
120 bool IsReselectionPending() const {
121 return is_reselection_pending.load(std::memory_order_acquire);
122 }
123
124 void Shutdown();
125
126private:
127 friend class SchedulerLock;
128
129 /// Lock the scheduler to the current thread.
130 void Lock();
131
132 /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling
133 /// and reschedules current core if needed.
134 void Unlock();
135
136 void EnableInterruptAndSchedule(u32 cores_pending_reschedule,
137 Core::EmuThreadHandle global_thread);
138
139 /**
140 * Add a thread to the suggested queue of a cpu core. Suggested threads may be
141 * picked if no thread is scheduled to run on the core.
142 */
143 void Suggest(u32 priority, std::size_t core, Thread* thread);
144
145 /**
146 * Remove a thread from the suggested queue of a cpu core. Suggested threads may be
147 * picked if no thread is scheduled to run on the core.
148 */
149 void Unsuggest(u32 priority, std::size_t core, Thread* thread);
150
151 /**
152 * Add a thread to the scheduling queue of a cpu core. The thread is added at the
153 * back of the queue in its priority level.
154 */
155 void Schedule(u32 priority, std::size_t core, Thread* thread);
156
157 /**
158 * Add a thread to the scheduling queue of a cpu core. The thread is added at the
159 * front of the queue in its priority level.
160 */
161 void SchedulePrepend(u32 priority, std::size_t core, Thread* thread);
162
163 /// Reschedules an already scheduled thread, moving it to the back of its priority level
164 void Reschedule(u32 priority, std::size_t core, Thread* thread);
165
166 /// Unschedules a thread.
167 void Unschedule(u32 priority, std::size_t core, Thread* thread);
168
169 /**
170 * Transfers a thread to a specific core. If destination_core is -1,
171 * the thread is unscheduled from its source core and added to that core's
172 * suggested queue.
173 */
174 void TransferToCore(u32 priority, s32 destination_core, Thread* thread);
175
176 bool AskForReselectionOrMarkRedundant(Thread* current_thread, const Thread* winner);
177
178 static constexpr u32 min_regular_priority = 2;
179 std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
180 scheduled_queue;
181 std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
182 suggested_queue;
183 std::atomic<bool> is_reselection_pending{false};
184
185 // The priority levels at which the global scheduler preempts threads every 10 ms. They are
186 // ordered from Core 0 to Core 3.
187 std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
188
189 /// Scheduler lock mechanisms.
190 bool is_locked{};
191 std::mutex inner_lock;
192 std::atomic<s64> scope_lock{};
193 Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()};
194
195 Common::SpinLock global_list_guard{};
196
197 /// Lists all threads that haven't been deleted.
198 std::vector<std::shared_ptr<Thread>> thread_list;
199 KernelCore& kernel;
200};
201
202class Scheduler final {
203public:
204 explicit Scheduler(Core::System& system, std::size_t core_id);
205 ~Scheduler();
206
207 /// Returns whether there are any threads that are ready to run.
208 bool HaveReadyThreads() const;
209
210 /// Reschedules to the next available thread (call after current thread is suspended)
211 void TryDoContextSwitch();
212
213 /// The next two are for single-core mode only.
214 /// Unload current thread before preempting core.
215 void Unload(Thread* thread);
216 void Unload();
217 /// Reload current thread after core preemption.
218 void Reload(Thread* thread);
219 void Reload();
220
221 /// Gets the current running thread
222 Thread* GetCurrentThread() const;
223
224 /// Gets the currently selected thread from the top of the multilevel queue
225 Thread* GetSelectedThread() const;
226
227 /// Gets the timestamp for the last context switch in ticks.
228 u64 GetLastContextSwitchTicks() const;
229
230 bool ContextSwitchPending() const {
231 return is_context_switch_pending;
232 }
233
234 void Initialize();
235
236 /// Shuts down the scheduler.
237 void Shutdown();
238
239 void OnThreadStart();
240
241 std::shared_ptr<Common::Fiber>& ControlContext() {
242 return switch_fiber;
243 }
244
245 const std::shared_ptr<Common::Fiber>& ControlContext() const {
246 return switch_fiber;
247 }
248
249private:
250 friend class GlobalScheduler;
251
252 /// Switches the CPU's active thread context to that of the specified thread
253 void SwitchContext();
254
255 /// When a thread wakes up, it must run this through its new scheduler
256 void SwitchContextStep2();
257
258 /**
259 * Called on every context switch to update the internal timestamp.
260 * This also updates the running time ticks for the given thread and
261 * process using the following difference:
262 *
263 * ticks += most_recent_ticks - last_context_switch_ticks
264 *
265 * The internal tick timestamp for the scheduler is simply the
266 * most recent tick count retrieved. No special arithmetic is
267 * applied to it.
268 */
269 void UpdateLastContextSwitchTime(Thread* thread, Process* process);
270
271 static void OnSwitch(void* this_scheduler);
272 void SwitchToCurrent();
273
274 std::shared_ptr<Thread> current_thread = nullptr;
275 std::shared_ptr<Thread> selected_thread = nullptr;
276 std::shared_ptr<Thread> current_thread_prev = nullptr;
277 std::shared_ptr<Thread> selected_thread_set = nullptr;
278 std::shared_ptr<Thread> idle_thread = nullptr;
279
280 std::shared_ptr<Common::Fiber> switch_fiber = nullptr;
281
282 Core::System& system;
283 u64 last_context_switch_time = 0;
284 u64 idle_selection_count = 0;
285 const std::size_t core_id;
286
287 Common::SpinLock guard{};
288
289 bool is_context_switch_pending = false;
290};
291
292class SchedulerLock {
293public:
294 [[nodiscard]] explicit SchedulerLock(KernelCore& kernel);
295 ~SchedulerLock();
296
297protected:
298 KernelCore& kernel;
299};
300
301class SchedulerLockAndSleep : public SchedulerLock {
302public:
303 explicit SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, Thread* time_task,
304 s64 nanoseconds);
305 ~SchedulerLockAndSleep();
306
307 void CancelSleep() {
308 sleep_cancelled = true;
309 }
310
311 void Release();
312
313private:
314 Handle& event_handle;
315 Thread* time_task;
316 s64 nanoseconds;
317 bool sleep_cancelled{};
318};
319
320} // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 8c19f2534..b40fe3916 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -14,9 +14,9 @@
14#include "core/hle/kernel/client_session.h" 14#include "core/hle/kernel/client_session.h"
15#include "core/hle/kernel/handle_table.h" 15#include "core/hle/kernel/handle_table.h"
16#include "core/hle/kernel/hle_ipc.h" 16#include "core/hle/kernel/hle_ipc.h"
17#include "core/hle/kernel/k_scheduler.h"
17#include "core/hle/kernel/kernel.h" 18#include "core/hle/kernel/kernel.h"
18#include "core/hle/kernel/process.h" 19#include "core/hle/kernel/process.h"
19#include "core/hle/kernel/scheduler.h"
20#include "core/hle/kernel/server_session.h" 20#include "core/hle/kernel/server_session.h"
21#include "core/hle/kernel/session.h" 21#include "core/hle/kernel/session.h"
22#include "core/hle/kernel/thread.h" 22#include "core/hle/kernel/thread.h"
@@ -25,19 +25,19 @@
25namespace Kernel { 25namespace Kernel {
26 26
27ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} 27ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
28ServerSession::~ServerSession() = default; 28
29ServerSession::~ServerSession() {
30 kernel.ReleaseServiceThread(service_thread);
31}
29 32
30ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel, 33ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel,
31 std::shared_ptr<Session> parent, 34 std::shared_ptr<Session> parent,
32 std::string name) { 35 std::string name) {
33 std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)}; 36 std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)};
34 37
35 session->request_event =
36 Core::Timing::CreateEvent(name, [session](std::uintptr_t, std::chrono::nanoseconds) {
37 session->CompleteSyncRequest();
38 });
39 session->name = std::move(name); 38 session->name = std::move(name);
40 session->parent = std::move(parent); 39 session->parent = std::move(parent);
40 session->service_thread = kernel.CreateServiceThread(session->name);
41 41
42 return MakeResult(std::move(session)); 42 return MakeResult(std::move(session));
43} 43}
@@ -130,8 +130,7 @@ ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& con
130 } 130 }
131 } 131 }
132 132
133 LOG_CRITICAL(IPC, "Unknown domain command={}", 133 LOG_CRITICAL(IPC, "Unknown domain command={}", domain_message_header.command.Value());
134 static_cast<int>(domain_message_header.command.Value()));
135 ASSERT(false); 134 ASSERT(false);
136 return RESULT_SUCCESS; 135 return RESULT_SUCCESS;
137} 136}
@@ -143,16 +142,16 @@ ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread,
143 std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread)); 142 std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread));
144 143
145 context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf); 144 context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
146 request_queue.Push(std::move(context)); 145
146 if (auto strong_ptr = service_thread.lock()) {
147 strong_ptr->QueueSyncRequest(*this, std::move(context));
148 return RESULT_SUCCESS;
149 }
147 150
148 return RESULT_SUCCESS; 151 return RESULT_SUCCESS;
149} 152}
150 153
151ResultCode ServerSession::CompleteSyncRequest() { 154ResultCode ServerSession::CompleteSyncRequest(HLERequestContext& context) {
152 ASSERT(!request_queue.Empty());
153
154 auto& context = *request_queue.Front();
155
156 ResultCode result = RESULT_SUCCESS; 155 ResultCode result = RESULT_SUCCESS;
157 // If the session has been converted to a domain, handle the domain request 156 // If the session has been converted to a domain, handle the domain request
158 if (IsDomain() && context.HasDomainMessageHeader()) { 157 if (IsDomain() && context.HasDomainMessageHeader()) {
@@ -171,25 +170,20 @@ ResultCode ServerSession::CompleteSyncRequest() {
171 170
172 // Some service requests require the thread to block 171 // Some service requests require the thread to block
173 { 172 {
174 SchedulerLock lock(kernel); 173 KScopedSchedulerLock lock(kernel);
175 if (!context.IsThreadWaiting()) { 174 if (!context.IsThreadWaiting()) {
176 context.GetThread().ResumeFromWait(); 175 context.GetThread().ResumeFromWait();
177 context.GetThread().SetSynchronizationResults(nullptr, result); 176 context.GetThread().SetSynchronizationResults(nullptr, result);
178 } 177 }
179 } 178 }
180 179
181 request_queue.Pop();
182
183 return result; 180 return result;
184} 181}
185 182
186ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread, 183ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread,
187 Core::Memory::Memory& memory, 184 Core::Memory::Memory& memory,
188 Core::Timing::CoreTiming& core_timing) { 185 Core::Timing::CoreTiming& core_timing) {
189 const ResultCode result = QueueSyncRequest(std::move(thread), memory); 186 return QueueSyncRequest(std::move(thread), memory);
190 const auto delay = std::chrono::nanoseconds{kernel.IsMulticore() ? 0 : 20000};
191 core_timing.ScheduleEvent(delay, request_event, {});
192 return result;
193} 187}
194 188
195} // namespace Kernel 189} // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index d23e9ec68..e8d1d99ea 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -10,6 +10,7 @@
10#include <vector> 10#include <vector>
11 11
12#include "common/threadsafe_queue.h" 12#include "common/threadsafe_queue.h"
13#include "core/hle/kernel/service_thread.h"
13#include "core/hle/kernel/synchronization_object.h" 14#include "core/hle/kernel/synchronization_object.h"
14#include "core/hle/result.h" 15#include "core/hle/result.h"
15 16
@@ -43,6 +44,8 @@ class Thread;
43 * TLS buffer and control is transferred back to it. 44 * TLS buffer and control is transferred back to it.
44 */ 45 */
45class ServerSession final : public SynchronizationObject { 46class ServerSession final : public SynchronizationObject {
47 friend class ServiceThread;
48
46public: 49public:
47 explicit ServerSession(KernelCore& kernel); 50 explicit ServerSession(KernelCore& kernel);
48 ~ServerSession() override; 51 ~ServerSession() override;
@@ -132,7 +135,7 @@ private:
132 ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory); 135 ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory);
133 136
134 /// Completes a sync request from the emulated application. 137 /// Completes a sync request from the emulated application.
135 ResultCode CompleteSyncRequest(); 138 ResultCode CompleteSyncRequest(HLERequestContext& context);
136 139
137 /// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an 140 /// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an
138 /// object handle. 141 /// object handle.
@@ -163,11 +166,8 @@ private:
163 /// The name of this session (optional) 166 /// The name of this session (optional)
164 std::string name; 167 std::string name;
165 168
166 /// Core timing event used to schedule the service request at some point in the future 169 /// Thread to dispatch service requests
167 std::shared_ptr<Core::Timing::EventType> request_event; 170 std::weak_ptr<ServiceThread> service_thread;
168
169 /// Queue of scheduled service requests
170 Common::MPSCQueue<std::shared_ptr<Kernel::HLERequestContext>> request_queue;
171}; 171};
172 172
173} // namespace Kernel 173} // namespace Kernel
diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp
new file mode 100644
index 000000000..ee46f3e21
--- /dev/null
+++ b/src/core/hle/kernel/service_thread.cpp
@@ -0,0 +1,110 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <condition_variable>
6#include <functional>
7#include <mutex>
8#include <thread>
9#include <vector>
10#include <queue>
11
12#include "common/assert.h"
13#include "common/scope_exit.h"
14#include "common/thread.h"
15#include "core/core.h"
16#include "core/hle/kernel/kernel.h"
17#include "core/hle/kernel/server_session.h"
18#include "core/hle/kernel/service_thread.h"
19#include "core/hle/lock.h"
20#include "video_core/renderer_base.h"
21
22namespace Kernel {
23
24class ServiceThread::Impl final {
25public:
26 explicit Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name);
27 ~Impl();
28
29 void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
30
31private:
32 std::vector<std::thread> threads;
33 std::queue<std::function<void()>> requests;
34 std::mutex queue_mutex;
35 std::condition_variable condition;
36 const std::string service_name;
37 bool stop{};
38};
39
40ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name)
41 : service_name{name} {
42 for (std::size_t i = 0; i < num_threads; ++i)
43 threads.emplace_back([this, &kernel] {
44 Common::SetCurrentThreadName(std::string{"yuzu:HleService:" + service_name}.c_str());
45
46 // Wait for first request before trying to acquire a render context
47 {
48 std::unique_lock lock{queue_mutex};
49 condition.wait(lock, [this] { return stop || !requests.empty(); });
50 }
51
52 kernel.RegisterHostThread();
53
54 while (true) {
55 std::function<void()> task;
56
57 {
58 std::unique_lock lock{queue_mutex};
59 condition.wait(lock, [this] { return stop || !requests.empty(); });
60 if (stop || requests.empty()) {
61 return;
62 }
63 task = std::move(requests.front());
64 requests.pop();
65 }
66
67 task();
68 }
69 });
70}
71
72void ServiceThread::Impl::QueueSyncRequest(ServerSession& session,
73 std::shared_ptr<HLERequestContext>&& context) {
74 {
75 std::unique_lock lock{queue_mutex};
76
77 // ServerSession owns the service thread, so we cannot capture a strong pointer here in the
78 // event that the ServerSession is terminated.
79 std::weak_ptr<ServerSession> weak_ptr{SharedFrom(&session)};
80 requests.emplace([weak_ptr, context{std::move(context)}]() {
81 if (auto strong_ptr = weak_ptr.lock()) {
82 strong_ptr->CompleteSyncRequest(*context);
83 }
84 });
85 }
86 condition.notify_one();
87}
88
89ServiceThread::Impl::~Impl() {
90 {
91 std::unique_lock lock{queue_mutex};
92 stop = true;
93 }
94 condition.notify_all();
95 for (std::thread& thread : threads) {
96 thread.join();
97 }
98}
99
100ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name)
101 : impl{std::make_unique<Impl>(kernel, num_threads, name)} {}
102
103ServiceThread::~ServiceThread() = default;
104
105void ServiceThread::QueueSyncRequest(ServerSession& session,
106 std::shared_ptr<HLERequestContext>&& context) {
107 impl->QueueSyncRequest(session, std::move(context));
108}
109
110} // namespace Kernel
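A sketch of the ownership pattern this file is designed around, mirroring ServerSession::Create and ~ServerSession in the diff above: the kernel owns the ServiceThread, while sessions hold only a weak reference that must be locked before queueing work ("MySrv" is a placeholder name):

    // Kernel side: create a worker pool for a service and hand out weak refs.
    std::weak_ptr<ServiceThread> service_thread = kernel.CreateServiceThread("MySrv");

    // Session side: dispatch a request only if the pool still exists.
    if (auto strong_ptr = service_thread.lock()) {
        strong_ptr->QueueSyncRequest(session, std::move(context));
    }

    // Teardown: let the kernel drop its strong reference.
    kernel.ReleaseServiceThread(service_thread);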
diff --git a/src/core/hle/kernel/service_thread.h b/src/core/hle/kernel/service_thread.h
new file mode 100644
index 000000000..025ab8fb5
--- /dev/null
+++ b/src/core/hle/kernel/service_thread.h
@@ -0,0 +1,28 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <string>
9
10namespace Kernel {
11
12class HLERequestContext;
13class KernelCore;
14class ServerSession;
15
16class ServiceThread final {
17public:
18 explicit ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name);
19 ~ServiceThread();
20
21 void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
22
23private:
24 class Impl;
25 std::unique_ptr<Impl> impl;
26};
27
28} // namespace Kernel
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 95d6e2b4d..de3ed25da 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -24,6 +24,8 @@
24#include "core/hle/kernel/client_session.h" 24#include "core/hle/kernel/client_session.h"
25#include "core/hle/kernel/errors.h" 25#include "core/hle/kernel/errors.h"
26#include "core/hle/kernel/handle_table.h" 26#include "core/hle/kernel/handle_table.h"
27#include "core/hle/kernel/k_scheduler.h"
28#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
27#include "core/hle/kernel/kernel.h" 29#include "core/hle/kernel/kernel.h"
28#include "core/hle/kernel/memory/memory_block.h" 30#include "core/hle/kernel/memory/memory_block.h"
29#include "core/hle/kernel/memory/page_table.h" 31#include "core/hle/kernel/memory/page_table.h"
@@ -32,7 +34,6 @@
32#include "core/hle/kernel/process.h" 34#include "core/hle/kernel/process.h"
33#include "core/hle/kernel/readable_event.h" 35#include "core/hle/kernel/readable_event.h"
34#include "core/hle/kernel/resource_limit.h" 36#include "core/hle/kernel/resource_limit.h"
35#include "core/hle/kernel/scheduler.h"
36#include "core/hle/kernel/shared_memory.h" 37#include "core/hle/kernel/shared_memory.h"
37#include "core/hle/kernel/svc.h" 38#include "core/hle/kernel/svc.h"
38#include "core/hle/kernel/svc_types.h" 39#include "core/hle/kernel/svc_types.h"
@@ -234,8 +235,7 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
234 235
235static ResultCode SetMemoryAttribute32(Core::System& system, u32 address, u32 size, u32 mask, 236static ResultCode SetMemoryAttribute32(Core::System& system, u32 address, u32 size, u32 mask,
236 u32 attribute) { 237 u32 attribute) {
237 return SetMemoryAttribute(system, static_cast<VAddr>(address), static_cast<std::size_t>(size), 238 return SetMemoryAttribute(system, address, size, mask, attribute);
238 mask, attribute);
239} 239}
240 240
241/// Maps a memory range into a different range. 241/// Maps a memory range into a different range.
@@ -255,8 +255,7 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr
255} 255}
256 256
257static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { 257static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {
258 return MapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr), 258 return MapMemory(system, dst_addr, src_addr, size);
259 static_cast<std::size_t>(size));
260} 259}
261 260
262/// Unmaps a region that was previously mapped with svcMapMemory 261/// Unmaps a region that was previously mapped with svcMapMemory
@@ -276,8 +275,7 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
276} 275}
277 276
278static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { 277static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {
279 return UnmapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr), 278 return UnmapMemory(system, dst_addr, src_addr, size);
280 static_cast<std::size_t>(size));
281} 279}
282 280
283/// Connect to an OS service given the port name, returns the handle to the port to out 281/// Connect to an OS service given the port name, returns the handle to the port to out
@@ -332,7 +330,8 @@ static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle,
332 330
333/// Makes a blocking IPC call to an OS service. 331/// Makes a blocking IPC call to an OS service.
334static ResultCode SendSyncRequest(Core::System& system, Handle handle) { 332static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
335 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 333 auto& kernel = system.Kernel();
334 const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
336 std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle); 335 std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle);
337 if (!session) { 336 if (!session) {
338 LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle); 337 LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
@@ -341,9 +340,9 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
341 340
342 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); 341 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
343 342
344 auto thread = system.CurrentScheduler().GetCurrentThread(); 343 auto thread = kernel.CurrentScheduler()->GetCurrentThread();
345 { 344 {
346 SchedulerLock lock(system.Kernel()); 345 KScopedSchedulerLock lock(kernel);
347 thread->InvalidateHLECallback(); 346 thread->InvalidateHLECallback();
348 thread->SetStatus(ThreadStatus::WaitIPC); 347 thread->SetStatus(ThreadStatus::WaitIPC);
349 session->SendSyncRequest(SharedFrom(thread), system.Memory(), system.CoreTiming()); 348 session->SendSyncRequest(SharedFrom(thread), system.Memory(), system.CoreTiming());
@@ -352,12 +351,12 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
352 if (thread->HasHLECallback()) { 351 if (thread->HasHLECallback()) {
353 Handle event_handle = thread->GetHLETimeEvent(); 352 Handle event_handle = thread->GetHLETimeEvent();
354 if (event_handle != InvalidHandle) { 353 if (event_handle != InvalidHandle) {
355 auto& time_manager = system.Kernel().TimeManager(); 354 auto& time_manager = kernel.TimeManager();
356 time_manager.UnscheduleTimeEvent(event_handle); 355 time_manager.UnscheduleTimeEvent(event_handle);
357 } 356 }
358 357
359 { 358 {
360 SchedulerLock lock(system.Kernel()); 359 KScopedSchedulerLock lock(kernel);
361 auto* sync_object = thread->GetHLESyncObject(); 360 auto* sync_object = thread->GetHLESyncObject();
362 sync_object->RemoveWaitingThread(SharedFrom(thread)); 361 sync_object->RemoveWaitingThread(SharedFrom(thread));
363 } 362 }
@@ -531,8 +530,7 @@ static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_hand
531 530
532static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle, 531static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle,
533 u32 mutex_addr, Handle requesting_thread_handle) { 532 u32 mutex_addr, Handle requesting_thread_handle) {
534 return ArbitrateLock(system, holding_thread_handle, static_cast<VAddr>(mutex_addr), 533 return ArbitrateLock(system, holding_thread_handle, mutex_addr, requesting_thread_handle);
535 requesting_thread_handle);
536} 534}
537 535
538/// Unlock a mutex 536/// Unlock a mutex
@@ -555,7 +553,7 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
555} 553}
556 554
557static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) { 555static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) {
558 return ArbitrateUnlock(system, static_cast<VAddr>(mutex_addr)); 556 return ArbitrateUnlock(system, mutex_addr);
559} 557}
560 558
561enum class BreakType : u32 { 559enum class BreakType : u32 {
@@ -658,7 +656,6 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
658 info2, has_dumped_buffer ? std::make_optional(debug_buffer) : std::nullopt); 656 info2, has_dumped_buffer ? std::make_optional(debug_buffer) : std::nullopt);
659 657
660 if (!break_reason.signal_debugger) { 658 if (!break_reason.signal_debugger) {
661 SchedulerLock lock(system.Kernel());
662 LOG_CRITICAL( 659 LOG_CRITICAL(
663 Debug_Emulated, 660 Debug_Emulated,
664 "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}", 661 "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
@@ -666,18 +663,14 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
666 663
667 handle_debug_buffer(info1, info2); 664 handle_debug_buffer(info1, info2);
668 665
669 auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); 666 auto* const current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
670 const auto thread_processor_id = current_thread->GetProcessorID(); 667 const auto thread_processor_id = current_thread->GetProcessorID();
671 system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace(); 668 system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace();
672
673 // Kill the current thread
674 system.Kernel().ExceptionalExit();
675 current_thread->Stop();
676 } 669 }
677} 670}
678 671
679static void Break32(Core::System& system, u32 reason, u32 info1, u32 info2) { 672static void Break32(Core::System& system, u32 reason, u32 info1, u32 info2) {
680 Break(system, reason, static_cast<u64>(info1), static_cast<u64>(info2)); 673 Break(system, reason, info1, info2);
681} 674}
682 675
683/// Used to output a message on a debug hardware unit - does nothing on a retail unit 676/// Used to output a message on a debug hardware unit - does nothing on a retail unit
@@ -922,7 +915,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
922 } 915 }
923 916
924 const auto& core_timing = system.CoreTiming(); 917 const auto& core_timing = system.CoreTiming();
925 const auto& scheduler = system.CurrentScheduler(); 918 const auto& scheduler = *system.Kernel().CurrentScheduler();
926 const auto* const current_thread = scheduler.GetCurrentThread(); 919 const auto* const current_thread = scheduler.GetCurrentThread();
927 const bool same_thread = current_thread == thread.get(); 920 const bool same_thread = current_thread == thread.get();
928 921
@@ -948,7 +941,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
948 941
949static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_high, u32 sub_id_low, 942static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_high, u32 sub_id_low,
950 u32 info_id, u32 handle, u32 sub_id_high) { 943 u32 info_id, u32 handle, u32 sub_id_high) {
951 const u64 sub_id{static_cast<u64>(sub_id_low | (static_cast<u64>(sub_id_high) << 32))}; 944 const u64 sub_id{u64{sub_id_low} | (u64{sub_id_high} << 32)};
952 u64 res_value{}; 945 u64 res_value{};
953 946
954 const ResultCode result{GetInfo(system, &res_value, info_id, handle, sub_id)}; 947 const ResultCode result{GetInfo(system, &res_value, info_id, handle, sub_id)};
@@ -1009,7 +1002,7 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
1009} 1002}
1010 1003
1011static ResultCode MapPhysicalMemory32(Core::System& system, u32 addr, u32 size) { 1004static ResultCode MapPhysicalMemory32(Core::System& system, u32 addr, u32 size) {
1012 return MapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size)); 1005 return MapPhysicalMemory(system, addr, size);
1013} 1006}
1014 1007
1015/// Unmaps memory previously mapped via MapPhysicalMemory 1008/// Unmaps memory previously mapped via MapPhysicalMemory
@@ -1063,7 +1056,7 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
1063} 1056}
1064 1057
1065static ResultCode UnmapPhysicalMemory32(Core::System& system, u32 addr, u32 size) { 1058static ResultCode UnmapPhysicalMemory32(Core::System& system, u32 addr, u32 size) {
1066 return UnmapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size)); 1059 return UnmapPhysicalMemory(system, addr, size);
1067} 1060}
1068 1061
1069/// Sets the thread activity 1062/// Sets the thread activity
@@ -1090,7 +1083,7 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act
1090 return ERR_INVALID_HANDLE; 1083 return ERR_INVALID_HANDLE;
1091 } 1084 }
1092 1085
1093 if (thread.get() == system.CurrentScheduler().GetCurrentThread()) { 1086 if (thread.get() == system.Kernel().CurrentScheduler()->GetCurrentThread()) {
1094 LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread"); 1087 LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
1095 return ERR_BUSY; 1088 return ERR_BUSY;
1096 } 1089 }
@@ -1123,7 +1116,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H
1123 return ERR_INVALID_HANDLE; 1116 return ERR_INVALID_HANDLE;
1124 } 1117 }
1125 1118
1126 if (thread.get() == system.CurrentScheduler().GetCurrentThread()) { 1119 if (thread.get() == system.Kernel().CurrentScheduler()->GetCurrentThread()) {
1127 LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread"); 1120 LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread");
1128 return ERR_BUSY; 1121 return ERR_BUSY;
1129 } 1122 }
@@ -1144,7 +1137,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H
1144} 1137}
1145 1138
1146static ResultCode GetThreadContext32(Core::System& system, u32 thread_context, Handle handle) { 1139static ResultCode GetThreadContext32(Core::System& system, u32 thread_context, Handle handle) {
1147 return GetThreadContext(system, static_cast<VAddr>(thread_context), handle); 1140 return GetThreadContext(system, thread_context, handle);
1148} 1141}
1149 1142
1150/// Gets the priority for the specified thread 1143/// Gets the priority for the specified thread
@@ -1281,8 +1274,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1281 1274
1282static ResultCode MapSharedMemory32(Core::System& system, Handle shared_memory_handle, u32 addr, 1275static ResultCode MapSharedMemory32(Core::System& system, Handle shared_memory_handle, u32 addr,
1283 u32 size, u32 permissions) { 1276 u32 size, u32 permissions) {
1284 return MapSharedMemory(system, shared_memory_handle, static_cast<VAddr>(addr), 1277 return MapSharedMemory(system, shared_memory_handle, addr, size, permissions);
1285 static_cast<std::size_t>(size), permissions);
1286} 1278}
1287 1279
1288static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address, 1280static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address,
@@ -1480,7 +1472,7 @@ static void ExitProcess(Core::System& system) {
1480 current_process->PrepareForTermination(); 1472 current_process->PrepareForTermination();
1481 1473
1482 // Kill the current thread 1474 // Kill the current thread
1483 system.CurrentScheduler().GetCurrentThread()->Stop(); 1475 system.Kernel().CurrentScheduler()->GetCurrentThread()->Stop();
1484} 1476}
1485 1477
1486static void ExitProcess32(Core::System& system) { 1478static void ExitProcess32(Core::System& system) {
@@ -1552,8 +1544,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
1552 1544
1553static ResultCode CreateThread32(Core::System& system, Handle* out_handle, u32 priority, 1545static ResultCode CreateThread32(Core::System& system, Handle* out_handle, u32 priority,
1554 u32 entry_point, u32 arg, u32 stack_top, s32 processor_id) { 1546 u32 entry_point, u32 arg, u32 stack_top, s32 processor_id) {
1555 return CreateThread(system, out_handle, static_cast<VAddr>(entry_point), static_cast<u64>(arg), 1547 return CreateThread(system, out_handle, entry_point, arg, stack_top, priority, processor_id);
1556 static_cast<VAddr>(stack_top), priority, processor_id);
1557} 1548}
1558 1549
1559/// Starts the thread for the provided handle 1550/// Starts the thread for the provided handle
@@ -1581,8 +1572,8 @@ static ResultCode StartThread32(Core::System& system, Handle thread_handle) {
1581static void ExitThread(Core::System& system) { 1572static void ExitThread(Core::System& system) {
1582 LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC()); 1573 LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
1583 1574
1584 auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); 1575 auto* const current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
1585 system.GlobalScheduler().RemoveThread(SharedFrom(current_thread)); 1576 system.GlobalSchedulerContext().RemoveThread(SharedFrom(current_thread));
1586 current_thread->Stop(); 1577 current_thread->Stop();
1587} 1578}
1588 1579
@@ -1592,53 +1583,39 @@ static void ExitThread32(Core::System& system) {
1592 1583
1593/// Sleep the current thread 1584/// Sleep the current thread
1594static void SleepThread(Core::System& system, s64 nanoseconds) { 1585static void SleepThread(Core::System& system, s64 nanoseconds) {
1595 LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds); 1586 LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
1596 1587
1597 enum class SleepType : s64 { 1588 enum class SleepType : s64 {
1598 YieldWithoutLoadBalancing = 0, 1589 YieldWithoutCoreMigration = 0,
1599 YieldWithLoadBalancing = -1, 1590 YieldWithCoreMigration = -1,
1600 YieldAndWaitForLoadBalancing = -2, 1591 YieldAndWaitForLoadBalancing = -2,
1601 }; 1592 };
1602 1593
1603 auto& scheduler = system.CurrentScheduler(); 1594 auto& scheduler = *system.Kernel().CurrentScheduler();
1604 auto* const current_thread = scheduler.GetCurrentThread();
1605 bool is_redundant = false;
1606
1607 if (nanoseconds <= 0) { 1595 if (nanoseconds <= 0) {
1608 switch (static_cast<SleepType>(nanoseconds)) { 1596 switch (static_cast<SleepType>(nanoseconds)) {
1609 case SleepType::YieldWithoutLoadBalancing: { 1597 case SleepType::YieldWithoutCoreMigration: {
1610 auto pair = current_thread->YieldSimple(); 1598 scheduler.YieldWithoutCoreMigration();
1611 is_redundant = pair.second;
1612 break; 1599 break;
1613 } 1600 }
1614 case SleepType::YieldWithLoadBalancing: { 1601 case SleepType::YieldWithCoreMigration: {
1615 auto pair = current_thread->YieldAndBalanceLoad(); 1602 scheduler.YieldWithCoreMigration();
1616 is_redundant = pair.second;
1617 break; 1603 break;
1618 } 1604 }
1619 case SleepType::YieldAndWaitForLoadBalancing: { 1605 case SleepType::YieldAndWaitForLoadBalancing: {
1620 auto pair = current_thread->YieldAndWaitForLoadBalancing(); 1606 scheduler.YieldToAnyThread();
1621 is_redundant = pair.second;
1622 break; 1607 break;
1623 } 1608 }
1624 default: 1609 default:
1625 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); 1610 UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
1626 } 1611 }
1627 } else { 1612 } else {
1628 current_thread->Sleep(nanoseconds); 1613 scheduler.GetCurrentThread()->Sleep(nanoseconds);
1629 }
1630
1631 if (is_redundant && !system.Kernel().IsMulticore()) {
1632 system.Kernel().ExitSVCProfile();
1633 system.CoreTiming().AddTicks(1000U);
1634 system.GetCpuManager().PreemptSingleCore();
1635 system.Kernel().EnterSVCProfile();
1636 } 1614 }
1637} 1615}
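Note: the rewritten SleepThread keeps Horizon's sentinel encoding for svcSleepThread but dispatches straight into the new KScheduler entry points instead of the removed per-Thread yield helpers; the is_redundant/PreemptSingleCore special case disappears, presumably because the scheduler now deals with redundant yields internally. From the caller's side the contract looks like this (illustrative stub, not the emulator's dispatch path):

    #include <cstdint>

    void svcSleepThread(std::int64_t) {} // stub standing in for the real SVC

    void Examples() {
        svcSleepThread(1'000'000); // positive: a real timed sleep (~1 ms)
        svcSleepThread(0);         // YieldWithoutCoreMigration
        svcSleepThread(-1);        // YieldWithCoreMigration
        svcSleepThread(-2);        // YieldAndWaitForLoadBalancing
    }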
1638 1616
1639static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanoseconds_high) { 1617static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanoseconds_high) {
1640 const s64 nanoseconds = static_cast<s64>(static_cast<u64>(nanoseconds_low) | 1618 const auto nanoseconds = static_cast<s64>(u64{nanoseconds_low} | (u64{nanoseconds_high} << 32));
1641 (static_cast<u64>(nanoseconds_high) << 32));
1642 SleepThread(system, nanoseconds); 1619 SleepThread(system, nanoseconds);
1643} 1620}
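Note: the AArch32 SVC ABI passes 64-bit arguments as register pairs, which is why every *32 wrapper recombines a low/high pair. Promoting each half to u64 before the shift matters, since shifting a 32-bit value left by 32 is undefined behavior; the braced u64{...} form also refuses to compile if a narrowing argument ever sneaks in. Self-contained:

    #include <cstdint>

    std::int64_t Recombine(std::uint32_t low, std::uint32_t high) {
        // u64{high} << 32 is well defined; high << 32 on a u32 would be UB
        return static_cast<std::int64_t>(std::uint64_t{low} |
                                         (std::uint64_t{high} << 32));
    }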
1644 1621
@@ -1668,10 +1645,10 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
1668 ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); 1645 ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
1669 auto& kernel = system.Kernel(); 1646 auto& kernel = system.Kernel();
1670 Handle event_handle; 1647 Handle event_handle;
1671 Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); 1648 Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread();
1672 auto* const current_process = system.Kernel().CurrentProcess(); 1649 auto* const current_process = kernel.CurrentProcess();
1673 { 1650 {
1674 SchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds); 1651 KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds);
1675 const auto& handle_table = current_process->GetHandleTable(); 1652 const auto& handle_table = current_process->GetHandleTable();
1676 std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle); 1653 std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle);
1677 ASSERT(thread); 1654 ASSERT(thread);
@@ -1707,7 +1684,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
1707 } 1684 }
1708 1685
1709 { 1686 {
1710 SchedulerLock lock(kernel); 1687 KScopedSchedulerLock lock(kernel);
1711 1688
1712 auto* owner = current_thread->GetLockOwner(); 1689 auto* owner = current_thread->GetLockOwner();
1713 if (owner != nullptr) { 1690 if (owner != nullptr) {
@@ -1724,10 +1701,8 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
1724static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr, 1701static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr,
1725 u32 condition_variable_addr, Handle thread_handle, 1702 u32 condition_variable_addr, Handle thread_handle,
1726 u32 nanoseconds_low, u32 nanoseconds_high) { 1703 u32 nanoseconds_low, u32 nanoseconds_high) {
1727 const s64 nanoseconds = 1704 const auto nanoseconds = static_cast<s64>(nanoseconds_low | (u64{nanoseconds_high} << 32));
1728 static_cast<s64>(nanoseconds_low | (static_cast<u64>(nanoseconds_high) << 32)); 1705 return WaitProcessWideKeyAtomic(system, mutex_addr, condition_variable_addr, thread_handle,
1729 return WaitProcessWideKeyAtomic(system, static_cast<VAddr>(mutex_addr),
1730 static_cast<VAddr>(condition_variable_addr), thread_handle,
1731 nanoseconds); 1706 nanoseconds);
1732} 1707}
1733 1708
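Note: SchedulerLock/SchedulerLockAndSleep become the K-prefixed KScopedSchedulerLock and KScopedSchedulerLockAndSleep, matching the official kernel's naming. The lock-and-sleep variant pairs the scheduler lock with a timed wake-up: the lock is taken for the scope, and on destruction a timer event (handed back through event_handle) is armed if a positive timeout was requested. A rough stand-in for that shape, with the kernel types elided:

    #include <cstdint>
    #include <mutex>

    std::mutex g_scheduler_lock; // stand-in for the kernel's scheduler lock

    class LockAndSleepSketch {
    public:
        explicit LockAndSleepSketch(std::int64_t timeout_ns) : timeout_ns_{timeout_ns} {
            g_scheduler_lock.lock(); // held for the whole critical section
        }
        ~LockAndSleepSketch() {
            if (timeout_ns_ > 0) {
                ArmWakeUpTimer(timeout_ns_); // stand-in for the TimeManager event
            }
            g_scheduler_lock.unlock();
        }
        LockAndSleepSketch(const LockAndSleepSketch&) = delete;
        LockAndSleepSketch& operator=(const LockAndSleepSketch&) = delete;

    private:
        static void ArmWakeUpTimer(std::int64_t) { /* elided in the sketch */ }
        std::int64_t timeout_ns_;
    };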
@@ -1740,7 +1715,7 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
1740 1715
1741 // Retrieve a list of all threads that are waiting for this condition variable. 1716 // Retrieve a list of all threads that are waiting for this condition variable.
1742 auto& kernel = system.Kernel(); 1717 auto& kernel = system.Kernel();
1743 SchedulerLock lock(kernel); 1718 KScopedSchedulerLock lock(kernel);
1744 auto* const current_process = kernel.CurrentProcess(); 1719 auto* const current_process = kernel.CurrentProcess();
1745 std::vector<std::shared_ptr<Thread>> waiting_threads = 1720 std::vector<std::shared_ptr<Thread>> waiting_threads =
1746 current_process->GetConditionVariableThreads(condition_variable_addr); 1721 current_process->GetConditionVariableThreads(condition_variable_addr);
@@ -1833,8 +1808,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
1833 1808
1834static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value, 1809static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value,
1835 u32 timeout_low, u32 timeout_high) { 1810 u32 timeout_low, u32 timeout_high) {
1836 s64 timeout = static_cast<s64>(timeout_low | (static_cast<u64>(timeout_high) << 32)); 1811 const auto timeout = static_cast<s64>(timeout_low | (u64{timeout_high} << 32));
1837 return WaitForAddress(system, static_cast<VAddr>(address), type, value, timeout); 1812 return WaitForAddress(system, address, type, value, timeout);
1838} 1813}
1839 1814
1840// Signals to an address (via Address Arbiter) 1815// Signals to an address (via Address Arbiter)
@@ -1862,7 +1837,7 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
1862 1837
1863static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value, 1838static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value,
1864 s32 num_to_wake) { 1839 s32 num_to_wake) {
1865 return SignalToAddress(system, static_cast<VAddr>(address), type, value, num_to_wake); 1840 return SignalToAddress(system, address, type, value, num_to_wake);
1866} 1841}
1867 1842
1868static void KernelDebug([[maybe_unused]] Core::System& system, 1843static void KernelDebug([[maybe_unused]] Core::System& system,
@@ -1893,7 +1868,7 @@ static u64 GetSystemTick(Core::System& system) {
1893} 1868}
1894 1869
1895static void GetSystemTick32(Core::System& system, u32* time_low, u32* time_high) { 1870static void GetSystemTick32(Core::System& system, u32* time_low, u32* time_high) {
1896 u64 time = GetSystemTick(system); 1871 const auto time = GetSystemTick(system);
1897 *time_low = static_cast<u32>(time); 1872 *time_low = static_cast<u32>(time);
1898 *time_high = static_cast<u32>(time >> 32); 1873 *time_high = static_cast<u32>(time >> 32);
1899} 1874}
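Note: GetSystemTick32 is the mirror image of the recombining idiom above, splitting one u64 across the two 32-bit out-registers:

    #include <cstdint>

    void SplitTick(std::uint64_t time, std::uint32_t* low, std::uint32_t* high) {
        *low = static_cast<std::uint32_t>(time);        // bits 0..31
        *high = static_cast<std::uint32_t>(time >> 32); // bits 32..63
    }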
@@ -1984,8 +1959,7 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
1984 1959
1985static ResultCode CreateTransferMemory32(Core::System& system, Handle* handle, u32 addr, u32 size, 1960static ResultCode CreateTransferMemory32(Core::System& system, Handle* handle, u32 addr, u32 size,
1986 u32 permissions) { 1961 u32 permissions) {
1987 return CreateTransferMemory(system, handle, static_cast<VAddr>(addr), 1962 return CreateTransferMemory(system, handle, addr, size, permissions);
1988 static_cast<std::size_t>(size), permissions);
1989} 1963}
1990 1964
1991static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core, 1965static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core,
@@ -2003,7 +1977,7 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
2003 } 1977 }
2004 1978
2005 *core = thread->GetIdealCore(); 1979 *core = thread->GetIdealCore();
2006 *mask = thread->GetAffinityMask(); 1980 *mask = thread->GetAffinityMask().GetAffinityMask();
2007 1981
2008 return RESULT_SUCCESS; 1982 return RESULT_SUCCESS;
2009} 1983}
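Note: the raw u64 affinity_mask field becomes a KAffinityMask wrapper, which is why this site reads GetAffinityMask().GetAffinityMask() to reach the raw bits. A minimal sketch of the wrapper's interface, inferred from the calls visible in this diff:

    #include <cstdint>

    class AffinityMaskSketch {
    public:
        constexpr std::uint64_t GetAffinityMask() const { return mask; }
        constexpr void SetAffinityMask(std::uint64_t new_mask) { mask = new_mask; }
        constexpr bool GetAffinity(std::int32_t core) const {
            return ((mask >> core) & 1) != 0;
        }
        constexpr void SetAffinity(std::int32_t core, bool set) {
            if (set) {
                mask |= std::uint64_t{1} << core;
            } else {
                mask &= ~(std::uint64_t{1} << core);
            }
        }

    private:
        std::uint64_t mask{};
    };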
@@ -2075,8 +2049,7 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
2075 2049
2076static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle, u32 core, 2050static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle, u32 core,
2077 u32 affinity_mask_low, u32 affinity_mask_high) { 2051 u32 affinity_mask_low, u32 affinity_mask_high) {
2078 const u64 affinity_mask = 2052 const auto affinity_mask = u64{affinity_mask_low} | (u64{affinity_mask_high} << 32);
2079 static_cast<u64>(affinity_mask_low) | (static_cast<u64>(affinity_mask_high) << 32);
2080 return SetThreadCoreMask(system, thread_handle, core, affinity_mask); 2053 return SetThreadCoreMask(system, thread_handle, core, affinity_mask);
2081} 2054}
2082 2055
@@ -2341,9 +2314,10 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
2341 return RESULT_SUCCESS; 2314 return RESULT_SUCCESS;
2342} 2315}
2343 2316
2344static ResultCode FlushProcessDataCache32(Core::System& system, Handle handle, u32 address, 2317static ResultCode FlushProcessDataCache32([[maybe_unused]] Core::System& system,
2345 u32 size) { 2318 [[maybe_unused]] Handle handle,
2346 // Note(Blinkhawk): For emulation purposes of the data cache this is mostly a nope 2319 [[maybe_unused]] u32 address, [[maybe_unused]] u32 size) {
2320 // Note(Blinkhawk): For emulation purposes of the data cache this is mostly a no-op,
2347 // as all emulation is done in the same cache level in host architecture, thus data cache 2321 // as all emulation is done in the same cache level in host architecture, thus data cache
2348 // does not need flushing. 2322 // does not need flushing.
2349 LOG_DEBUG(Kernel_SVC, "called"); 2323 LOG_DEBUG(Kernel_SVC, "called");
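Note: since the emulated data cache is coherent with the host's, every parameter of this stub is intentionally ignored; [[maybe_unused]] (C++17) documents that intent and keeps -Wunused-parameter quiet while preserving the SVC signature:

    // Stub pattern: parameters kept for the ABI, deliberately unused.
    int FlushStub([[maybe_unused]] int handle, [[maybe_unused]] unsigned address,
                  [[maybe_unused]] unsigned size) {
        return 0; // nothing to flush under emulation
    }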
@@ -2639,7 +2613,7 @@ void Call(Core::System& system, u32 immediate) {
2639 auto& kernel = system.Kernel(); 2613 auto& kernel = system.Kernel();
2640 kernel.EnterSVCProfile(); 2614 kernel.EnterSVCProfile();
2641 2615
2642 auto* thread = system.CurrentScheduler().GetCurrentThread(); 2616 auto* thread = kernel.CurrentScheduler()->GetCurrentThread();
2643 thread->SetContinuousOnSVC(true); 2617 thread->SetContinuousOnSVC(true);
2644 2618
2645 const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate) 2619 const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index 8b875d853..d3f520ea2 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -5,8 +5,9 @@
5#include "core/core.h" 5#include "core/core.h"
6#include "core/hle/kernel/errors.h" 6#include "core/hle/kernel/errors.h"
7#include "core/hle/kernel/handle_table.h" 7#include "core/hle/kernel/handle_table.h"
8#include "core/hle/kernel/k_scheduler.h"
9#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
8#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
9#include "core/hle/kernel/scheduler.h"
10#include "core/hle/kernel/synchronization.h" 11#include "core/hle/kernel/synchronization.h"
11#include "core/hle/kernel/synchronization_object.h" 12#include "core/hle/kernel/synchronization_object.h"
12#include "core/hle/kernel/thread.h" 13#include "core/hle/kernel/thread.h"
@@ -18,7 +19,7 @@ Synchronization::Synchronization(Core::System& system) : system{system} {}
18 19
19void Synchronization::SignalObject(SynchronizationObject& obj) const { 20void Synchronization::SignalObject(SynchronizationObject& obj) const {
20 auto& kernel = system.Kernel(); 21 auto& kernel = system.Kernel();
21 SchedulerLock lock(kernel); 22 KScopedSchedulerLock lock(kernel);
22 if (obj.IsSignaled()) { 23 if (obj.IsSignaled()) {
23 for (auto thread : obj.GetWaitingThreads()) { 24 for (auto thread : obj.GetWaitingThreads()) {
24 if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) { 25 if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) {
@@ -37,10 +38,10 @@ void Synchronization::SignalObject(SynchronizationObject& obj) const {
37std::pair<ResultCode, Handle> Synchronization::WaitFor( 38std::pair<ResultCode, Handle> Synchronization::WaitFor(
38 std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) { 39 std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
39 auto& kernel = system.Kernel(); 40 auto& kernel = system.Kernel();
40 auto* const thread = system.CurrentScheduler().GetCurrentThread(); 41 auto* const thread = kernel.CurrentScheduler()->GetCurrentThread();
41 Handle event_handle = InvalidHandle; 42 Handle event_handle = InvalidHandle;
42 { 43 {
43 SchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds); 44 KScopedSchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds);
44 const auto itr = 45 const auto itr =
45 std::find_if(sync_objects.begin(), sync_objects.end(), 46 std::find_if(sync_objects.begin(), sync_objects.end(),
46 [thread](const std::shared_ptr<SynchronizationObject>& object) { 47 [thread](const std::shared_ptr<SynchronizationObject>& object) {
@@ -89,7 +90,7 @@ std::pair<ResultCode, Handle> Synchronization::WaitFor(
89 } 90 }
90 91
91 { 92 {
92 SchedulerLock lock(kernel); 93 KScopedSchedulerLock lock(kernel);
93 ResultCode signaling_result = thread->GetSignalingResult(); 94 ResultCode signaling_result = thread->GetSignalingResult();
94 SynchronizationObject* signaling_object = thread->GetSignalingObject(); 95 SynchronizationObject* signaling_object = thread->GetSignalingObject();
95 thread->SetSynchronizationObjects(nullptr); 96 thread->SetSynchronizationObjects(nullptr);
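Note: beyond the lock renames, WaitFor's structure is worth spelling out: under the lock-and-sleep scope it first scans the object list for one that is already signaled, and only commits the thread to sleeping when the scan comes up empty. A condensed sketch of that scan (the lambda body is abbreviated relative to the diff):

    #include <algorithm>
    #include <cstddef>
    #include <iterator>
    #include <vector>

    struct ObjSketch {
        bool signaled{};
    };

    // Index of the first already-signaled object, or -1 meaning "go to sleep".
    std::ptrdiff_t FindSignaled(const std::vector<ObjSketch>& objects) {
        const auto it = std::find_if(objects.begin(), objects.end(),
                                     [](const ObjSketch& o) { return o.signaled; });
        return it == objects.end() ? -1 : std::distance(objects.begin(), it);
    }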
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 7d1eb2c6e..a4f9e0d97 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -17,10 +17,11 @@
17#include "core/hardware_properties.h" 17#include "core/hardware_properties.h"
18#include "core/hle/kernel/errors.h" 18#include "core/hle/kernel/errors.h"
19#include "core/hle/kernel/handle_table.h" 19#include "core/hle/kernel/handle_table.h"
20#include "core/hle/kernel/k_scheduler.h"
21#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
20#include "core/hle/kernel/kernel.h" 22#include "core/hle/kernel/kernel.h"
21#include "core/hle/kernel/object.h" 23#include "core/hle/kernel/object.h"
22#include "core/hle/kernel/process.h" 24#include "core/hle/kernel/process.h"
23#include "core/hle/kernel/scheduler.h"
24#include "core/hle/kernel/thread.h" 25#include "core/hle/kernel/thread.h"
25#include "core/hle/kernel/time_manager.h" 26#include "core/hle/kernel/time_manager.h"
26#include "core/hle/result.h" 27#include "core/hle/result.h"
@@ -50,7 +51,7 @@ Thread::~Thread() = default;
50 51
51void Thread::Stop() { 52void Thread::Stop() {
52 { 53 {
53 SchedulerLock lock(kernel); 54 KScopedSchedulerLock lock(kernel);
54 SetStatus(ThreadStatus::Dead); 55 SetStatus(ThreadStatus::Dead);
55 Signal(); 56 Signal();
56 kernel.GlobalHandleTable().Close(global_handle); 57 kernel.GlobalHandleTable().Close(global_handle);
@@ -67,7 +68,7 @@ void Thread::Stop() {
67} 68}
68 69
69void Thread::ResumeFromWait() { 70void Thread::ResumeFromWait() {
70 SchedulerLock lock(kernel); 71 KScopedSchedulerLock lock(kernel);
71 switch (status) { 72 switch (status) {
72 case ThreadStatus::Paused: 73 case ThreadStatus::Paused:
73 case ThreadStatus::WaitSynch: 74 case ThreadStatus::WaitSynch:
@@ -99,19 +100,18 @@ void Thread::ResumeFromWait() {
99} 100}
100 101
101void Thread::OnWakeUp() { 102void Thread::OnWakeUp() {
102 SchedulerLock lock(kernel); 103 KScopedSchedulerLock lock(kernel);
103
104 SetStatus(ThreadStatus::Ready); 104 SetStatus(ThreadStatus::Ready);
105} 105}
106 106
107ResultCode Thread::Start() { 107ResultCode Thread::Start() {
108 SchedulerLock lock(kernel); 108 KScopedSchedulerLock lock(kernel);
109 SetStatus(ThreadStatus::Ready); 109 SetStatus(ThreadStatus::Ready);
110 return RESULT_SUCCESS; 110 return RESULT_SUCCESS;
111} 111}
112 112
113void Thread::CancelWait() { 113void Thread::CancelWait() {
114 SchedulerLock lock(kernel); 114 KScopedSchedulerLock lock(kernel);
115 if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) { 115 if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) {
116 is_sync_cancelled = true; 116 is_sync_cancelled = true;
117 return; 117 return;
@@ -186,12 +186,14 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
186 thread->status = ThreadStatus::Dormant; 186 thread->status = ThreadStatus::Dormant;
187 thread->entry_point = entry_point; 187 thread->entry_point = entry_point;
188 thread->stack_top = stack_top; 188 thread->stack_top = stack_top;
189 thread->disable_count = 1;
189 thread->tpidr_el0 = 0; 190 thread->tpidr_el0 = 0;
190 thread->nominal_priority = thread->current_priority = priority; 191 thread->nominal_priority = thread->current_priority = priority;
191 thread->last_running_ticks = 0; 192 thread->schedule_count = -1;
193 thread->last_scheduled_tick = 0;
192 thread->processor_id = processor_id; 194 thread->processor_id = processor_id;
193 thread->ideal_core = processor_id; 195 thread->ideal_core = processor_id;
194 thread->affinity_mask = 1ULL << processor_id; 196 thread->affinity_mask.SetAffinity(processor_id, true);
195 thread->wait_objects = nullptr; 197 thread->wait_objects = nullptr;
196 thread->mutex_wait_address = 0; 198 thread->mutex_wait_address = 0;
197 thread->condvar_wait_address = 0; 199 thread->condvar_wait_address = 0;
@@ -201,7 +203,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
201 thread->owner_process = owner_process; 203 thread->owner_process = owner_process;
202 thread->type = type_flags; 204 thread->type = type_flags;
203 if ((type_flags & THREADTYPE_IDLE) == 0) { 205 if ((type_flags & THREADTYPE_IDLE) == 0) {
204 auto& scheduler = kernel.GlobalScheduler(); 206 auto& scheduler = kernel.GlobalSchedulerContext();
205 scheduler.AddThread(thread); 207 scheduler.AddThread(thread);
206 } 208 }
207 if (owner_process) { 209 if (owner_process) {
@@ -225,7 +227,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
225} 227}
226 228
227void Thread::SetPriority(u32 priority) { 229void Thread::SetPriority(u32 priority) {
228 SchedulerLock lock(kernel); 230 KScopedSchedulerLock lock(kernel);
229 ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST, 231 ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST,
230 "Invalid priority value."); 232 "Invalid priority value.");
231 nominal_priority = priority; 233 nominal_priority = priority;
@@ -362,7 +364,7 @@ bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) {
362} 364}
363 365
364ResultCode Thread::SetActivity(ThreadActivity value) { 366ResultCode Thread::SetActivity(ThreadActivity value) {
365 SchedulerLock lock(kernel); 367 KScopedSchedulerLock lock(kernel);
366 368
367 auto sched_status = GetSchedulingStatus(); 369 auto sched_status = GetSchedulingStatus();
368 370
@@ -391,7 +393,7 @@ ResultCode Thread::SetActivity(ThreadActivity value) {
391ResultCode Thread::Sleep(s64 nanoseconds) { 393ResultCode Thread::Sleep(s64 nanoseconds) {
392 Handle event_handle{}; 394 Handle event_handle{};
393 { 395 {
394 SchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds); 396 KScopedSchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds);
395 SetStatus(ThreadStatus::WaitSleep); 397 SetStatus(ThreadStatus::WaitSleep);
396 } 398 }
397 399
@@ -402,39 +404,12 @@ ResultCode Thread::Sleep(s64 nanoseconds) {
402 return RESULT_SUCCESS; 404 return RESULT_SUCCESS;
403} 405}
404 406
405std::pair<ResultCode, bool> Thread::YieldSimple() {
406 bool is_redundant = false;
407 {
408 SchedulerLock lock(kernel);
409 is_redundant = kernel.GlobalScheduler().YieldThread(this);
410 }
411 return {RESULT_SUCCESS, is_redundant};
412}
413
414std::pair<ResultCode, bool> Thread::YieldAndBalanceLoad() {
415 bool is_redundant = false;
416 {
417 SchedulerLock lock(kernel);
418 is_redundant = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this);
419 }
420 return {RESULT_SUCCESS, is_redundant};
421}
422
423std::pair<ResultCode, bool> Thread::YieldAndWaitForLoadBalancing() {
424 bool is_redundant = false;
425 {
426 SchedulerLock lock(kernel);
427 is_redundant = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this);
428 }
429 return {RESULT_SUCCESS, is_redundant};
430}
431
432void Thread::AddSchedulingFlag(ThreadSchedFlags flag) { 407void Thread::AddSchedulingFlag(ThreadSchedFlags flag) {
433 const u32 old_state = scheduling_state; 408 const u32 old_state = scheduling_state;
434 pausing_state |= static_cast<u32>(flag); 409 pausing_state |= static_cast<u32>(flag);
435 const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); 410 const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
436 scheduling_state = base_scheduling | pausing_state; 411 scheduling_state = base_scheduling | pausing_state;
437 kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); 412 KScheduler::OnThreadStateChanged(kernel, this, old_state);
438} 413}
439 414
440void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) { 415void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) {
@@ -442,23 +417,24 @@ void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) {
442 pausing_state &= ~static_cast<u32>(flag); 417 pausing_state &= ~static_cast<u32>(flag);
443 const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); 418 const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
444 scheduling_state = base_scheduling | pausing_state; 419 scheduling_state = base_scheduling | pausing_state;
445 kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); 420 KScheduler::OnThreadStateChanged(kernel, this, old_state);
446} 421}
447 422
448void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { 423void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
449 const u32 old_state = scheduling_state; 424 const u32 old_state = scheduling_state;
450 scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) | 425 scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |
451 static_cast<u32>(new_status); 426 static_cast<u32>(new_status);
452 kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); 427 KScheduler::OnThreadStateChanged(kernel, this, old_state);
453} 428}
454 429
455void Thread::SetCurrentPriority(u32 new_priority) { 430void Thread::SetCurrentPriority(u32 new_priority) {
456 const u32 old_priority = std::exchange(current_priority, new_priority); 431 const u32 old_priority = std::exchange(current_priority, new_priority);
457 kernel.GlobalScheduler().AdjustSchedulingOnPriority(this, old_priority); 432 KScheduler::OnThreadPriorityChanged(kernel, this, kernel.CurrentScheduler()->GetCurrentThread(),
433 old_priority);
458} 434}
459 435
460ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { 436ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
461 SchedulerLock lock(kernel); 437 KScopedSchedulerLock lock(kernel);
462 const auto HighestSetCore = [](u64 mask, u32 max_cores) { 438 const auto HighestSetCore = [](u64 mask, u32 max_cores) {
463 for (s32 core = static_cast<s32>(max_cores - 1); core >= 0; core--) { 439 for (s32 core = static_cast<s32>(max_cores - 1); core >= 0; core--) {
464 if (((mask >> core) & 1) != 0) { 440 if (((mask >> core) & 1) != 0) {
@@ -479,20 +455,21 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
479 } 455 }
480 if (use_override) { 456 if (use_override) {
481 ideal_core_override = new_core; 457 ideal_core_override = new_core;
482 affinity_mask_override = new_affinity_mask;
483 } else { 458 } else {
484 const u64 old_affinity_mask = std::exchange(affinity_mask, new_affinity_mask); 459 const auto old_affinity_mask = affinity_mask;
460 affinity_mask.SetAffinityMask(new_affinity_mask);
485 ideal_core = new_core; 461 ideal_core = new_core;
486 if (old_affinity_mask != new_affinity_mask) { 462 if (old_affinity_mask.GetAffinityMask() != new_affinity_mask) {
487 const s32 old_core = processor_id; 463 const s32 old_core = processor_id;
488 if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) { 464 if (processor_id >= 0 && !affinity_mask.GetAffinity(processor_id)) {
489 if (static_cast<s32>(ideal_core) < 0) { 465 if (static_cast<s32>(ideal_core) < 0) {
490 processor_id = HighestSetCore(affinity_mask, Core::Hardware::NUM_CPU_CORES); 466 processor_id = HighestSetCore(affinity_mask.GetAffinityMask(),
467 Core::Hardware::NUM_CPU_CORES);
491 } else { 468 } else {
492 processor_id = ideal_core; 469 processor_id = ideal_core;
493 } 470 }
494 } 471 }
495 kernel.GlobalScheduler().AdjustSchedulingOnAffinity(this, old_affinity_mask, old_core); 472 KScheduler::OnThreadAffinityMaskChanged(kernel, this, old_affinity_mask, old_core);
496 } 473 }
497 } 474 }
498 return RESULT_SUCCESS; 475 return RESULT_SUCCESS;
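Note: after this change every scheduling mutation in thread.cpp follows one pattern: capture the old value, mutate the field, then hand the old value to a static KScheduler hook (OnThreadStateChanged, OnThreadPriorityChanged, OnThreadAffinityMaskChanged) so the scheduler can requeue the thread under its old key. Reduced to the priority case:

    #include <cstdint>
    #include <utility>

    // Stand-in for KScheduler::OnThreadPriorityChanged.
    void OnPriorityChangedSketch([[maybe_unused]] std::uint32_t old_priority,
                                 [[maybe_unused]] std::uint32_t new_priority) {
        // remove from queue[old_priority], insert into queue[new_priority] ...
    }

    void SetPrioritySketch(std::uint32_t& current_priority, std::uint32_t new_priority) {
        const auto old_priority = std::exchange(current_priority, new_priority);
        OnPriorityChangedSketch(old_priority, current_priority);
    }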
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index a75071e9b..11ef29888 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <functional> 8#include <functional>
8#include <string> 9#include <string>
9#include <utility> 10#include <utility>
@@ -12,6 +13,7 @@
12#include "common/common_types.h" 13#include "common/common_types.h"
13#include "common/spin_lock.h" 14#include "common/spin_lock.h"
14#include "core/arm/arm_interface.h" 15#include "core/arm/arm_interface.h"
16#include "core/hle/kernel/k_affinity_mask.h"
15#include "core/hle/kernel/object.h" 17#include "core/hle/kernel/object.h"
16#include "core/hle/kernel/synchronization_object.h" 18#include "core/hle/kernel/synchronization_object.h"
17#include "core/hle/result.h" 19#include "core/hle/result.h"
@@ -27,10 +29,10 @@ class System;
27 29
28namespace Kernel { 30namespace Kernel {
29 31
30class GlobalScheduler; 32class GlobalSchedulerContext;
31class KernelCore; 33class KernelCore;
32class Process; 34class Process;
33class Scheduler; 35class KScheduler;
34 36
35enum ThreadPriority : u32 { 37enum ThreadPriority : u32 {
36 THREADPRIO_HIGHEST = 0, ///< Highest thread priority 38 THREADPRIO_HIGHEST = 0, ///< Highest thread priority
@@ -345,8 +347,12 @@ public:
345 347
346 void SetStatus(ThreadStatus new_status); 348 void SetStatus(ThreadStatus new_status);
347 349
348 u64 GetLastRunningTicks() const { 350 s64 GetLastScheduledTick() const {
349 return last_running_ticks; 351 return this->last_scheduled_tick;
352 }
353
354 void SetLastScheduledTick(s64 tick) {
355 this->last_scheduled_tick = tick;
350 } 356 }
351 357
352 u64 GetTotalCPUTimeTicks() const { 358 u64 GetTotalCPUTimeTicks() const {
@@ -361,10 +367,18 @@ public:
361 return processor_id; 367 return processor_id;
362 } 368 }
363 369
370 s32 GetActiveCore() const {
371 return GetProcessorID();
372 }
373
364 void SetProcessorID(s32 new_core) { 374 void SetProcessorID(s32 new_core) {
365 processor_id = new_core; 375 processor_id = new_core;
366 } 376 }
367 377
378 void SetActiveCore(s32 new_core) {
379 processor_id = new_core;
380 }
381
368 Process* GetOwnerProcess() { 382 Process* GetOwnerProcess() {
369 return owner_process; 383 return owner_process;
370 } 384 }
@@ -469,7 +483,7 @@ public:
469 return ideal_core; 483 return ideal_core;
470 } 484 }
471 485
472 u64 GetAffinityMask() const { 486 const KAffinityMask& GetAffinityMask() const {
473 return affinity_mask; 487 return affinity_mask;
474 } 488 }
475 489
@@ -478,21 +492,12 @@ public:
478 /// Sleeps this thread for the given amount of nanoseconds. 492 /// Sleeps this thread for the given amount of nanoseconds.
479 ResultCode Sleep(s64 nanoseconds); 493 ResultCode Sleep(s64 nanoseconds);
480 494
481 /// Yields this thread without rebalancing loads. 495 s64 GetYieldScheduleCount() const {
482 std::pair<ResultCode, bool> YieldSimple(); 496 return this->schedule_count;
483
484 /// Yields this thread and does a load rebalancing.
485 std::pair<ResultCode, bool> YieldAndBalanceLoad();
486
487 /// Yields this thread and if the core is left idle, loads are rebalanced
488 std::pair<ResultCode, bool> YieldAndWaitForLoadBalancing();
489
490 void IncrementYieldCount() {
491 yield_count++;
492 } 497 }
493 498
494 u64 GetYieldCount() const { 499 void SetYieldScheduleCount(s64 count) {
495 return yield_count; 500 this->schedule_count = count;
496 } 501 }
497 502
498 ThreadSchedStatus GetSchedulingStatus() const { 503 ThreadSchedStatus GetSchedulingStatus() const {
@@ -568,9 +573,59 @@ public:
568 return has_exited; 573 return has_exited;
569 } 574 }
570 575
576 class QueueEntry {
577 public:
578 constexpr QueueEntry() = default;
579
580 constexpr void Initialize() {
581 this->prev = nullptr;
582 this->next = nullptr;
583 }
584
585 constexpr Thread* GetPrev() const {
586 return this->prev;
587 }
588 constexpr Thread* GetNext() const {
589 return this->next;
590 }
591 constexpr void SetPrev(Thread* thread) {
592 this->prev = thread;
593 }
594 constexpr void SetNext(Thread* thread) {
595 this->next = thread;
596 }
597
598 private:
599 Thread* prev{};
600 Thread* next{};
601 };
602
603 QueueEntry& GetPriorityQueueEntry(s32 core) {
604 return this->per_core_priority_queue_entry[core];
605 }
606
607 const QueueEntry& GetPriorityQueueEntry(s32 core) const {
608 return this->per_core_priority_queue_entry[core];
609 }
610
611 s32 GetDisableDispatchCount() const {
612 return disable_count;
613 }
614
615 void DisableDispatch() {
616 ASSERT(GetDisableDispatchCount() >= 0);
617 disable_count++;
618 }
619
620 void EnableDispatch() {
621 ASSERT(GetDisableDispatchCount() > 0);
622 disable_count--;
623 }
624
571private: 625private:
572 friend class GlobalScheduler; 626 friend class GlobalSchedulerContext;
573 friend class Scheduler; 627 friend class KScheduler;
628 friend class Process;
574 629
575 void SetSchedulingStatus(ThreadSchedStatus new_status); 630 void SetSchedulingStatus(ThreadSchedStatus new_status);
576 void AddSchedulingFlag(ThreadSchedFlags flag); 631 void AddSchedulingFlag(ThreadSchedFlags flag);
@@ -583,12 +638,14 @@ private:
583 ThreadContext64 context_64{}; 638 ThreadContext64 context_64{};
584 std::shared_ptr<Common::Fiber> host_context{}; 639 std::shared_ptr<Common::Fiber> host_context{};
585 640
586 u64 thread_id = 0;
587
588 ThreadStatus status = ThreadStatus::Dormant; 641 ThreadStatus status = ThreadStatus::Dormant;
642 u32 scheduling_state = 0;
643
644 u64 thread_id = 0;
589 645
590 VAddr entry_point = 0; 646 VAddr entry_point = 0;
591 VAddr stack_top = 0; 647 VAddr stack_top = 0;
648 std::atomic_int disable_count = 0;
592 649
593 ThreadType type; 650 ThreadType type;
594 651
@@ -602,9 +659,8 @@ private:
602 u32 current_priority = 0; 659 u32 current_priority = 0;
603 660
604 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. 661 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
605 u64 last_running_ticks = 0; ///< CPU tick when thread was last running 662 s64 schedule_count{};
606 u64 yield_count = 0; ///< Number of redundant yields carried by this thread. 663 s64 last_scheduled_tick{};
607 ///< a redundant yield is one where no scheduling is changed
608 664
609 s32 processor_id = 0; 665 s32 processor_id = 0;
610 666
@@ -646,16 +702,16 @@ private:
646 Handle hle_time_event; 702 Handle hle_time_event;
647 SynchronizationObject* hle_object; 703 SynchronizationObject* hle_object;
648 704
649 Scheduler* scheduler = nullptr; 705 KScheduler* scheduler = nullptr;
706
707 std::array<QueueEntry, Core::Hardware::NUM_CPU_CORES> per_core_priority_queue_entry{};
650 708
651 u32 ideal_core{0xFFFFFFFF}; 709 u32 ideal_core{0xFFFFFFFF};
652 u64 affinity_mask{0x1}; 710 KAffinityMask affinity_mask{};
653 711
654 s32 ideal_core_override = -1; 712 s32 ideal_core_override = -1;
655 u64 affinity_mask_override = 0x1;
656 u32 affinity_override_count = 0; 713 u32 affinity_override_count = 0;
657 714
658 u32 scheduling_state = 0;
659 u32 pausing_state = 0; 715 u32 pausing_state = 0;
660 bool is_running = false; 716 bool is_running = false;
661 bool is_waiting_on_sync = false; 717 bool is_waiting_on_sync = false;
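Note: the new QueueEntry embeds the priority-queue links directly in Thread, one entry per core, turning Thread into an intrusive doubly-linked list node; the scheduler can then link and unlink threads with pointer surgery alone, no allocation and no external container nodes. Alongside it, disable_count is a nesting counter that DisableDispatch()/EnableDispatch() bump to fence off rescheduling. A stand-alone illustration of the intrusive idea:

    // Each element owns its own links, so insertion touches only pointers.
    struct NodeSketch {
        NodeSketch* prev{};
        NodeSketch* next{};
    };

    void InsertAfter(NodeSketch& pos, NodeSketch& node) {
        node.prev = &pos;
        node.next = pos.next;
        if (pos.next != nullptr) {
            pos.next->prev = &node;
        }
        pos.next = &node;
    }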
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index caf329bfb..79628e2b4 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -7,8 +7,8 @@
7#include "core/core_timing.h" 7#include "core/core_timing.h"
8#include "core/core_timing_util.h" 8#include "core/core_timing_util.h"
9#include "core/hle/kernel/handle_table.h" 9#include "core/hle/kernel/handle_table.h"
10#include "core/hle/kernel/k_scheduler.h"
10#include "core/hle/kernel/kernel.h" 11#include "core/hle/kernel/kernel.h"
11#include "core/hle/kernel/scheduler.h"
12#include "core/hle/kernel/thread.h" 12#include "core/hle/kernel/thread.h"
13#include "core/hle/kernel/time_manager.h" 13#include "core/hle/kernel/time_manager.h"
14 14
@@ -18,12 +18,18 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} {
18 time_manager_event_type = Core::Timing::CreateEvent( 18 time_manager_event_type = Core::Timing::CreateEvent(
19 "Kernel::TimeManagerCallback", 19 "Kernel::TimeManagerCallback",
20 [this](std::uintptr_t thread_handle, std::chrono::nanoseconds) { 20 [this](std::uintptr_t thread_handle, std::chrono::nanoseconds) {
21 const SchedulerLock lock(system.Kernel()); 21 const KScopedSchedulerLock lock(system.Kernel());
22 const auto proper_handle = static_cast<Handle>(thread_handle); 22 const auto proper_handle = static_cast<Handle>(thread_handle);
23 if (cancelled_events[proper_handle]) { 23
24 return; 24 std::shared_ptr<Thread> thread;
25 {
26 std::lock_guard lock{mutex};
27 if (cancelled_events[proper_handle]) {
28 return;
29 }
30 thread = system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
25 } 31 }
26 auto thread = this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); 32
27 if (thread) { 33 if (thread) {
28 // Thread can be null if process has exited 34 // Thread can be null if process has exited
29 thread->OnWakeUp(); 35 thread->OnWakeUp();
@@ -56,6 +62,7 @@ void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
56} 62}
57 63
58void TimeManager::CancelTimeEvent(Thread* time_task) { 64void TimeManager::CancelTimeEvent(Thread* time_task) {
65 std::lock_guard lock{mutex};
59 const Handle event_handle = time_task->GetGlobalHandle(); 66 const Handle event_handle = time_task->GetGlobalHandle();
60 UnscheduleTimeEvent(event_handle); 67 UnscheduleTimeEvent(event_handle);
61} 68}
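Note: the TimeManager callback previously consulted cancelled_events with no synchronization against CancelTimeEvent; both sides now serialize on one mutex, and the thread lookup happens inside the guarded region while the wake-up itself runs after the guard is gone. The shape of the fix, with stand-in types:

    #include <memory>
    #include <mutex>
    #include <unordered_map>

    std::mutex g_mutex;                               // shared with CancelTimeEvent
    std::unordered_map<int, bool> g_cancelled_events;

    struct ThreadSketch {
        void OnWakeUp() {}
    };

    void TimerCallback(int handle, std::shared_ptr<ThreadSketch> (*lookup)(int)) {
        std::shared_ptr<ThreadSketch> thread;
        {
            std::lock_guard lock{g_mutex};
            if (g_cancelled_events[handle]) {
                return; // cancelled before the timer fired
            }
            thread = lookup(handle);
        }
        if (thread) { // may be null if the owning process already exited
            thread->OnWakeUp();
        }
    }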
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 38d877f6e..cb13210e5 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1092,14 +1092,14 @@ void ILibraryAppletCreator::CreateLibraryApplet(Kernel::HLERequestContext& ctx)
1092 const auto applet_id = rp.PopRaw<Applets::AppletId>(); 1092 const auto applet_id = rp.PopRaw<Applets::AppletId>();
1093 const auto applet_mode = rp.PopRaw<u32>(); 1093 const auto applet_mode = rp.PopRaw<u32>();
1094 1094
1095 LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", 1095 LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", applet_id,
1096 static_cast<u32>(applet_id), applet_mode); 1096 applet_mode);
1097 1097
1098 const auto& applet_manager{system.GetAppletManager()}; 1098 const auto& applet_manager{system.GetAppletManager()};
1099 const auto applet = applet_manager.GetApplet(applet_id); 1099 const auto applet = applet_manager.GetApplet(applet_id);
1100 1100
1101 if (applet == nullptr) { 1101 if (applet == nullptr) {
1102 LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", static_cast<u32>(applet_id)); 1102 LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", applet_id);
1103 1103
1104 IPC::ResponseBuilder rb{ctx, 2}; 1104 IPC::ResponseBuilder rb{ctx, 2};
1105 rb.Push(RESULT_UNKNOWN); 1105 rb.Push(RESULT_UNKNOWN);
@@ -1290,7 +1290,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
1290 IPC::RequestParser rp{ctx}; 1290 IPC::RequestParser rp{ctx};
1291 const auto kind = rp.PopEnum<LaunchParameterKind>(); 1291 const auto kind = rp.PopEnum<LaunchParameterKind>();
1292 1292
1293 LOG_DEBUG(Service_AM, "called, kind={:08X}", static_cast<u8>(kind)); 1293 LOG_DEBUG(Service_AM, "called, kind={:08X}", kind);
1294 1294
1295 if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) { 1295 if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) {
1296 const auto backend = BCAT::CreateBackendFromSettings(system, [this](u64 tid) { 1296 const auto backend = BCAT::CreateBackendFromSettings(system, [this](u64 tid) {
@@ -1537,8 +1537,8 @@ void IApplicationFunctions::GetSaveDataSize(Kernel::HLERequestContext& ctx) {
1537 IPC::RequestParser rp{ctx}; 1537 IPC::RequestParser rp{ctx};
1538 const auto [type, user_id] = rp.PopRaw<Parameters>(); 1538 const auto [type, user_id] = rp.PopRaw<Parameters>();
1539 1539
1540 LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", static_cast<u8>(type), 1540 LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", type, user_id[1],
1541 user_id[1], user_id[0]); 1541 user_id[0]);
1542 1542
1543 const auto size = system.GetFileSystemController().ReadSaveDataSize( 1543 const auto size = system.GetFileSystemController().ReadSaveDataSize(
1544 type, system.CurrentProcess()->GetTitleID(), user_id); 1544 type, system.CurrentProcess()->GetTitleID(), user_id);
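Note: dropping the static_casts in these LOG_* calls assumes the logging layer knows how to format enum values directly; plain {fmt} does not format scoped enums out of the box, so some adapter has to exist. A hedged sketch of one way to provide it (modern {fmt}, version 10 and later, picks up an ADL format_as overload; older releases would need a fmt::formatter specialization instead):

    #include <cstdint>
    #include <type_traits>

    #include <fmt/format.h>

    enum class AppletIdSketch : std::uint32_t { PhotoViewer = 0x15 };

    // Map any scoped enum onto its underlying integer for formatting.
    template <typename T, typename = std::enable_if_t<std::is_enum_v<T>>>
    constexpr auto format_as(T value) {
        return static_cast<std::underlying_type_t<T>>(value);
    }

    // fmt::format("{:02X}", AppletIdSketch::PhotoViewer) yields "15".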
diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp
index 2b626bb40..08676c3fc 100644
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -142,14 +142,14 @@ void Applet::Initialize() {
142 142
143AppletFrontendSet::AppletFrontendSet() = default; 143AppletFrontendSet::AppletFrontendSet() = default;
144 144
145AppletFrontendSet::AppletFrontendSet(ControllerApplet controller, ECommerceApplet e_commerce, 145AppletFrontendSet::AppletFrontendSet(ControllerApplet controller_applet, ErrorApplet error_applet,
146 ErrorApplet error, ParentalControlsApplet parental_controls, 146 ParentalControlsApplet parental_controls_applet,
147 PhotoViewer photo_viewer, ProfileSelect profile_select, 147 PhotoViewer photo_viewer_, ProfileSelect profile_select_,
148 SoftwareKeyboard software_keyboard, WebBrowser web_browser) 148 SoftwareKeyboard software_keyboard_, WebBrowser web_browser_)
149 : controller{std::move(controller)}, e_commerce{std::move(e_commerce)}, error{std::move(error)}, 149 : controller{std::move(controller_applet)}, error{std::move(error_applet)},
150 parental_controls{std::move(parental_controls)}, photo_viewer{std::move(photo_viewer)}, 150 parental_controls{std::move(parental_controls_applet)},
151 profile_select{std::move(profile_select)}, software_keyboard{std::move(software_keyboard)}, 151 photo_viewer{std::move(photo_viewer_)}, profile_select{std::move(profile_select_)},
152 web_browser{std::move(web_browser)} {} 152 software_keyboard{std::move(software_keyboard_)}, web_browser{std::move(web_browser_)} {}
153 153
154AppletFrontendSet::~AppletFrontendSet() = default; 154AppletFrontendSet::~AppletFrontendSet() = default;
155 155
@@ -170,10 +170,6 @@ void AppletManager::SetAppletFrontendSet(AppletFrontendSet set) {
170 frontend.controller = std::move(set.controller); 170 frontend.controller = std::move(set.controller);
171 } 171 }
172 172
173 if (set.e_commerce != nullptr) {
174 frontend.e_commerce = std::move(set.e_commerce);
175 }
176
177 if (set.error != nullptr) { 173 if (set.error != nullptr) {
178 frontend.error = std::move(set.error); 174 frontend.error = std::move(set.error);
179 } 175 }
@@ -210,10 +206,6 @@ void AppletManager::SetDefaultAppletsIfMissing() {
210 std::make_unique<Core::Frontend::DefaultControllerApplet>(system.ServiceManager()); 206 std::make_unique<Core::Frontend::DefaultControllerApplet>(system.ServiceManager());
211 } 207 }
212 208
213 if (frontend.e_commerce == nullptr) {
214 frontend.e_commerce = std::make_unique<Core::Frontend::DefaultECommerceApplet>();
215 }
216
217 if (frontend.error == nullptr) { 209 if (frontend.error == nullptr) {
218 frontend.error = std::make_unique<Core::Frontend::DefaultErrorApplet>(); 210 frontend.error = std::make_unique<Core::Frontend::DefaultErrorApplet>();
219 } 211 }
@@ -257,13 +249,14 @@ std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id) const {
257 return std::make_shared<ProfileSelect>(system, *frontend.profile_select); 249 return std::make_shared<ProfileSelect>(system, *frontend.profile_select);
258 case AppletId::SoftwareKeyboard: 250 case AppletId::SoftwareKeyboard:
259 return std::make_shared<SoftwareKeyboard>(system, *frontend.software_keyboard); 251 return std::make_shared<SoftwareKeyboard>(system, *frontend.software_keyboard);
252 case AppletId::Web:
253 case AppletId::Shop:
254 case AppletId::OfflineWeb:
255 case AppletId::LoginShare:
256 case AppletId::WebAuth:
257 return std::make_shared<WebBrowser>(system, *frontend.web_browser);
260 case AppletId::PhotoViewer: 258 case AppletId::PhotoViewer:
261 return std::make_shared<PhotoViewer>(system, *frontend.photo_viewer); 259 return std::make_shared<PhotoViewer>(system, *frontend.photo_viewer);
262 case AppletId::LibAppletShop:
263 return std::make_shared<WebBrowser>(system, *frontend.web_browser,
264 frontend.e_commerce.get());
265 case AppletId::LibAppletOff:
266 return std::make_shared<WebBrowser>(system, *frontend.web_browser);
267 default: 260 default:
268 UNIMPLEMENTED_MSG( 261 UNIMPLEMENTED_MSG(
269 "No backend implementation exists for applet_id={:02X}! Falling back to stub applet.", 262 "No backend implementation exists for applet_id={:02X}! Falling back to stub applet.",
diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h
index a1f4cf897..4fd792c05 100644
--- a/src/core/hle/service/am/applets/applets.h
+++ b/src/core/hle/service/am/applets/applets.h
@@ -50,13 +50,13 @@ enum class AppletId : u32 {
50 ProfileSelect = 0x10, 50 ProfileSelect = 0x10,
51 SoftwareKeyboard = 0x11, 51 SoftwareKeyboard = 0x11,
52 MiiEdit = 0x12, 52 MiiEdit = 0x12,
53 LibAppletWeb = 0x13, 53 Web = 0x13,
54 LibAppletShop = 0x14, 54 Shop = 0x14,
55 PhotoViewer = 0x15, 55 PhotoViewer = 0x15,
56 Settings = 0x16, 56 Settings = 0x16,
57 LibAppletOff = 0x17, 57 OfflineWeb = 0x17,
58 LibAppletWhitelisted = 0x18, 58 LoginShare = 0x18,
59 LibAppletAuth = 0x19, 59 WebAuth = 0x19,
60 MyPage = 0x1A, 60 MyPage = 0x1A,
61}; 61};
62 62
@@ -157,7 +157,6 @@ protected:
157 157
158struct AppletFrontendSet { 158struct AppletFrontendSet {
159 using ControllerApplet = std::unique_ptr<Core::Frontend::ControllerApplet>; 159 using ControllerApplet = std::unique_ptr<Core::Frontend::ControllerApplet>;
160 using ECommerceApplet = std::unique_ptr<Core::Frontend::ECommerceApplet>;
161 using ErrorApplet = std::unique_ptr<Core::Frontend::ErrorApplet>; 160 using ErrorApplet = std::unique_ptr<Core::Frontend::ErrorApplet>;
162 using ParentalControlsApplet = std::unique_ptr<Core::Frontend::ParentalControlsApplet>; 161 using ParentalControlsApplet = std::unique_ptr<Core::Frontend::ParentalControlsApplet>;
163 using PhotoViewer = std::unique_ptr<Core::Frontend::PhotoViewerApplet>; 162 using PhotoViewer = std::unique_ptr<Core::Frontend::PhotoViewerApplet>;
@@ -166,10 +165,10 @@ struct AppletFrontendSet {
166 using WebBrowser = std::unique_ptr<Core::Frontend::WebBrowserApplet>; 165 using WebBrowser = std::unique_ptr<Core::Frontend::WebBrowserApplet>;
167 166
168 AppletFrontendSet(); 167 AppletFrontendSet();
169 AppletFrontendSet(ControllerApplet controller, ECommerceApplet e_commerce, ErrorApplet error, 168 AppletFrontendSet(ControllerApplet controller_applet, ErrorApplet error_applet,
170 ParentalControlsApplet parental_controls, PhotoViewer photo_viewer, 169 ParentalControlsApplet parental_controls_applet, PhotoViewer photo_viewer_,
171 ProfileSelect profile_select, SoftwareKeyboard software_keyboard, 170 ProfileSelect profile_select_, SoftwareKeyboard software_keyboard_,
172 WebBrowser web_browser); 171 WebBrowser web_browser_);
173 ~AppletFrontendSet(); 172 ~AppletFrontendSet();
174 173
175 AppletFrontendSet(const AppletFrontendSet&) = delete; 174 AppletFrontendSet(const AppletFrontendSet&) = delete;
@@ -179,7 +178,6 @@ struct AppletFrontendSet {
179 AppletFrontendSet& operator=(AppletFrontendSet&&) noexcept; 178 AppletFrontendSet& operator=(AppletFrontendSet&&) noexcept;
180 179
181 ControllerApplet controller; 180 ControllerApplet controller;
182 ECommerceApplet e_commerce;
183 ErrorApplet error; 181 ErrorApplet error;
184 ParentalControlsApplet parental_controls; 182 ParentalControlsApplet parental_controls;
185 PhotoViewer photo_viewer; 183 PhotoViewer photo_viewer;
diff --git a/src/core/hle/service/am/applets/controller.cpp b/src/core/hle/service/am/applets/controller.cpp
index e8ea4248b..7edfca64e 100644
--- a/src/core/hle/service/am/applets/controller.cpp
+++ b/src/core/hle/service/am/applets/controller.cpp
@@ -29,14 +29,14 @@ static Core::Frontend::ControllerParameters ConvertToFrontendParameters(
29 npad_style_set.raw = private_arg.style_set; 29 npad_style_set.raw = private_arg.style_set;
30 30
31 return { 31 return {
32 .min_players = std::max(s8(1), header.player_count_min), 32 .min_players = std::max(s8{1}, header.player_count_min),
33 .max_players = header.player_count_max, 33 .max_players = header.player_count_max,
34 .keep_controllers_connected = header.enable_take_over_connection, 34 .keep_controllers_connected = header.enable_take_over_connection,
35 .enable_single_mode = header.enable_single_mode, 35 .enable_single_mode = header.enable_single_mode,
36 .enable_border_color = header.enable_identification_color, 36 .enable_border_color = header.enable_identification_color,
37 .border_colors = identification_colors, 37 .border_colors = std::move(identification_colors),
38 .enable_explain_text = enable_text, 38 .enable_explain_text = enable_text,
39 .explain_text = text, 39 .explain_text = std::move(text),
40 .allow_pro_controller = npad_style_set.pro_controller == 1, 40 .allow_pro_controller = npad_style_set.pro_controller == 1,
41 .allow_handheld = npad_style_set.handheld == 1, 41 .allow_handheld = npad_style_set.handheld == 1,
42 .allow_dual_joycons = npad_style_set.joycon_dual == 1, 42 .allow_dual_joycons = npad_style_set.joycon_dual == 1,
@@ -227,15 +227,14 @@ void Controller::ConfigurationComplete() {
227 // If enable_single_mode is enabled, player_count is 1 regardless of any other parameters. 227 // If enable_single_mode is enabled, player_count is 1 regardless of any other parameters.
228 // Otherwise, only count connected players from P1-P8. 228 // Otherwise, only count connected players from P1-P8.
229 result_info.player_count = 229 result_info.player_count =
230 is_single_mode ? 1 230 is_single_mode
231 : static_cast<s8>(std::count_if( 231 ? 1
232 players.begin(), players.end() - 2, 232 : static_cast<s8>(std::count_if(players.begin(), players.end() - 2,
233 [](Settings::PlayerInput player) { return player.connected; })); 233 [](const auto& player) { return player.connected; }));
234 234
235 result_info.selected_id = HID::Controller_NPad::IndexToNPad( 235 result_info.selected_id = HID::Controller_NPad::IndexToNPad(std::distance(
236 std::distance(players.begin(), 236 players.begin(), std::find_if(players.begin(), players.end(),
237 std::find_if(players.begin(), players.end(), 237 [](const auto& player) { return player.connected; })));
238 [](Settings::PlayerInput player) { return player.connected; })));
239 238
240 result_info.result = 0; 239 result_info.result = 0;
241 240
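Note: ConvertToFrontendParameters now std::moves the vector and string locals into its designated-initializer return value, stealing their buffers instead of copying element by element. In isolation:

    #include <cstdint>
    #include <string>
    #include <utility>
    #include <vector>

    struct ParametersSketch {
        std::vector<std::uint32_t> border_colors;
        std::string explain_text;
    };

    ParametersSketch MakeParameters(std::vector<std::uint32_t> colors, std::string text) {
        return {
            .border_colors = std::move(colors), // buffer steal, no element copies
            .explain_text = std::move(text),
        };
    }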
diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp
index dcd4b2a35..d85505082 100644
--- a/src/core/hle/service/am/applets/error.cpp
+++ b/src/core/hle/service/am/applets/error.cpp
@@ -125,7 +125,7 @@ void Error::Initialize() {
125 error_code = Decode64BitError(args->error_record.error_code_64); 125 error_code = Decode64BitError(args->error_record.error_code_64);
126 break; 126 break;
127 default: 127 default:
128 UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", static_cast<u8>(mode)); 128 UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", mode);
129 } 129 }
130} 130}
131 131
@@ -179,7 +179,7 @@ void Error::Execute() {
179 error_code, std::chrono::seconds{args->error_record.posix_time}, callback); 179 error_code, std::chrono::seconds{args->error_record.posix_time}, callback);
180 break; 180 break;
181 default: 181 default:
182 UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", static_cast<u8>(mode)); 182 UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", mode);
183 DisplayCompleted(); 183 DisplayCompleted();
184 } 184 }
185} 185}
diff --git a/src/core/hle/service/am/applets/general_backend.cpp b/src/core/hle/service/am/applets/general_backend.cpp
index bdb6fd464..4d1df5cbe 100644
--- a/src/core/hle/service/am/applets/general_backend.cpp
+++ b/src/core/hle/service/am/applets/general_backend.cpp
@@ -90,7 +90,7 @@ void Auth::Execute() {
90 const auto unimplemented_log = [this] { 90 const auto unimplemented_log = [this] {
91 UNIMPLEMENTED_MSG("Unimplemented Auth applet type for type={:08X}, arg0={:02X}, " 91 UNIMPLEMENTED_MSG("Unimplemented Auth applet type for type={:08X}, arg0={:02X}, "
92 "arg1={:02X}, arg2={:02X}", 92 "arg1={:02X}, arg2={:02X}",
93 static_cast<u32>(type), arg0, arg1, arg2); 93 type, arg0, arg1, arg2);
94 }; 94 };
95 95
96 switch (type) { 96 switch (type) {
@@ -136,7 +136,7 @@ void Auth::Execute() {
136} 136}
137 137
138void Auth::AuthFinished(bool is_successful) { 138void Auth::AuthFinished(bool is_successful) {
139 this->successful = is_successful; 139 successful = is_successful;
140 140
141 struct Return { 141 struct Return {
142 ResultCode result_code; 142 ResultCode result_code;
@@ -193,7 +193,7 @@ void PhotoViewer::Execute() {
193 frontend.ShowAllPhotos(callback); 193 frontend.ShowAllPhotos(callback);
194 break; 194 break;
195 default: 195 default:
196 UNIMPLEMENTED_MSG("Unimplemented PhotoViewer applet mode={:02X}!", static_cast<u8>(mode)); 196 UNIMPLEMENTED_MSG("Unimplemented PhotoViewer applet mode={:02X}!", mode);
197 } 197 }
198} 198}
199 199
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp
index c3b6b706a..2ab420789 100644
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -1,558 +1,478 @@
1// Copyright 2018 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <cstring>
7#include <vector>
8
9#include "common/assert.h" 5#include "common/assert.h"
10#include "common/common_funcs.h"
11#include "common/common_paths.h" 6#include "common/common_paths.h"
12#include "common/file_util.h" 7#include "common/file_util.h"
13#include "common/hex_util.h"
14#include "common/logging/log.h" 8#include "common/logging/log.h"
15#include "common/string_util.h" 9#include "common/string_util.h"
16#include "core/core.h" 10#include "core/core.h"
17#include "core/file_sys/content_archive.h" 11#include "core/file_sys/content_archive.h"
18#include "core/file_sys/mode.h" 12#include "core/file_sys/mode.h"
19#include "core/file_sys/nca_metadata.h" 13#include "core/file_sys/nca_metadata.h"
14#include "core/file_sys/patch_manager.h"
20#include "core/file_sys/registered_cache.h" 15#include "core/file_sys/registered_cache.h"
21#include "core/file_sys/romfs.h" 16#include "core/file_sys/romfs.h"
22#include "core/file_sys/system_archive/system_archive.h" 17#include "core/file_sys/system_archive/system_archive.h"
23#include "core/file_sys/vfs_types.h" 18#include "core/file_sys/vfs_vector.h"
24#include "core/frontend/applets/general_frontend.h"
25#include "core/frontend/applets/web_browser.h" 19#include "core/frontend/applets/web_browser.h"
26#include "core/hle/kernel/process.h" 20#include "core/hle/kernel/process.h"
21#include "core/hle/result.h"
22#include "core/hle/service/am/am.h"
27#include "core/hle/service/am/applets/web_browser.h" 23#include "core/hle/service/am/applets/web_browser.h"
28#include "core/hle/service/filesystem/filesystem.h" 24#include "core/hle/service/filesystem/filesystem.h"
29#include "core/loader/loader.h" 25#include "core/hle/service/ns/pl_u.h"
30 26
31namespace Service::AM::Applets { 27namespace Service::AM::Applets {
32 28
33enum class WebArgTLVType : u16 {
34 InitialURL = 0x1,
35 ShopArgumentsURL = 0x2, ///< TODO(DarkLordZach): This is not the official name.
36 CallbackURL = 0x3,
37 CallbackableURL = 0x4,
38 ApplicationID = 0x5,
39 DocumentPath = 0x6,
40 DocumentKind = 0x7,
41 SystemDataID = 0x8,
42 ShareStartPage = 0x9,
43 Whitelist = 0xA,
44 News = 0xB,
45 UserID = 0xE,
46 AlbumEntry0 = 0xF,
47 ScreenShotEnabled = 0x10,
48 EcClientCertEnabled = 0x11,
49 Unk12 = 0x12,
50 PlayReportEnabled = 0x13,
51 Unk14 = 0x14,
52 Unk15 = 0x15,
53 BootDisplayKind = 0x17,
54 BackgroundKind = 0x18,
55 FooterEnabled = 0x19,
56 PointerEnabled = 0x1A,
57 LeftStickMode = 0x1B,
58 KeyRepeatFrame1 = 0x1C,
59 KeyRepeatFrame2 = 0x1D,
60 BootAsMediaPlayerInv = 0x1E,
61 DisplayUrlKind = 0x1F,
62 BootAsMediaPlayer = 0x21,
63 ShopJumpEnabled = 0x22,
64 MediaAutoPlayEnabled = 0x23,
65 LobbyParameter = 0x24,
66 ApplicationAlbumEntry = 0x26,
67 JsExtensionEnabled = 0x27,
68 AdditionalCommentText = 0x28,
69 TouchEnabledOnContents = 0x29,
70 UserAgentAdditionalString = 0x2A,
71 AdditionalMediaData0 = 0x2B,
72 MediaPlayerAutoCloseEnabled = 0x2C,
73 PageCacheEnabled = 0x2D,
74 WebAudioEnabled = 0x2E,
75 Unk2F = 0x2F,
76 YouTubeVideoWhitelist = 0x31,
77 FooterFixedKind = 0x32,
78 PageFadeEnabled = 0x33,
79 MediaCreatorApplicationRatingAge = 0x34,
80 BootLoadingIconEnabled = 0x35,
81 PageScrollIndicationEnabled = 0x36,
82 MediaPlayerSpeedControlEnabled = 0x37,
83 AlbumEntry1 = 0x38,
84 AlbumEntry2 = 0x39,
85 AlbumEntry3 = 0x3A,
86 AdditionalMediaData1 = 0x3B,
87 AdditionalMediaData2 = 0x3C,
88 AdditionalMediaData3 = 0x3D,
89 BootFooterButton = 0x3E,
90 OverrideWebAudioVolume = 0x3F,
91 OverrideMediaAudioVolume = 0x40,
92 BootMode = 0x41,
93 WebSessionEnabled = 0x42,
94};
95
96enum class ShimKind : u32 {
97 Shop = 1,
98 Login = 2,
99 Offline = 3,
100 Share = 4,
101 Web = 5,
102 Wifi = 6,
103 Lobby = 7,
104};
105
106enum class ShopWebTarget {
107 ApplicationInfo,
108 AddOnContentList,
109 SubscriptionList,
110 ConsumableItemList,
111 Home,
112 Settings,
113};
114
115namespace { 29namespace {
116 30
117constexpr std::size_t SHIM_KIND_COUNT = 0x8; 31template <typename T>
118 32void ParseRawValue(T& value, const std::vector<u8>& data) {
119struct WebArgHeader { 33 static_assert(std::is_trivially_copyable_v<T>,
120 u16 count; 34 "It's undefined behavior to use memcpy with non-trivially copyable objects");
121 INSERT_PADDING_BYTES(2); 35 std::memcpy(&value, data.data(), data.size());
122 ShimKind kind; 36}
123};
124static_assert(sizeof(WebArgHeader) == 0x8, "WebArgHeader has incorrect size.");
125
126struct WebArgTLV {
127 WebArgTLVType type;
128 u16 size;
129 u32 offset;
130};
131static_assert(sizeof(WebArgTLV) == 0x8, "WebArgTLV has incorrect size.");
132
133struct WebCommonReturnValue {
134 u32 result_code;
135 INSERT_PADDING_BYTES(0x4);
136 std::array<char, 0x1000> last_url;
137 u64 last_url_size;
138};
139static_assert(sizeof(WebCommonReturnValue) == 0x1010, "WebCommonReturnValue has incorrect size.");
140
141struct WebWifiPageArg {
142 INSERT_PADDING_BYTES(4);
143 std::array<char, 0x100> connection_test_url;
144 std::array<char, 0x400> initial_url;
145 std::array<u8, 0x10> nifm_network_uuid;
146 u32 nifm_requirement;
147};
148static_assert(sizeof(WebWifiPageArg) == 0x518, "WebWifiPageArg has incorrect size.");
149
150struct WebWifiReturnValue {
151 INSERT_PADDING_BYTES(4);
152 u32 result;
153};
154static_assert(sizeof(WebWifiReturnValue) == 0x8, "WebWifiReturnValue has incorrect size.");
155
156enum class OfflineWebSource : u32 {
157 OfflineHtmlPage = 0x1,
158 ApplicationLegalInformation = 0x2,
159 SystemDataPage = 0x3,
160};
161
162std::map<WebArgTLVType, std::vector<u8>> GetWebArguments(const std::vector<u8>& arg) {
163 if (arg.size() < sizeof(WebArgHeader))
164 return {};
165
166 WebArgHeader header{};
167 std::memcpy(&header, arg.data(), sizeof(WebArgHeader));
168
169 std::map<WebArgTLVType, std::vector<u8>> out;
170 u64 offset = sizeof(WebArgHeader);
171 for (std::size_t i = 0; i < header.count; ++i) {
172 if (arg.size() < (offset + sizeof(WebArgTLV)))
173 return out;
174 37
175 WebArgTLV tlv{}; 38template <typename T>
176 std::memcpy(&tlv, arg.data() + offset, sizeof(WebArgTLV)); 39T ParseRawValue(const std::vector<u8>& data) {
177 offset += sizeof(WebArgTLV); 40 T value;
41 ParseRawValue(value, data);
42 return value;
43}
178 44
179 offset += tlv.offset; 45std::string ParseStringValue(const std::vector<u8>& data) {
180 if (arg.size() < (offset + tlv.size)) 46 return Common::StringFromFixedZeroTerminatedBuffer(reinterpret_cast<const char*>(data.data()),
181 return out; 47 data.size());
48}
182 49
183 std::vector<u8> data(tlv.size); 50std::string GetMainURL(const std::string& url) {
184 std::memcpy(data.data(), arg.data() + offset, tlv.size); 51 const auto index = url.find('?');
185 offset += tlv.size;
186 52
187 out.insert_or_assign(tlv.type, data); 53 if (index == std::string::npos) {
54 return url;
188 } 55 }
189 56
190 return out; 57 return url.substr(0, index);
191} 58}
192 59
193FileSys::VirtualFile GetApplicationRomFS(const Core::System& system, u64 title_id, 60WebArgInputTLVMap ReadWebArgs(const std::vector<u8>& web_arg, WebArgHeader& web_arg_header) {
194 FileSys::ContentRecordType type) { 61 std::memcpy(&web_arg_header, web_arg.data(), sizeof(WebArgHeader));
195 const auto& installed{system.GetContentProvider()};
196 const auto res = installed.GetEntry(title_id, type);
197 62
198 if (res != nullptr) { 63 if (web_arg.size() == sizeof(WebArgHeader)) {
199 return res->GetRomFS(); 64 return {};
200 } 65 }
201 66
202 if (type == FileSys::ContentRecordType::Data) { 67 WebArgInputTLVMap input_tlv_map;
203 return FileSys::SystemArchive::SynthesizeSystemArchive(title_id); 68
69 u64 current_offset = sizeof(WebArgHeader);
70
71 for (std::size_t i = 0; i < web_arg_header.total_tlv_entries; ++i) {
72 if (web_arg.size() < current_offset + sizeof(WebArgInputTLV)) {
73 return input_tlv_map;
74 }
75
76 WebArgInputTLV input_tlv;
77 std::memcpy(&input_tlv, web_arg.data() + current_offset, sizeof(WebArgInputTLV));
78
79 current_offset += sizeof(WebArgInputTLV);
80
81 if (web_arg.size() < current_offset + input_tlv.arg_data_size) {
82 return input_tlv_map;
83 }
84
85 std::vector<u8> data(input_tlv.arg_data_size);
86 std::memcpy(data.data(), web_arg.data() + current_offset, input_tlv.arg_data_size);
87
88 current_offset += input_tlv.arg_data_size;
89
90 input_tlv_map.insert_or_assign(input_tlv.input_tlv_type, std::move(data));
204 } 91 }
205 92
206 return nullptr; 93 return input_tlv_map;
207} 94}
208 95
209} // Anonymous namespace 96FileSys::VirtualFile GetOfflineRomFS(Core::System& system, u64 title_id,
97 FileSys::ContentRecordType nca_type) {
98 if (nca_type == FileSys::ContentRecordType::Data) {
99 const auto nca =
100 system.GetFileSystemController().GetSystemNANDContents()->GetEntry(title_id, nca_type);
101
102 if (nca == nullptr) {
103 LOG_ERROR(Service_AM,
104 "NCA of type={} with title_id={:016X} is not found in the System NAND!",
105 nca_type, title_id);
106 return FileSys::SystemArchive::SynthesizeSystemArchive(title_id);
107 }
210 108
211WebBrowser::WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_, 109 return nca->GetRomFS();
212 Core::Frontend::ECommerceApplet* frontend_e_commerce_) 110 } else {
213 : Applet{system_.Kernel()}, frontend(frontend_), 111 const auto nca = system.GetContentProvider().GetEntry(title_id, nca_type);
214 frontend_e_commerce(frontend_e_commerce_), system{system_} {}
215 112
216WebBrowser::~WebBrowser() = default; 113 if (nca == nullptr) {
114 LOG_ERROR(Service_AM,
115 "NCA of type={} with title_id={:016X} is not found in the ContentProvider!",
116 nca_type, title_id);
117 return nullptr;
118 }
217 119
218void WebBrowser::Initialize() { 120 const FileSys::PatchManager pm{title_id, system.GetFileSystemController(),
219 Applet::Initialize(); 121 system.GetContentProvider()};
220 122
221 complete = false; 123 return pm.PatchRomFS(nca->GetRomFS(), nca->GetBaseIVFCOffset(), nca_type);
222 temporary_dir.clear(); 124 }
223 filename.clear(); 125}
224 status = RESULT_SUCCESS;
225 126
226 const auto web_arg_storage = broker.PopNormalDataToApplet(); 127void ExtractSharedFonts(Core::System& system) {
227 ASSERT(web_arg_storage != nullptr); 128 static constexpr std::array<const char*, 7> DECRYPTED_SHARED_FONTS{
228 const auto& web_arg = web_arg_storage->GetData(); 129 "FontStandard.ttf",
130 "FontChineseSimplified.ttf",
131 "FontExtendedChineseSimplified.ttf",
132 "FontChineseTraditional.ttf",
133 "FontKorean.ttf",
134 "FontNintendoExtended.ttf",
135 "FontNintendoExtended2.ttf",
136 };
229 137
230 ASSERT(web_arg.size() >= 0x8); 138 for (std::size_t i = 0; i < NS::SHARED_FONTS.size(); ++i) {
231 std::memcpy(&kind, web_arg.data() + 0x4, sizeof(ShimKind)); 139 const auto fonts_dir = Common::FS::SanitizePath(
140 fmt::format("{}/fonts", Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)),
141 Common::FS::DirectorySeparator::PlatformDefault);
232 142
233 args = GetWebArguments(web_arg); 143 const auto font_file_path =
144 Common::FS::SanitizePath(fmt::format("{}/{}", fonts_dir, DECRYPTED_SHARED_FONTS[i]),
145 Common::FS::DirectorySeparator::PlatformDefault);
234 146
235 InitializeInternal(); 147 if (Common::FS::Exists(font_file_path)) {
236} 148 continue;
149 }
237 150
238bool WebBrowser::TransactionComplete() const { 151 const auto font = NS::SHARED_FONTS[i];
239 return complete; 152 const auto font_title_id = static_cast<u64>(font.first);
240}
241 153
242ResultCode WebBrowser::GetStatus() const { 154 const auto nca = system.GetFileSystemController().GetSystemNANDContents()->GetEntry(
243 return status; 155 font_title_id, FileSys::ContentRecordType::Data);
244}
245 156
246void WebBrowser::ExecuteInteractive() { 157 FileSys::VirtualFile romfs;
247	    UNIMPLEMENTED_MSG("Unexpected interactive data received!");
248}
249 158
250void WebBrowser::Execute() { 159 if (!nca) {
251 if (complete) { 160 romfs = FileSys::SystemArchive::SynthesizeSystemArchive(font_title_id);
252 return; 161 } else {
253 } 162 romfs = nca->GetRomFS();
163 }
254 164
255 if (status != RESULT_SUCCESS) { 165 if (!romfs) {
256 complete = true; 166 LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} cannot be extracted!",
167 font_title_id);
168 continue;
169 }
257 170
258 // This is a workaround in order not to softlock yuzu when an error happens during the 171 const auto extracted_romfs = FileSys::ExtractRomFS(romfs);
259 // webapplet init. In order to avoid an svcBreak, the status is set to RESULT_SUCCESS
260 Finalize();
261 status = RESULT_SUCCESS;
262 172
263 return; 173 if (!extracted_romfs) {
264 } 174 LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} failed to extract!",
175 font_title_id);
176 continue;
177 }
265 178
266 ExecuteInternal(); 179 const auto font_file = extracted_romfs->GetFile(font.second);
267}
268 180
269void WebBrowser::UnpackRomFS() { 181 if (!font_file) {
270 if (unpacked) 182 LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} has no font file \"{}\"!",
271 return; 183 font_title_id, font.second);
184 continue;
185 }
272 186
273 ASSERT(offline_romfs != nullptr); 187 std::vector<u32> font_data_u32(font_file->GetSize() / sizeof(u32));
274 const auto dir = 188 font_file->ReadBytes<u32>(font_data_u32.data(), font_file->GetSize());
275 FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard);
276 const auto& vfs{system.GetFilesystem()};
277 const auto temp_dir = vfs->CreateDirectory(temporary_dir, FileSys::Mode::ReadWrite);
278 FileSys::VfsRawCopyD(dir, temp_dir);
279 189
280 unpacked = true; 190 std::transform(font_data_u32.begin(), font_data_u32.end(), font_data_u32.begin(),
281} 191 Common::swap32);
282 192
283void WebBrowser::Finalize() { 193 std::vector<u8> decrypted_data(font_file->GetSize() - 8);
284 complete = true;
285 194
286 WebCommonReturnValue out{}; 195 NS::DecryptSharedFontToTTF(font_data_u32, decrypted_data);
287 out.result_code = 0;
288 out.last_url_size = 0;
289 196
290 std::vector<u8> data(sizeof(WebCommonReturnValue)); 197 FileSys::VirtualFile decrypted_font = std::make_shared<FileSys::VectorVfsFile>(
291 std::memcpy(data.data(), &out, sizeof(WebCommonReturnValue)); 198 std::move(decrypted_data), DECRYPTED_SHARED_FONTS[i]);
292 199
293 broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(data))); 200 const auto temp_dir =
294 broker.SignalStateChanged(); 201 system.GetFilesystem()->CreateDirectory(fonts_dir, FileSys::Mode::ReadWrite);
202
203 const auto out_file = temp_dir->CreateFile(DECRYPTED_SHARED_FONTS[i]);
295 204
296 if (!temporary_dir.empty() && Common::FS::IsDirectory(temporary_dir)) { 205 FileSys::VfsRawCopy(decrypted_font, out_file);
297 Common::FS::DeleteDirRecursively(temporary_dir);
298 } 206 }
299} 207}
300 208
301void WebBrowser::InitializeInternal() { 209} // namespace
302 using WebAppletInitializer = void (WebBrowser::*)();
303 210
304 constexpr std::array<WebAppletInitializer, SHIM_KIND_COUNT> functions{ 211WebBrowser::WebBrowser(Core::System& system_, const Core::Frontend::WebBrowserApplet& frontend_)
305 nullptr, &WebBrowser::InitializeShop, 212 : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {}
306 nullptr, &WebBrowser::InitializeOffline,
307 nullptr, nullptr,
308 nullptr, nullptr,
309 };
310 213
311 const auto index = static_cast<u32>(kind); 214WebBrowser::~WebBrowser() = default;
312 215
313	    if (index >= functions.size() || functions[index] == nullptr) {	216	void WebBrowser::Initialize() {
314 LOG_ERROR(Service_AM, "Invalid shim_kind={:08X}", index); 217 Applet::Initialize();
315 return;
316 }
317 218
318 const auto function = functions[index]; 219 LOG_INFO(Service_AM, "Initializing Web Browser Applet.");
319 (this->*function)();
320}
321 220
322void WebBrowser::ExecuteInternal() { 221 LOG_DEBUG(Service_AM,
323 using WebAppletExecutor = void (WebBrowser::*)(); 222 "Initializing Applet with common_args: arg_version={}, lib_version={}, "
223 "play_startup_sound={}, size={}, system_tick={}, theme_color={}",
224 common_args.arguments_version, common_args.library_version,
225 common_args.play_startup_sound, common_args.size, common_args.system_tick,
226 common_args.theme_color);
324 227
325 constexpr std::array<WebAppletExecutor, SHIM_KIND_COUNT> functions{ 228 web_applet_version = WebAppletVersion{common_args.library_version};
326 nullptr, &WebBrowser::ExecuteShop,
327 nullptr, &WebBrowser::ExecuteOffline,
328 nullptr, nullptr,
329 nullptr, nullptr,
330 };
331 229
332 const auto index = static_cast<u32>(kind); 230 const auto web_arg_storage = broker.PopNormalDataToApplet();
231 ASSERT(web_arg_storage != nullptr);
333 232
334	    if (index >= functions.size() || functions[index] == nullptr) {	233	    const auto& web_arg = web_arg_storage->GetData();
335 LOG_ERROR(Service_AM, "Invalid shim_kind={:08X}", index); 234 ASSERT_OR_EXECUTE(web_arg.size() >= sizeof(WebArgHeader), { return; });
336 return;
337 }
338 235
339 const auto function = functions[index]; 236 web_arg_input_tlv_map = ReadWebArgs(web_arg, web_arg_header);
340 (this->*function)();
341}
342 237
343void WebBrowser::InitializeShop() { 238 LOG_DEBUG(Service_AM, "WebArgHeader: total_tlv_entries={}, shim_kind={}",
344 if (frontend_e_commerce == nullptr) { 239 web_arg_header.total_tlv_entries, web_arg_header.shim_kind);
345 LOG_ERROR(Service_AM, "Missing ECommerce Applet frontend!");
346 status = RESULT_UNKNOWN;
347 return;
348 }
349 240
350 const auto user_id_data = args.find(WebArgTLVType::UserID); 241 ExtractSharedFonts(system);
351 242
352 user_id = std::nullopt; 243 switch (web_arg_header.shim_kind) {
353 if (user_id_data != args.end()) { 244 case ShimKind::Shop:
354 user_id = u128{}; 245 InitializeShop();
355 std::memcpy(user_id->data(), user_id_data->second.data(), sizeof(u128)); 246 break;
247 case ShimKind::Login:
248 InitializeLogin();
249 break;
250 case ShimKind::Offline:
251 InitializeOffline();
252 break;
253 case ShimKind::Share:
254 InitializeShare();
255 break;
256 case ShimKind::Web:
257 InitializeWeb();
258 break;
259 case ShimKind::Wifi:
260 InitializeWifi();
261 break;
262 case ShimKind::Lobby:
263 InitializeLobby();
264 break;
265 default:
266 UNREACHABLE_MSG("Invalid ShimKind={}", web_arg_header.shim_kind);
267 break;
356 } 268 }
269}
357 270
358 const auto url = args.find(WebArgTLVType::ShopArgumentsURL); 271bool WebBrowser::TransactionComplete() const {
272 return complete;
273}
359 274
360 if (url == args.end()) { 275ResultCode WebBrowser::GetStatus() const {
361 LOG_ERROR(Service_AM, "Missing EShop Arguments URL for initialization!"); 276 return status;
362 status = RESULT_UNKNOWN; 277}
363 return;
364 }
365 278
366 std::vector<std::string> split_query; 279void WebBrowser::ExecuteInteractive() {
367 Common::SplitString(Common::StringFromFixedZeroTerminatedBuffer( 280 UNIMPLEMENTED_MSG("WebSession is not implemented");
368 reinterpret_cast<const char*>(url->second.data()), url->second.size()), 281}
369 '?', split_query);
370
371 // 2 -> Main URL '?' Query Parameters
372 // Less is missing info, More is malformed
373 if (split_query.size() != 2) {
374	        LOG_ERROR(Service_AM, "EShop Arguments URL is malformed (expected exactly one '?')");
375 status = RESULT_UNKNOWN;
376 return;
377 }
378 282
379 std::vector<std::string> queries; 283void WebBrowser::Execute() {
380 Common::SplitString(split_query[1], '&', queries); 284 switch (web_arg_header.shim_kind) {
285 case ShimKind::Shop:
286 ExecuteShop();
287 break;
288 case ShimKind::Login:
289 ExecuteLogin();
290 break;
291 case ShimKind::Offline:
292 ExecuteOffline();
293 break;
294 case ShimKind::Share:
295 ExecuteShare();
296 break;
297 case ShimKind::Web:
298 ExecuteWeb();
299 break;
300 case ShimKind::Wifi:
301 ExecuteWifi();
302 break;
303 case ShimKind::Lobby:
304 ExecuteLobby();
305 break;
306 default:
307 UNREACHABLE_MSG("Invalid ShimKind={}", web_arg_header.shim_kind);
308 WebBrowserExit(WebExitReason::EndButtonPressed);
309 break;
310 }
311}
381 312
382 const auto split_single_query = 313void WebBrowser::ExtractOfflineRomFS() {
383 [](const std::string& in) -> std::pair<std::string, std::string> { 314 LOG_DEBUG(Service_AM, "Extracting RomFS to {}", offline_cache_dir);
384 const auto index = in.find('=');
385 if (index == std::string::npos || index == in.size() - 1) {
386 return {in, ""};
387 }
388 315
389 return {in.substr(0, index), in.substr(index + 1)}; 316 const auto extracted_romfs_dir =
390 }; 317 FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard);
391 318
392 std::transform(queries.begin(), queries.end(), 319 const auto temp_dir =
393 std::inserter(shop_query, std::next(shop_query.begin())), split_single_query); 320 system.GetFilesystem()->CreateDirectory(offline_cache_dir, FileSys::Mode::ReadWrite);
394 321
395 const auto scene = shop_query.find("scene"); 322 FileSys::VfsRawCopyD(extracted_romfs_dir, temp_dir);
323}
396 324
397 if (scene == shop_query.end()) { 325void WebBrowser::WebBrowserExit(WebExitReason exit_reason, std::string last_url) {
398 LOG_ERROR(Service_AM, "No scene parameter was passed via shop query!"); 326 if ((web_arg_header.shim_kind == ShimKind::Share &&
399 status = RESULT_UNKNOWN; 327 web_applet_version >= WebAppletVersion::Version196608) ||
400 return; 328 (web_arg_header.shim_kind == ShimKind::Web &&
329 web_applet_version >= WebAppletVersion::Version524288)) {
330 // TODO: Push Output TLVs instead of a WebCommonReturnValue
401 } 331 }
402 332
403 const std::map<std::string, ShopWebTarget, std::less<>> target_map{ 333 WebCommonReturnValue web_common_return_value;
404 {"product_detail", ShopWebTarget::ApplicationInfo},
405 {"aocs", ShopWebTarget::AddOnContentList},
406 {"subscriptions", ShopWebTarget::SubscriptionList},
407 {"consumption", ShopWebTarget::ConsumableItemList},
408 {"settings", ShopWebTarget::Settings},
409 {"top", ShopWebTarget::Home},
410 };
411 334
412 const auto target = target_map.find(scene->second); 335 web_common_return_value.exit_reason = exit_reason;
413 if (target == target_map.end()) { 336 std::memcpy(&web_common_return_value.last_url, last_url.data(), last_url.size());
414 LOG_ERROR(Service_AM, "Scene for shop query is invalid! (scene={})", scene->second); 337 web_common_return_value.last_url_size = last_url.size();
415 status = RESULT_UNKNOWN;
416 return;
417 }
418 338
419 shop_web_target = target->second; 339 LOG_DEBUG(Service_AM, "WebCommonReturnValue: exit_reason={}, last_url={}, last_url_size={}",
340 exit_reason, last_url, last_url.size());
420 341
421 const auto title_id_data = shop_query.find("dst_app_id"); 342 complete = true;
422 if (title_id_data != shop_query.end()) { 343 std::vector<u8> out_data(sizeof(WebCommonReturnValue));
423 title_id = std::stoull(title_id_data->second, nullptr, 0x10); 344 std::memcpy(out_data.data(), &web_common_return_value, out_data.size());
424 } 345 broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data)));
346 broker.SignalStateChanged();
347}
425 348
426 const auto mode_data = shop_query.find("mode"); 349bool WebBrowser::InputTLVExistsInMap(WebArgInputTLVType input_tlv_type) const {
427 if (mode_data != shop_query.end()) { 350 return web_arg_input_tlv_map.find(input_tlv_type) != web_arg_input_tlv_map.end();
428 shop_full_display = mode_data->second == "full";
429 }
430} 351}
431 352
432void WebBrowser::InitializeOffline() { 353std::optional<std::vector<u8>> WebBrowser::GetInputTLVData(WebArgInputTLVType input_tlv_type) {
433 if (args.find(WebArgTLVType::DocumentPath) == args.end() || 354 const auto map_it = web_arg_input_tlv_map.find(input_tlv_type);
434 args.find(WebArgTLVType::DocumentKind) == args.end() || 355
435 args.find(WebArgTLVType::ApplicationID) == args.end()) { 356 if (map_it == web_arg_input_tlv_map.end()) {
436 status = RESULT_UNKNOWN; 357 return std::nullopt;
437 LOG_ERROR(Service_AM, "Missing necessary parameters for initialization!");
438 } 358 }
439 359
440 const auto url_data = args[WebArgTLVType::DocumentPath]; 360 return map_it->second;
441 filename = Common::StringFromFixedZeroTerminatedBuffer( 361}
442 reinterpret_cast<const char*>(url_data.data()), url_data.size());
443 362
444 OfflineWebSource source; 363void WebBrowser::InitializeShop() {}
445 ASSERT(args[WebArgTLVType::DocumentKind].size() >= 4);
446 std::memcpy(&source, args[WebArgTLVType::DocumentKind].data(), sizeof(OfflineWebSource));
447 364
448 constexpr std::array<const char*, 3> WEB_SOURCE_NAMES{ 365void WebBrowser::InitializeLogin() {}
449 "manual", 366
450 "legal", 367void WebBrowser::InitializeOffline() {
451 "system", 368 const auto document_path =
452 }; 369 ParseStringValue(GetInputTLVData(WebArgInputTLVType::DocumentPath).value());
370
371 const auto document_kind =
372 ParseRawValue<DocumentKind>(GetInputTLVData(WebArgInputTLVType::DocumentKind).value());
373
374 std::string additional_paths;
453 375
454 temporary_dir = 376 switch (document_kind) {
455 Common::FS::SanitizePath(Common::FS::GetUserPath(Common::FS::UserPath::CacheDir) + 377 case DocumentKind::OfflineHtmlPage:
456 "web_applet_" + WEB_SOURCE_NAMES[static_cast<u32>(source) - 1], 378 default:
457 Common::FS::DirectorySeparator::PlatformDefault); 379 title_id = system.CurrentProcess()->GetTitleID();
458 Common::FS::DeleteDirRecursively(temporary_dir); 380 nca_type = FileSys::ContentRecordType::HtmlDocument;
459 381 additional_paths = "html-document";
460 u64 title_id = 0; // 0 corresponds to current process
461 ASSERT(args[WebArgTLVType::ApplicationID].size() >= 0x8);
462 std::memcpy(&title_id, args[WebArgTLVType::ApplicationID].data(), sizeof(u64));
463 FileSys::ContentRecordType type = FileSys::ContentRecordType::Data;
464
465 switch (source) {
466 case OfflineWebSource::OfflineHtmlPage:
467 // While there is an AppID TLV field, in official SW this is always ignored.
468 title_id = 0;
469 type = FileSys::ContentRecordType::HtmlDocument;
470 break; 382 break;
471 case OfflineWebSource::ApplicationLegalInformation: 383 case DocumentKind::ApplicationLegalInformation:
472 type = FileSys::ContentRecordType::LegalInformation; 384 title_id = ParseRawValue<u64>(GetInputTLVData(WebArgInputTLVType::ApplicationID).value());
385 nca_type = FileSys::ContentRecordType::LegalInformation;
473 break; 386 break;
474 case OfflineWebSource::SystemDataPage: 387 case DocumentKind::SystemDataPage:
475 type = FileSys::ContentRecordType::Data; 388 title_id = ParseRawValue<u64>(GetInputTLVData(WebArgInputTLVType::SystemDataID).value());
389 nca_type = FileSys::ContentRecordType::Data;
476 break; 390 break;
477 } 391 }
478 392
479 if (title_id == 0) { 393 static constexpr std::array<const char*, 3> RESOURCE_TYPES{
480 title_id = system.CurrentProcess()->GetTitleID(); 394 "manual",
481 } 395 "legal_information",
396 "system_data",
397 };
482 398
483 offline_romfs = GetApplicationRomFS(system, title_id, type); 399 offline_cache_dir = Common::FS::SanitizePath(
484 if (offline_romfs == nullptr) { 400 fmt::format("{}/offline_web_applet_{}/{:016X}",
485 status = RESULT_UNKNOWN; 401 Common::FS::GetUserPath(Common::FS::UserPath::CacheDir),
486 LOG_ERROR(Service_AM, "Failed to find offline data for request!"); 402 RESOURCE_TYPES[static_cast<u32>(document_kind) - 1], title_id),
487 } 403 Common::FS::DirectorySeparator::PlatformDefault);
488 404
489 std::string path_additional_directory; 405 offline_document = Common::FS::SanitizePath(
490 if (source == OfflineWebSource::OfflineHtmlPage) { 406 fmt::format("{}/{}/{}", offline_cache_dir, additional_paths, document_path),
491 path_additional_directory = std::string(DIR_SEP).append("html-document"); 407 Common::FS::DirectorySeparator::PlatformDefault);
492 } 408}
409
410void WebBrowser::InitializeShare() {}
493 411
494 filename = 412void WebBrowser::InitializeWeb() {
495 Common::FS::SanitizePath(temporary_dir + path_additional_directory + DIR_SEP + filename, 413 external_url = ParseStringValue(GetInputTLVData(WebArgInputTLVType::InitialURL).value());
496 Common::FS::DirectorySeparator::PlatformDefault);
497} 414}
498 415
416void WebBrowser::InitializeWifi() {}
417
418void WebBrowser::InitializeLobby() {}
419
499void WebBrowser::ExecuteShop() { 420void WebBrowser::ExecuteShop() {
500 const auto callback = [this]() { Finalize(); }; 421 LOG_WARNING(Service_AM, "(STUBBED) called, Shop Applet is not implemented");
422 WebBrowserExit(WebExitReason::EndButtonPressed);
423}
501 424
502 const auto check_optional_parameter = [this](const auto& p) { 425void WebBrowser::ExecuteLogin() {
503 if (!p.has_value()) { 426 LOG_WARNING(Service_AM, "(STUBBED) called, Login Applet is not implemented");
504 LOG_ERROR(Service_AM, "Missing one or more necessary parameters for execution!"); 427 WebBrowserExit(WebExitReason::EndButtonPressed);
505 status = RESULT_UNKNOWN; 428}
506 return false;
507 }
508 429
509 return true; 430void WebBrowser::ExecuteOffline() {
510 }; 431 const auto main_url = Common::FS::SanitizePath(GetMainURL(offline_document),
432 Common::FS::DirectorySeparator::PlatformDefault);
511 433
512 switch (shop_web_target) { 434 if (!Common::FS::Exists(main_url)) {
513 case ShopWebTarget::ApplicationInfo: 435 offline_romfs = GetOfflineRomFS(system, title_id, nca_type);
514 if (!check_optional_parameter(title_id)) 436
515 return; 437 if (offline_romfs == nullptr) {
516 frontend_e_commerce->ShowApplicationInformation(callback, *title_id, user_id, 438 LOG_ERROR(Service_AM,
517 shop_full_display, shop_extra_parameter); 439 "RomFS with title_id={:016X} and nca_type={} cannot be extracted!", title_id,
518 break; 440 nca_type);
519 case ShopWebTarget::AddOnContentList: 441 WebBrowserExit(WebExitReason::WindowClosed);
520 if (!check_optional_parameter(title_id))
521 return;
522 frontend_e_commerce->ShowAddOnContentList(callback, *title_id, user_id, shop_full_display);
523 break;
524 case ShopWebTarget::ConsumableItemList:
525 if (!check_optional_parameter(title_id))
526 return;
527 frontend_e_commerce->ShowConsumableItemList(callback, *title_id, user_id);
528 break;
529 case ShopWebTarget::Home:
530 if (!check_optional_parameter(user_id))
531 return;
532 if (!check_optional_parameter(shop_full_display))
533 return;
534 frontend_e_commerce->ShowShopHome(callback, *user_id, *shop_full_display);
535 break;
536 case ShopWebTarget::Settings:
537 if (!check_optional_parameter(user_id))
538 return;
539 if (!check_optional_parameter(shop_full_display))
540 return;
541 frontend_e_commerce->ShowSettings(callback, *user_id, *shop_full_display);
542 break;
543 case ShopWebTarget::SubscriptionList:
544 if (!check_optional_parameter(title_id))
545 return; 442 return;
546 frontend_e_commerce->ShowSubscriptionList(callback, *title_id, user_id); 443 }
547 break;
548 default:
549 UNREACHABLE();
550 } 444 }
445
446 LOG_INFO(Service_AM, "Opening offline document at {}", offline_document);
447
448 frontend.OpenLocalWebPage(
449 offline_document, [this] { ExtractOfflineRomFS(); },
450 [this](WebExitReason exit_reason, std::string last_url) {
451 WebBrowserExit(exit_reason, last_url);
452 });
551} 453}
552 454
553void WebBrowser::ExecuteOffline() { 455void WebBrowser::ExecuteShare() {
554 frontend.OpenPageLocal( 456 LOG_WARNING(Service_AM, "(STUBBED) called, Share Applet is not implemented");
555 filename, [this] { UnpackRomFS(); }, [this] { Finalize(); }); 457 WebBrowserExit(WebExitReason::EndButtonPressed);
458}
459
460void WebBrowser::ExecuteWeb() {
461 LOG_INFO(Service_AM, "Opening external URL at {}", external_url);
462
463 frontend.OpenExternalWebPage(external_url,
464 [this](WebExitReason exit_reason, std::string last_url) {
465 WebBrowserExit(exit_reason, last_url);
466 });
556} 467}
557 468
469void WebBrowser::ExecuteWifi() {
470 LOG_WARNING(Service_AM, "(STUBBED) called, Wifi Applet is not implemented");
471 WebBrowserExit(WebExitReason::EndButtonPressed);
472}
473
474void WebBrowser::ExecuteLobby() {
475 LOG_WARNING(Service_AM, "(STUBBED) called, Lobby Applet is not implemented");
476 WebBrowserExit(WebExitReason::EndButtonPressed);
477}
558} // namespace Service::AM::Applets 478} // namespace Service::AM::Applets
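
The new ReadWebArgs above walks a length-prefixed TLV stream: an 8-byte header carrying the entry count and shim kind, then one 8-byte TLV record plus its payload per entry. Below is a minimal, self-contained sketch of that walk; Header, InputTLV, and ReadArgs are local stand-ins for the real definitions in web_types.h, and truncated input ends the loop early, exactly as in the code above.

// Minimal sketch of the TLV walk performed by ReadWebArgs. The struct
// definitions are local stand-ins carrying the same 8-byte layouts as
// the real records in web_types.h.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <map>
#include <vector>

struct Header {
    std::uint16_t total_tlv_entries;
    std::uint16_t padding;
    std::uint32_t shim_kind;
};
static_assert(sizeof(Header) == 8, "Header has incorrect size.");

struct InputTLV {
    std::uint16_t input_tlv_type;
    std::uint16_t arg_data_size;
    std::uint32_t padding;
};
static_assert(sizeof(InputTLV) == 8, "InputTLV has incorrect size.");

std::map<std::uint16_t, std::vector<std::uint8_t>> ReadArgs(
    const std::vector<std::uint8_t>& web_arg) {
    std::map<std::uint16_t, std::vector<std::uint8_t>> out;
    if (web_arg.size() < sizeof(Header)) {
        return out;
    }

    Header header{};
    std::memcpy(&header, web_arg.data(), sizeof(Header));

    std::size_t offset = sizeof(Header);
    for (std::uint16_t i = 0; i < header.total_tlv_entries; ++i) {
        // Truncated input ends the walk; everything parsed so far is kept.
        if (web_arg.size() < offset + sizeof(InputTLV)) {
            break;
        }
        InputTLV tlv{};
        std::memcpy(&tlv, web_arg.data() + offset, sizeof(InputTLV));
        offset += sizeof(InputTLV);

        if (web_arg.size() < offset + tlv.arg_data_size) {
            break;
        }
        std::vector<std::uint8_t> data(tlv.arg_data_size);
        std::memcpy(data.data(), web_arg.data() + offset, tlv.arg_data_size);
        offset += tlv.arg_data_size;

        out.insert_or_assign(tlv.input_tlv_type, std::move(data));
    }
    return out;
}
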
diff --git a/src/core/hle/service/am/applets/web_browser.h b/src/core/hle/service/am/applets/web_browser.h
index 8d4027411..04c274754 100644
--- a/src/core/hle/service/am/applets/web_browser.h
+++ b/src/core/hle/service/am/applets/web_browser.h
@@ -1,28 +1,31 @@
1// Copyright 2018 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <map> 7#include <optional>
8
9#include "common/common_funcs.h"
10#include "common/common_types.h"
8#include "core/file_sys/vfs_types.h" 11#include "core/file_sys/vfs_types.h"
9#include "core/hle/service/am/am.h" 12#include "core/hle/result.h"
10#include "core/hle/service/am/applets/applets.h" 13#include "core/hle/service/am/applets/applets.h"
14#include "core/hle/service/am/applets/web_types.h"
11 15
12namespace Core { 16namespace Core {
13class System; 17class System;
14} 18}
15 19
16namespace Service::AM::Applets { 20namespace FileSys {
21enum class ContentRecordType : u8;
22}
17 23
18enum class ShimKind : u32; 24namespace Service::AM::Applets {
19enum class ShopWebTarget;
20enum class WebArgTLVType : u16;
21 25
22class WebBrowser final : public Applet { 26class WebBrowser final : public Applet {
23public: 27public:
24 WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_, 28 WebBrowser(Core::System& system_, const Core::Frontend::WebBrowserApplet& frontend_);
25 Core::Frontend::ECommerceApplet* frontend_e_commerce_ = nullptr);
26 29
27 ~WebBrowser() override; 30 ~WebBrowser() override;
28 31
@@ -33,49 +36,50 @@ public:
33 void ExecuteInteractive() override; 36 void ExecuteInteractive() override;
34 void Execute() override; 37 void Execute() override;
35 38
36 // Callback to be fired when the frontend needs the manual RomFS unpacked to temporary 39 void ExtractOfflineRomFS();
37 // directory. This is a blocking call and may take a while as some manuals can be up to 100MB in
38 // size. Attempting to access files at filename before invocation is likely to not work.
39 void UnpackRomFS();
40 40
41 // Callback to be fired when the frontend is finished browsing. This will delete the temporary 41 void WebBrowserExit(WebExitReason exit_reason, std::string last_url = "");
42 // manual RomFS extracted files, so ensure this is only called at actual finalization.
43 void Finalize();
44 42
45private: 43private:
46 void InitializeInternal(); 44 bool InputTLVExistsInMap(WebArgInputTLVType input_tlv_type) const;
47 void ExecuteInternal();
48 45
49 // Specific initializers for the types of web applets 46 std::optional<std::vector<u8>> GetInputTLVData(WebArgInputTLVType input_tlv_type);
47
48 // Initializers for the various types of browser applets
50 void InitializeShop(); 49 void InitializeShop();
50 void InitializeLogin();
51 void InitializeOffline(); 51 void InitializeOffline();
52 void InitializeShare();
53 void InitializeWeb();
54 void InitializeWifi();
55 void InitializeLobby();
52 56
53 // Specific executors for the types of web applets 57 // Executors for the various types of browser applets
54 void ExecuteShop(); 58 void ExecuteShop();
59 void ExecuteLogin();
55 void ExecuteOffline(); 60 void ExecuteOffline();
61 void ExecuteShare();
62 void ExecuteWeb();
63 void ExecuteWifi();
64 void ExecuteLobby();
56 65
57 Core::Frontend::WebBrowserApplet& frontend; 66 const Core::Frontend::WebBrowserApplet& frontend;
58
59 // Extra frontends for specialized functions
60 Core::Frontend::ECommerceApplet* frontend_e_commerce;
61 67
62 bool complete = false; 68 bool complete{false};
63 bool unpacked = false; 69 ResultCode status{RESULT_SUCCESS};
64 ResultCode status = RESULT_SUCCESS;
65 70
66 ShimKind kind; 71 WebAppletVersion web_applet_version;
67 std::map<WebArgTLVType, std::vector<u8>> args; 72 WebExitReason web_exit_reason;
73 WebArgHeader web_arg_header;
74 WebArgInputTLVMap web_arg_input_tlv_map;
68 75
76 u64 title_id;
77 FileSys::ContentRecordType nca_type;
78 std::string offline_cache_dir;
79 std::string offline_document;
69 FileSys::VirtualFile offline_romfs; 80 FileSys::VirtualFile offline_romfs;
70 std::string temporary_dir; 81
71 std::string filename; 82 std::string external_url;
72
73 ShopWebTarget shop_web_target;
74 std::map<std::string, std::string, std::less<>> shop_query;
75 std::optional<u64> title_id = 0;
76 std::optional<u128> user_id;
77 std::optional<bool> shop_full_display;
78 std::string shop_extra_parameter;
79 83
80 Core::System& system; 84 Core::System& system;
81}; 85};
diff --git a/src/core/hle/service/am/applets/web_types.h b/src/core/hle/service/am/applets/web_types.h
new file mode 100644
index 000000000..419c2bf79
--- /dev/null
+++ b/src/core/hle/service/am/applets/web_types.h
@@ -0,0 +1,178 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <unordered_map>
9#include <vector>
10
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "common/swap.h"
14
15namespace Service::AM::Applets {
16
17enum class WebAppletVersion : u32_le {
18 Version0 = 0x0, // Only used by WifiWebAuthApplet
19 Version131072 = 0x20000, // 1.0.0 - 2.3.0
20 Version196608 = 0x30000, // 3.0.0 - 4.1.0
21 Version327680 = 0x50000, // 5.0.0 - 5.1.0
22 Version393216 = 0x60000, // 6.0.0 - 7.0.1
23 Version524288 = 0x80000, // 8.0.0+
24};
25
26enum class ShimKind : u32 {
27 Shop = 1,
28 Login = 2,
29 Offline = 3,
30 Share = 4,
31 Web = 5,
32 Wifi = 6,
33 Lobby = 7,
34};
35
36enum class WebExitReason : u32 {
37 EndButtonPressed = 0,
38 BackButtonPressed = 1,
39 ExitRequested = 2,
40 CallbackURL = 3,
41 WindowClosed = 4,
42 ErrorDialog = 7,
43};
44
45enum class WebArgInputTLVType : u16 {
46 InitialURL = 0x1,
47 CallbackURL = 0x3,
48 CallbackableURL = 0x4,
49 ApplicationID = 0x5,
50 DocumentPath = 0x6,
51 DocumentKind = 0x7,
52 SystemDataID = 0x8,
53 ShareStartPage = 0x9,
54 Whitelist = 0xA,
55 News = 0xB,
56 UserID = 0xE,
57 AlbumEntry0 = 0xF,
58 ScreenShotEnabled = 0x10,
59 EcClientCertEnabled = 0x11,
60 PlayReportEnabled = 0x13,
61 BootDisplayKind = 0x17,
62 BackgroundKind = 0x18,
63 FooterEnabled = 0x19,
64 PointerEnabled = 0x1A,
65 LeftStickMode = 0x1B,
66 KeyRepeatFrame1 = 0x1C,
67 KeyRepeatFrame2 = 0x1D,
68 BootAsMediaPlayerInverted = 0x1E,
69 DisplayURLKind = 0x1F,
70 BootAsMediaPlayer = 0x21,
71 ShopJumpEnabled = 0x22,
72 MediaAutoPlayEnabled = 0x23,
73 LobbyParameter = 0x24,
74 ApplicationAlbumEntry = 0x26,
75 JsExtensionEnabled = 0x27,
76 AdditionalCommentText = 0x28,
77 TouchEnabledOnContents = 0x29,
78 UserAgentAdditionalString = 0x2A,
79 AdditionalMediaData0 = 0x2B,
80 MediaPlayerAutoCloseEnabled = 0x2C,
81 PageCacheEnabled = 0x2D,
82 WebAudioEnabled = 0x2E,
83 YouTubeVideoWhitelist = 0x31,
84 FooterFixedKind = 0x32,
85 PageFadeEnabled = 0x33,
86 MediaCreatorApplicationRatingAge = 0x34,
87 BootLoadingIconEnabled = 0x35,
88 PageScrollIndicatorEnabled = 0x36,
89 MediaPlayerSpeedControlEnabled = 0x37,
90 AlbumEntry1 = 0x38,
91 AlbumEntry2 = 0x39,
92 AlbumEntry3 = 0x3A,
93 AdditionalMediaData1 = 0x3B,
94 AdditionalMediaData2 = 0x3C,
95 AdditionalMediaData3 = 0x3D,
96 BootFooterButton = 0x3E,
97 OverrideWebAudioVolume = 0x3F,
98 OverrideMediaAudioVolume = 0x40,
99 BootMode = 0x41,
100 WebSessionEnabled = 0x42,
101 MediaPlayerOfflineEnabled = 0x43,
102};
103
104enum class WebArgOutputTLVType : u16 {
105 ShareExitReason = 0x1,
106 LastURL = 0x2,
107 LastURLSize = 0x3,
108 SharePostResult = 0x4,
109 PostServiceName = 0x5,
110 PostServiceNameSize = 0x6,
111 PostID = 0x7,
112 PostIDSize = 0x8,
113 MediaPlayerAutoClosedByCompletion = 0x9,
114};
115
116enum class DocumentKind : u32 {
117 OfflineHtmlPage = 1,
118 ApplicationLegalInformation = 2,
119 SystemDataPage = 3,
120};
121
122enum class ShareStartPage : u32 {
123 Default,
124 Settings,
125};
126
127enum class BootDisplayKind : u32 {
128 Default,
129 White,
130 Black,
131};
132
133enum class BackgroundKind : u32 {
134 Default,
135};
136
137enum class LeftStickMode : u32 {
138 Pointer,
139 Cursor,
140};
141
142enum class WebSessionBootMode : u32 {
143 AllForeground,
144 AllForegroundInitiallyHidden,
145};
146
147struct WebArgHeader {
148 u16 total_tlv_entries{};
149 INSERT_PADDING_BYTES(2);
150 ShimKind shim_kind{};
151};
152static_assert(sizeof(WebArgHeader) == 0x8, "WebArgHeader has incorrect size.");
153
154struct WebArgInputTLV {
155 WebArgInputTLVType input_tlv_type{};
156 u16 arg_data_size{};
157 INSERT_PADDING_WORDS(1);
158};
159static_assert(sizeof(WebArgInputTLV) == 0x8, "WebArgInputTLV has incorrect size.");
160
161struct WebArgOutputTLV {
162 WebArgOutputTLVType output_tlv_type{};
163 u16 arg_data_size{};
164 INSERT_PADDING_WORDS(1);
165};
166static_assert(sizeof(WebArgOutputTLV) == 0x8, "WebArgOutputTLV has incorrect size.");
167
168struct WebCommonReturnValue {
169 WebExitReason exit_reason{};
170 INSERT_PADDING_WORDS(1);
171 std::array<char, 0x1000> last_url{};
172 u64 last_url_size{};
173};
174static_assert(sizeof(WebCommonReturnValue) == 0x1010, "WebCommonReturnValue has incorrect size.");
175
176using WebArgInputTLVMap = std::unordered_map<WebArgInputTLVType, std::vector<u8>>;
177
178} // namespace Service::AM::Applets
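
The static_asserts above fix both records at 8 bytes, which pins every field offset. As an illustration only (MakeWebArg is not part of the commit), serializing a header plus a single string TLV with that layout looks like this; feeding such a blob to the parse loop sketched after the web_browser.cpp hunk yields one map entry keyed by the TLV type.

// Illustration: serialize one WebArg blob (header + one string TLV)
// matching the layouts asserted above. Offsets follow from the two
// 8-byte records: count at 0, shim_kind at 4, tlv_type at 8,
// arg_data_size at 10, payload at 16.
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

std::vector<std::uint8_t> MakeWebArg(std::uint32_t shim_kind,
                                     std::uint16_t tlv_type,
                                     const std::string& payload) {
    std::vector<std::uint8_t> blob(16 + payload.size());
    const std::uint16_t count = 1;
    const auto size = static_cast<std::uint16_t>(payload.size());
    std::memcpy(blob.data() + 0, &count, sizeof(count));
    std::memcpy(blob.data() + 4, &shim_kind, sizeof(shim_kind));
    std::memcpy(blob.data() + 8, &tlv_type, sizeof(tlv_type));
    std::memcpy(blob.data() + 10, &size, sizeof(size));
    std::memcpy(blob.data() + 16, payload.data(), payload.size());
    return blob;
}
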
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index 6abac3f78..23e28565b 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -7,6 +7,7 @@
7#include <vector> 7#include <vector>
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/file_sys/common_funcs.h"
10#include "core/file_sys/content_archive.h" 11#include "core/file_sys/content_archive.h"
11#include "core/file_sys/control_metadata.h" 12#include "core/file_sys/control_metadata.h"
12#include "core/file_sys/nca_metadata.h" 13#include "core/file_sys/nca_metadata.h"
@@ -23,11 +24,8 @@
23 24
24namespace Service::AOC { 25namespace Service::AOC {
25 26
26constexpr u64 DLC_BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000;
27constexpr u64 DLC_BASE_TO_AOC_ID = 0x1000;
28
29static bool CheckAOCTitleIDMatchesBase(u64 title_id, u64 base) { 27static bool CheckAOCTitleIDMatchesBase(u64 title_id, u64 base) {
30 return (title_id & DLC_BASE_TITLE_ID_MASK) == base; 28 return FileSys::GetBaseTitleID(title_id) == base;
31} 29}
32 30
33static std::vector<u64> AccumulateAOCTitleIDs(Core::System& system) { 31static std::vector<u64> AccumulateAOCTitleIDs(Core::System& system) {
@@ -48,6 +46,62 @@ static std::vector<u64> AccumulateAOCTitleIDs(Core::System& system) {
48 return add_on_content; 46 return add_on_content;
49} 47}
50 48
49class IPurchaseEventManager final : public ServiceFramework<IPurchaseEventManager> {
50public:
51 explicit IPurchaseEventManager(Core::System& system_)
52 : ServiceFramework{system_, "IPurchaseEventManager"} {
53 // clang-format off
54 static const FunctionInfo functions[] = {
55 {0, &IPurchaseEventManager::SetDefaultDeliveryTarget, "SetDefaultDeliveryTarget"},
56 {1, &IPurchaseEventManager::SetDeliveryTarget, "SetDeliveryTarget"},
57 {2, &IPurchaseEventManager::GetPurchasedEventReadableHandle, "GetPurchasedEventReadableHandle"},
58 {3, nullptr, "PopPurchasedProductInfo"},
59 {4, nullptr, "PopPurchasedProductInfoWithUid"},
60 };
61 // clang-format on
62
63 RegisterHandlers(functions);
64
65 purchased_event = Kernel::WritableEvent::CreateEventPair(
66 system.Kernel(), "IPurchaseEventManager:PurchasedEvent");
67 }
68
69private:
70 void SetDefaultDeliveryTarget(Kernel::HLERequestContext& ctx) {
71 IPC::RequestParser rp{ctx};
72
73 const auto unknown_1 = rp.Pop<u64>();
74 [[maybe_unused]] const auto unknown_2 = ctx.ReadBuffer();
75
76 LOG_WARNING(Service_AOC, "(STUBBED) called, unknown_1={}", unknown_1);
77
78 IPC::ResponseBuilder rb{ctx, 2};
79 rb.Push(RESULT_SUCCESS);
80 }
81
82 void SetDeliveryTarget(Kernel::HLERequestContext& ctx) {
83 IPC::RequestParser rp{ctx};
84
85 const auto unknown_1 = rp.Pop<u64>();
86 [[maybe_unused]] const auto unknown_2 = ctx.ReadBuffer();
87
88 LOG_WARNING(Service_AOC, "(STUBBED) called, unknown_1={}", unknown_1);
89
90 IPC::ResponseBuilder rb{ctx, 2};
91 rb.Push(RESULT_SUCCESS);
92 }
93
94 void GetPurchasedEventReadableHandle(Kernel::HLERequestContext& ctx) {
95 LOG_WARNING(Service_AOC, "called");
96
97 IPC::ResponseBuilder rb{ctx, 2, 1};
98 rb.Push(RESULT_SUCCESS);
99 rb.PushCopyObjects(purchased_event.readable);
100 }
101
102 Kernel::EventPair purchased_event;
103};
104
51AOC_U::AOC_U(Core::System& system_) 105AOC_U::AOC_U(Core::System& system_)
52 : ServiceFramework{system_, "aoc:u"}, add_on_content{AccumulateAOCTitleIDs(system)} { 106 : ServiceFramework{system_, "aoc:u"}, add_on_content{AccumulateAOCTitleIDs(system)} {
53 // clang-format off 107 // clang-format off
@@ -62,8 +116,8 @@ AOC_U::AOC_U(Core::System& system_)
62 {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"}, 116 {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},
63 {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, 117 {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"},
64 {9, nullptr, "GetAddOnContentLostErrorCode"}, 118 {9, nullptr, "GetAddOnContentLostErrorCode"},
65 {100, nullptr, "CreateEcPurchasedEventManager"}, 119 {100, &AOC_U::CreateEcPurchasedEventManager, "CreateEcPurchasedEventManager"},
66 {101, nullptr, "CreatePermanentEcPurchasedEventManager"}, 120 {101, &AOC_U::CreatePermanentEcPurchasedEventManager, "CreatePermanentEcPurchasedEventManager"},
67 }; 121 };
68 // clang-format on 122 // clang-format on
69 123
@@ -123,11 +177,11 @@ void AOC_U::ListAddOnContent(Kernel::HLERequestContext& ctx) {
123 const auto& disabled = Settings::values.disabled_addons[current]; 177 const auto& disabled = Settings::values.disabled_addons[current];
124 if (std::find(disabled.begin(), disabled.end(), "DLC") == disabled.end()) { 178 if (std::find(disabled.begin(), disabled.end(), "DLC") == disabled.end()) {
125 for (u64 content_id : add_on_content) { 179 for (u64 content_id : add_on_content) {
126 if ((content_id & DLC_BASE_TITLE_ID_MASK) != current) { 180 if (FileSys::GetBaseTitleID(content_id) != current) {
127 continue; 181 continue;
128 } 182 }
129 183
130 out.push_back(static_cast<u32>(content_id & 0x7FF)); 184 out.push_back(static_cast<u32>(FileSys::GetAOCID(content_id)));
131 } 185 }
132 } 186 }
133 187
@@ -169,7 +223,7 @@ void AOC_U::GetAddOnContentBaseId(Kernel::HLERequestContext& ctx) {
169 223
170 const auto res = pm.GetControlMetadata(); 224 const auto res = pm.GetControlMetadata();
171 if (res.first == nullptr) { 225 if (res.first == nullptr) {
172 rb.Push(title_id + DLC_BASE_TO_AOC_ID); 226 rb.Push(FileSys::GetAOCBaseTitleID(title_id));
173 return; 227 return;
174 } 228 }
175 229
@@ -201,6 +255,22 @@ void AOC_U::GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx) {
201 rb.PushCopyObjects(aoc_change_event.readable); 255 rb.PushCopyObjects(aoc_change_event.readable);
202} 256}
203 257
258void AOC_U::CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx) {
259 LOG_WARNING(Service_AOC, "(STUBBED) called");
260
261 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
262 rb.Push(RESULT_SUCCESS);
263 rb.PushIpcInterface<IPurchaseEventManager>(system);
264}
265
266void AOC_U::CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx) {
267 LOG_WARNING(Service_AOC, "(STUBBED) called");
268
269 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
270 rb.Push(RESULT_SUCCESS);
271 rb.PushIpcInterface<IPurchaseEventManager>(system);
272}
273
204void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) { 274void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
205 std::make_shared<AOC_U>(system)->InstallAsService(service_manager); 275 std::make_shared<AOC_U>(system)->InstallAsService(service_manager);
206} 276}
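
The aoc_u.cpp hunk above replaces the file-local DLC_BASE_TITLE_ID_MASK / DLC_BASE_TO_AOC_ID constants with helpers from the new core/file_sys/common_funcs.h. That header is not shown here; the sketch below reconstructs a plausible shape for the three helpers purely from the removed constants and the old inline expressions (the 0x7FF mask comes from the old ListAddOnContent body). The constant names are invented.

// Plausible sketch of the FileSys helpers used above, reconstructed
// from the code this commit removes.
#include <cstdint>

namespace FileSys {

constexpr std::uint64_t AOC_TITLE_ID_MASK = 0x7FF;               // old `content_id & 0x7FF`
constexpr std::uint64_t BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000; // old DLC_BASE_TITLE_ID_MASK
constexpr std::uint64_t BASE_TO_AOC_OFFSET = 0x1000;             // old DLC_BASE_TO_AOC_ID

[[nodiscard]] constexpr std::uint64_t GetBaseTitleID(std::uint64_t title_id) {
    return title_id & BASE_TITLE_ID_MASK;
}

[[nodiscard]] constexpr std::uint64_t GetAOCID(std::uint64_t title_id) {
    return title_id & AOC_TITLE_ID_MASK;
}

[[nodiscard]] constexpr std::uint64_t GetAOCBaseTitleID(std::uint64_t title_id) {
    return title_id + BASE_TO_AOC_OFFSET;
}

} // namespace FileSys
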
diff --git a/src/core/hle/service/aoc/aoc_u.h b/src/core/hle/service/aoc/aoc_u.h
index 7628f4568..26ee51be0 100644
--- a/src/core/hle/service/aoc/aoc_u.h
+++ b/src/core/hle/service/aoc/aoc_u.h
@@ -27,6 +27,8 @@ private:
27 void GetAddOnContentBaseId(Kernel::HLERequestContext& ctx); 27 void GetAddOnContentBaseId(Kernel::HLERequestContext& ctx);
28 void PrepareAddOnContent(Kernel::HLERequestContext& ctx); 28 void PrepareAddOnContent(Kernel::HLERequestContext& ctx);
29 void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx); 29 void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx);
30 void CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
31 void CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
30 32
31 std::vector<u64> add_on_content; 33 std::vector<u64> add_on_content;
32 Kernel::EventPair aoc_change_event; 34 Kernel::EventPair aoc_change_event;
diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp
index ce993bad3..03636642b 100644
--- a/src/core/hle/service/apm/controller.cpp
+++ b/src/core/hle/service/apm/controller.cpp
@@ -48,8 +48,7 @@ void Controller::SetPerformanceConfiguration(PerformanceMode mode,
48 [config](const auto& entry) { return entry.first == config; }); 48 [config](const auto& entry) { return entry.first == config; });
49 49
50 if (iter == config_to_speed.cend()) { 50 if (iter == config_to_speed.cend()) {
51 LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}", 51 LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}", config);
52 static_cast<u32>(config));
53 return; 52 return;
54 } 53 }
55 54
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp
index 89442e21e..298f6d520 100644
--- a/src/core/hle/service/apm/interface.cpp
+++ b/src/core/hle/service/apm/interface.cpp
@@ -28,8 +28,7 @@ private:
28 28
29 const auto mode = rp.PopEnum<PerformanceMode>(); 29 const auto mode = rp.PopEnum<PerformanceMode>();
30 const auto config = rp.PopEnum<PerformanceConfiguration>(); 30 const auto config = rp.PopEnum<PerformanceConfiguration>();
31 LOG_DEBUG(Service_APM, "called mode={} config={}", static_cast<u32>(mode), 31 LOG_DEBUG(Service_APM, "called mode={} config={}", mode, config);
32 static_cast<u32>(config));
33 32
34 controller.SetPerformanceConfiguration(mode, config); 33 controller.SetPerformanceConfiguration(mode, config);
35 34
@@ -41,7 +40,7 @@ private:
41 IPC::RequestParser rp{ctx}; 40 IPC::RequestParser rp{ctx};
42 41
43 const auto mode = rp.PopEnum<PerformanceMode>(); 42 const auto mode = rp.PopEnum<PerformanceMode>();
44 LOG_DEBUG(Service_APM, "called mode={}", static_cast<u32>(mode)); 43 LOG_DEBUG(Service_APM, "called mode={}", mode);
45 44
46 IPC::ResponseBuilder rb{ctx, 3}; 45 IPC::ResponseBuilder rb{ctx, 3};
47 rb.Push(RESULT_SUCCESS); 46 rb.Push(RESULT_SUCCESS);
@@ -111,7 +110,7 @@ void APM_Sys::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
111 IPC::RequestParser rp{ctx}; 110 IPC::RequestParser rp{ctx};
112 const auto mode = rp.PopEnum<CpuBoostMode>(); 111 const auto mode = rp.PopEnum<CpuBoostMode>();
113 112
114 LOG_DEBUG(Service_APM, "called, mode={:08X}", static_cast<u32>(mode)); 113 LOG_DEBUG(Service_APM, "called, mode={:08X}", mode);
115 114
116 controller.SetFromCpuBoostMode(mode); 115 controller.SetFromCpuBoostMode(mode);
117 116
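
The apm log-site changes above (and the similar fatal and filesystem ones later in this diff) pass enums straight to the logger, which requires a fmt formatter for each enum type. One way to provide that against recent fmt releases is sketched below; ExampleMode is a stand-in, and the project's actual wiring may differ. Inheriting from the u32 formatter keeps format specs such as {:08X} working.

// Sketch: teach fmt to print an enum class directly so log sites can
// drop static_cast<u32>.
#include <cstdint>

#include <fmt/format.h>

// Stand-in enum; the real PerformanceMode/CpuBoostMode/etc. live in the
// service headers.
enum class ExampleMode : std::uint32_t { Normal = 0, Boost = 1 };

// Forward to the u32 formatter so specs like {:08X} keep working.
template <>
struct fmt::formatter<ExampleMode> : fmt::formatter<std::uint32_t> {
    template <typename FormatContext>
    auto format(ExampleMode mode, FormatContext& ctx) const {
        return fmt::formatter<std::uint32_t>::format(
            static_cast<std::uint32_t>(mode), ctx);
    }
};
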
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 145f47ee2..0cd797109 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -70,8 +70,10 @@ public:
70 Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioOutBufferReleased"); 70 Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioOutBufferReleased");
71 71
72 stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate, 72 stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
73 audio_params.channel_count, std::move(unique_name), 73 audio_params.channel_count, std::move(unique_name), [this] {
74 [this] { buffer_event.writable->Signal(); }); 74 const auto guard = LockService();
75 buffer_event.writable->Signal();
76 });
75 } 77 }
76 78
77private: 79private:
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 6e7b7316c..c5c22d053 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -49,16 +49,16 @@ public:
49 49
50 system_event = 50 system_event =
51 Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent"); 51 Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent");
52 renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), system.Memory(), 52 renderer = std::make_unique<AudioCore::AudioRenderer>(
53 audren_params, system_event.writable, 53 system.CoreTiming(), system.Memory(), audren_params,
54 instance_number); 54 [this]() {
55 const auto guard = LockService();
56 system_event.writable->Signal();
57 },
58 instance_number);
55 } 59 }
56 60
57private: 61private:
58 void UpdateAudioCallback() {
59 system_event.writable->Signal();
60 }
61
62 void GetSampleRate(Kernel::HLERequestContext& ctx) { 62 void GetSampleRate(Kernel::HLERequestContext& ctx) {
63 LOG_DEBUG(Service_Audio, "called"); 63 LOG_DEBUG(Service_Audio, "called");
64 64
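
Both audio callbacks above now take the service lock before signalling, since they fire from the core-timing thread rather than the service thread that owns the session state. The pattern in isolation, with stand-in types and an ordinary mutex in place of LockService():

// Sketch of the guarded-callback pattern introduced above.
#include <functional>
#include <mutex>

class ExampleAudioSession {
public:
    // Returns the completion callback handed to the audio core. It runs
    // on the core-timing thread, so it locks before touching state.
    std::function<void()> MakeBufferReleasedCallback() {
        return [this] {
            const std::scoped_lock guard{service_mutex}; // stand-in for LockService()
            SignalBufferEvent();
        };
    }

private:
    void SignalBufferEvent() {
        // buffer_event.writable->Signal() in the real service.
    }

    std::mutex service_mutex;
};
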
diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp
index 3b6f7498e..e43f3f47f 100644
--- a/src/core/hle/service/bcat/backend/boxcat.cpp
+++ b/src/core/hle/service/bcat/backend/boxcat.cpp
@@ -483,7 +483,7 @@ Boxcat::StatusResult Boxcat::GetStatus(std::optional<std::string>& global,
483 global = json["global"].get<std::string>(); 483 global = json["global"].get<std::string>();
484 484
485 if (json["games"].is_array()) { 485 if (json["games"].is_array()) {
486 for (const auto object : json["games"]) { 486 for (const auto& object : json["games"]) {
487 if (object.is_object() && object.find("name") != object.end()) { 487 if (object.is_object() && object.find("name") != object.end()) {
488 EventStatus detail{}; 488 EventStatus detail{};
489 if (object["header"].is_string()) { 489 if (object["header"].is_string()) {
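
The boxcat change above switches the range-for binding to const auto&: iterating a nlohmann::json array by value deep-copies every element (a whole JSON subtree) per iteration. A small sketch of the corrected idiom, assuming the same JSON shape the code above expects (VisitGames is a stand-in name):

// Sketch: bind by const reference to avoid per-element deep copies.
#include <nlohmann/json.hpp>

void VisitGames(const nlohmann::json& json) {
    if (!json.contains("games") || !json["games"].is_array()) {
        return;
    }
    for (const auto& object : json["games"]) { // const auto&: no copy
        if (object.is_object() && object.find("name") != object.end()) {
            // ... read object["name"], object["header"], etc. ...
        }
    }
}
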
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index 9b7672a91..13147472e 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -111,8 +111,9 @@ static void GenerateErrorReport(Core::System& system, ResultCode error_code,
111 111
112static void ThrowFatalError(Core::System& system, ResultCode error_code, FatalType fatal_type, 112static void ThrowFatalError(Core::System& system, ResultCode error_code, FatalType fatal_type,
113 const FatalInfo& info) { 113 const FatalInfo& info) {
114 LOG_ERROR(Service_Fatal, "Threw fatal error type {} with error code 0x{:X}", 114 LOG_ERROR(Service_Fatal, "Threw fatal error type {} with error code 0x{:X}", fatal_type,
115 static_cast<u32>(fatal_type), error_code.raw); 115 error_code.raw);
116
116 switch (fatal_type) { 117 switch (fatal_type) {
117 case FatalType::ErrorReportAndScreen: 118 case FatalType::ErrorReportAndScreen:
118 GenerateErrorReport(system, error_code, info); 119 GenerateErrorReport(system, error_code, info);
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index ca93062cf..b15c737e1 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -298,10 +298,35 @@ ResultVal<FileSys::VirtualFile> FileSystemController::OpenRomFSCurrentProcess()
298 return romfs_factory->OpenCurrentProcess(system.CurrentProcess()->GetTitleID()); 298 return romfs_factory->OpenCurrentProcess(system.CurrentProcess()->GetTitleID());
299} 299}
300 300
301ResultVal<FileSys::VirtualFile> FileSystemController::OpenPatchedRomFS(
302 u64 title_id, FileSys::ContentRecordType type) const {
303 LOG_TRACE(Service_FS, "Opening patched RomFS for title_id={:016X}", title_id);
304
305 if (romfs_factory == nullptr) {
306 // TODO: Find a better error code for this
307 return RESULT_UNKNOWN;
308 }
309
310 return romfs_factory->OpenPatchedRomFS(title_id, type);
311}
312
313ResultVal<FileSys::VirtualFile> FileSystemController::OpenPatchedRomFSWithProgramIndex(
314 u64 title_id, u8 program_index, FileSys::ContentRecordType type) const {
315 LOG_TRACE(Service_FS, "Opening patched RomFS for title_id={:016X}, program_index={}", title_id,
316 program_index);
317
318 if (romfs_factory == nullptr) {
319 // TODO: Find a better error code for this
320 return RESULT_UNKNOWN;
321 }
322
323 return romfs_factory->OpenPatchedRomFSWithProgramIndex(title_id, program_index, type);
324}
325
301ResultVal<FileSys::VirtualFile> FileSystemController::OpenRomFS( 326ResultVal<FileSys::VirtualFile> FileSystemController::OpenRomFS(
302 u64 title_id, FileSys::StorageId storage_id, FileSys::ContentRecordType type) const { 327 u64 title_id, FileSys::StorageId storage_id, FileSys::ContentRecordType type) const {
303 LOG_TRACE(Service_FS, "Opening RomFS for title_id={:016X}, storage_id={:02X}, type={:02X}", 328 LOG_TRACE(Service_FS, "Opening RomFS for title_id={:016X}, storage_id={:02X}, type={:02X}",
304 title_id, static_cast<u8>(storage_id), static_cast<u8>(type)); 329 title_id, storage_id, type);
305 330
306 if (romfs_factory == nullptr) { 331 if (romfs_factory == nullptr) {
307 // TODO(bunnei): Find a better error code for this 332 // TODO(bunnei): Find a better error code for this
@@ -313,8 +338,8 @@ ResultVal<FileSys::VirtualFile> FileSystemController::OpenRomFS(
313 338
314ResultVal<FileSys::VirtualDir> FileSystemController::CreateSaveData( 339ResultVal<FileSys::VirtualDir> FileSystemController::CreateSaveData(
315 FileSys::SaveDataSpaceId space, const FileSys::SaveDataAttribute& save_struct) const { 340 FileSys::SaveDataSpaceId space, const FileSys::SaveDataAttribute& save_struct) const {
316 LOG_TRACE(Service_FS, "Creating Save Data for space_id={:01X}, save_struct={}", 341 LOG_TRACE(Service_FS, "Creating Save Data for space_id={:01X}, save_struct={}", space,
317 static_cast<u8>(space), save_struct.DebugInfo()); 342 save_struct.DebugInfo());
318 343
319 if (save_data_factory == nullptr) { 344 if (save_data_factory == nullptr) {
320 return FileSys::ERROR_ENTITY_NOT_FOUND; 345 return FileSys::ERROR_ENTITY_NOT_FOUND;
@@ -325,8 +350,8 @@ ResultVal<FileSys::VirtualDir> FileSystemController::CreateSaveData(
325 350
326ResultVal<FileSys::VirtualDir> FileSystemController::OpenSaveData( 351ResultVal<FileSys::VirtualDir> FileSystemController::OpenSaveData(
327 FileSys::SaveDataSpaceId space, const FileSys::SaveDataAttribute& attribute) const { 352 FileSys::SaveDataSpaceId space, const FileSys::SaveDataAttribute& attribute) const {
328 LOG_TRACE(Service_FS, "Opening Save Data for space_id={:01X}, save_struct={}", 353 LOG_TRACE(Service_FS, "Opening Save Data for space_id={:01X}, save_struct={}", space,
329 static_cast<u8>(space), attribute.DebugInfo()); 354 attribute.DebugInfo());
330 355
331 if (save_data_factory == nullptr) { 356 if (save_data_factory == nullptr) {
332 return FileSys::ERROR_ENTITY_NOT_FOUND; 357 return FileSys::ERROR_ENTITY_NOT_FOUND;
@@ -337,7 +362,7 @@ ResultVal<FileSys::VirtualDir> FileSystemController::OpenSaveData(
337 362
338ResultVal<FileSys::VirtualDir> FileSystemController::OpenSaveDataSpace( 363ResultVal<FileSys::VirtualDir> FileSystemController::OpenSaveDataSpace(
339 FileSys::SaveDataSpaceId space) const { 364 FileSys::SaveDataSpaceId space) const {
340 LOG_TRACE(Service_FS, "Opening Save Data Space for space_id={:01X}", static_cast<u8>(space)); 365 LOG_TRACE(Service_FS, "Opening Save Data Space for space_id={:01X}", space);
341 366
342 if (save_data_factory == nullptr) { 367 if (save_data_factory == nullptr) {
343 return FileSys::ERROR_ENTITY_NOT_FOUND; 368 return FileSys::ERROR_ENTITY_NOT_FOUND;
@@ -358,7 +383,7 @@ ResultVal<FileSys::VirtualDir> FileSystemController::OpenSDMC() const {
358 383
359ResultVal<FileSys::VirtualDir> FileSystemController::OpenBISPartition( 384ResultVal<FileSys::VirtualDir> FileSystemController::OpenBISPartition(
360 FileSys::BisPartitionId id) const { 385 FileSys::BisPartitionId id) const {
361 LOG_TRACE(Service_FS, "Opening BIS Partition with id={:08X}", static_cast<u32>(id)); 386 LOG_TRACE(Service_FS, "Opening BIS Partition with id={:08X}", id);
362 387
363 if (bis_factory == nullptr) { 388 if (bis_factory == nullptr) {
364 return FileSys::ERROR_ENTITY_NOT_FOUND; 389 return FileSys::ERROR_ENTITY_NOT_FOUND;
@@ -374,7 +399,7 @@ ResultVal<FileSys::VirtualDir> FileSystemController::OpenBISPartition(
374 399
375ResultVal<FileSys::VirtualFile> FileSystemController::OpenBISPartitionStorage( 400ResultVal<FileSys::VirtualFile> FileSystemController::OpenBISPartitionStorage(
376 FileSys::BisPartitionId id) const { 401 FileSys::BisPartitionId id) const {
377 LOG_TRACE(Service_FS, "Opening BIS Partition Storage with id={:08X}", static_cast<u32>(id)); 402 LOG_TRACE(Service_FS, "Opening BIS Partition Storage with id={:08X}", id);
378 403
379 if (bis_factory == nullptr) { 404 if (bis_factory == nullptr) {
380 return FileSys::ERROR_ENTITY_NOT_FOUND; 405 return FileSys::ERROR_ENTITY_NOT_FOUND;
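
A note on the pattern running through these hunks: the static_cast<u8>/static_cast<u32> wrappers around enum arguments can be dropped because the logging layer now formats enum class values itself. A minimal sketch of such a catch-all formatter, assuming a recent fmt (9 or newer) and its documented extension point; StorageId and its values below are illustrative placeholders, not the project's definitions:

#include <cstdint>
#include <type_traits>

#include <fmt/format.h>

// Format any scoped enum by forwarding its underlying integral value to fmt.
// Restricting to non-int-convertible enums keeps this from colliding with
// fmt's built-in handling of unscoped enums.
template <typename T>
struct fmt::formatter<T, char,
                      std::enable_if_t<std::is_enum_v<T> && !std::is_convertible_v<T, int>>>
    : fmt::formatter<std::underlying_type_t<T>> {
    template <typename FormatContext>
    auto format(T value, FormatContext& ctx) const {
        using U = std::underlying_type_t<T>;
        return fmt::formatter<U>::format(static_cast<U>(value), ctx);
    }
};

// Placeholder enum standing in for FileSys::StorageId.
enum class StorageId : std::uint8_t { None = 0, Host = 1, GameCard = 2 };

int main() {
    // Prints "storage_id=02" with no static_cast at the call site.
    fmt::print("storage_id={:02X}\n", StorageId::GameCard);
}
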
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index 6dbbf0b2b..7102d3f9a 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -66,6 +66,10 @@ public:
66 66
67 void SetPackedUpdate(FileSys::VirtualFile update_raw); 67 void SetPackedUpdate(FileSys::VirtualFile update_raw);
68 ResultVal<FileSys::VirtualFile> OpenRomFSCurrentProcess() const; 68 ResultVal<FileSys::VirtualFile> OpenRomFSCurrentProcess() const;
69 ResultVal<FileSys::VirtualFile> OpenPatchedRomFS(u64 title_id,
70 FileSys::ContentRecordType type) const;
71 ResultVal<FileSys::VirtualFile> OpenPatchedRomFSWithProgramIndex(
72 u64 title_id, u8 program_index, FileSys::ContentRecordType type) const;
69 ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId storage_id, 73 ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId storage_id,
70 FileSys::ContentRecordType type) const; 74 FileSys::ContentRecordType type) const;
71 ResultVal<FileSys::VirtualDir> CreateSaveData( 75 ResultVal<FileSys::VirtualDir> CreateSaveData(
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index b3480494c..9cc260515 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -413,7 +413,7 @@ public:
413 413
414 const auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>()); 414 const auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
415 415
416 LOG_DEBUG(Service_FS, "called. file={}, mode={}", name, static_cast<u32>(mode)); 416 LOG_DEBUG(Service_FS, "called. file={}, mode={}", name, mode);
417 417
418 auto result = backend.OpenFile(name, mode); 418 auto result = backend.OpenFile(name, mode);
419 if (result.Failed()) { 419 if (result.Failed()) {
@@ -553,8 +553,7 @@ private:
553 const auto save_root = fsc.OpenSaveDataSpace(space); 553 const auto save_root = fsc.OpenSaveDataSpace(space);
554 554
555 if (save_root.Failed() || *save_root == nullptr) { 555 if (save_root.Failed() || *save_root == nullptr) {
556 LOG_ERROR(Service_FS, "The save root for the space_id={:02X} was invalid!", 556 LOG_ERROR(Service_FS, "The save root for the space_id={:02X} was invalid!", space);
557 static_cast<u8>(space));
558 return; 557 return;
559 } 558 }
560 559
@@ -718,7 +717,7 @@ FSP_SRV::FSP_SRV(Core::System& system_)
718 {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"}, 717 {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"},
719 {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"}, 718 {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"},
720 {204, nullptr, "OpenDataFileSystemByProgramIndex"}, 719 {204, nullptr, "OpenDataFileSystemByProgramIndex"},
721 {205, nullptr, "OpenDataStorageByProgramIndex"}, 720 {205, &FSP_SRV::OpenDataStorageWithProgramIndex, "OpenDataStorageWithProgramIndex"},
722 {400, nullptr, "OpenDeviceOperator"}, 721 {400, nullptr, "OpenDeviceOperator"},
723 {500, nullptr, "OpenSdCardDetectionEventNotifier"}, 722 {500, nullptr, "OpenSdCardDetectionEventNotifier"},
724 {501, nullptr, "OpenGameCardDetectionEventNotifier"}, 723 {501, nullptr, "OpenGameCardDetectionEventNotifier"},
@@ -795,8 +794,7 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) {
795 794
796 const auto type = rp.PopRaw<FileSystemType>(); 795 const auto type = rp.PopRaw<FileSystemType>();
797 const auto title_id = rp.PopRaw<u64>(); 796 const auto title_id = rp.PopRaw<u64>();
798 LOG_WARNING(Service_FS, "(STUBBED) called with type={}, title_id={:016X}", 797 LOG_WARNING(Service_FS, "(STUBBED) called with type={}, title_id={:016X}", type, title_id);
799 static_cast<u8>(type), title_id);
800 798
801 IPC::ResponseBuilder rb{ctx, 2, 0, 0}; 799 IPC::ResponseBuilder rb{ctx, 2, 0, 0};
802 rb.Push(RESULT_UNKNOWN); 800 rb.Push(RESULT_UNKNOWN);
@@ -883,7 +881,7 @@ void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) {
883void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) { 881void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) {
884 IPC::RequestParser rp{ctx}; 882 IPC::RequestParser rp{ctx};
885 const auto space = rp.PopRaw<FileSys::SaveDataSpaceId>(); 883 const auto space = rp.PopRaw<FileSys::SaveDataSpaceId>();
886 LOG_INFO(Service_FS, "called, space={}", static_cast<u8>(space)); 884 LOG_INFO(Service_FS, "called, space={}", space);
887 885
888 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 886 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
889 rb.Push(RESULT_SUCCESS); 887 rb.Push(RESULT_SUCCESS);
@@ -915,10 +913,10 @@ void FSP_SRV::ReadSaveDataFileSystemExtraDataWithMaskBySaveDataAttribute(
915 "(STUBBED) called, flags={}, space_id={}, attribute.title_id={:016X}\n" 913 "(STUBBED) called, flags={}, space_id={}, attribute.title_id={:016X}\n"
916 "attribute.user_id={:016X}{:016X}, attribute.save_id={:016X}\n" 914 "attribute.user_id={:016X}{:016X}, attribute.save_id={:016X}\n"
917 "attribute.type={}, attribute.rank={}, attribute.index={}", 915 "attribute.type={}, attribute.rank={}, attribute.index={}",
918 flags, static_cast<u32>(parameters.space_id), parameters.attribute.title_id, 916 flags, parameters.space_id, parameters.attribute.title_id,
919 parameters.attribute.user_id[1], parameters.attribute.user_id[0], 917 parameters.attribute.user_id[1], parameters.attribute.user_id[0],
920 parameters.attribute.save_id, static_cast<u32>(parameters.attribute.type), 918 parameters.attribute.save_id, parameters.attribute.type, parameters.attribute.rank,
921 static_cast<u32>(parameters.attribute.rank), parameters.attribute.index); 919 parameters.attribute.index);
922 920
923 IPC::ResponseBuilder rb{ctx, 3}; 921 IPC::ResponseBuilder rb{ctx, 3};
924 rb.Push(RESULT_SUCCESS); 922 rb.Push(RESULT_SUCCESS);
@@ -951,7 +949,7 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {
951 const auto title_id = rp.PopRaw<u64>(); 949 const auto title_id = rp.PopRaw<u64>();
952 950
953 LOG_DEBUG(Service_FS, "called with storage_id={:02X}, unknown={:08X}, title_id={:016X}", 951 LOG_DEBUG(Service_FS, "called with storage_id={:02X}, unknown={:08X}, title_id={:016X}",
954 static_cast<u8>(storage_id), unknown, title_id); 952 storage_id, unknown, title_id);
955 953
956 auto data = fsc.OpenRomFS(title_id, storage_id, FileSys::ContentRecordType::Data); 954 auto data = fsc.OpenRomFS(title_id, storage_id, FileSys::ContentRecordType::Data);
957 955
@@ -968,7 +966,7 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {
968 // TODO(DarkLordZach): Find the right error code to use here 966 // TODO(DarkLordZach): Find the right error code to use here
969 LOG_ERROR(Service_FS, 967 LOG_ERROR(Service_FS,
970 "could not open data storage with title_id={:016X}, storage_id={:02X}", title_id, 968 "could not open data storage with title_id={:016X}, storage_id={:02X}", title_id,
971 static_cast<u8>(storage_id)); 969 storage_id);
972 IPC::ResponseBuilder rb{ctx, 2}; 970 IPC::ResponseBuilder rb{ctx, 2};
973 rb.Push(RESULT_UNKNOWN); 971 rb.Push(RESULT_UNKNOWN);
974 return; 972 return;
@@ -987,21 +985,46 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {
987void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { 985void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
988 IPC::RequestParser rp{ctx}; 986 IPC::RequestParser rp{ctx};
989 987
990 auto storage_id = rp.PopRaw<FileSys::StorageId>(); 988 const auto storage_id = rp.PopRaw<FileSys::StorageId>();
991 auto title_id = rp.PopRaw<u64>(); 989 const auto title_id = rp.PopRaw<u64>();
992 990
993 LOG_DEBUG(Service_FS, "called with storage_id={:02X}, title_id={:016X}", 991 LOG_DEBUG(Service_FS, "called with storage_id={:02X}, title_id={:016X}", storage_id, title_id);
994 static_cast<u8>(storage_id), title_id);
995 992
996 IPC::ResponseBuilder rb{ctx, 2}; 993 IPC::ResponseBuilder rb{ctx, 2};
997 rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND); 994 rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND);
998} 995}
999 996
997void FSP_SRV::OpenDataStorageWithProgramIndex(Kernel::HLERequestContext& ctx) {
998 IPC::RequestParser rp{ctx};
999
1000 const auto program_index = rp.PopRaw<u8>();
1001
1002 LOG_DEBUG(Service_FS, "called, program_index={}", program_index);
1003
1004 auto romfs = fsc.OpenPatchedRomFSWithProgramIndex(
1005 system.CurrentProcess()->GetTitleID(), program_index, FileSys::ContentRecordType::Program);
1006
1007 if (romfs.Failed()) {
1008 // TODO: Find the right error code to use here
1009 LOG_ERROR(Service_FS, "could not open storage with program_index={}", program_index);
1010
1011 IPC::ResponseBuilder rb{ctx, 2};
1012 rb.Push(RESULT_UNKNOWN);
1013 return;
1014 }
1015
1016 auto storage = std::make_shared<IStorage>(system, std::move(romfs.Unwrap()));
1017
1018 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
1019 rb.Push(RESULT_SUCCESS);
1020 rb.PushIpcInterface<IStorage>(std::move(storage));
1021}
1022
1000void FSP_SRV::SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { 1023void FSP_SRV::SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) {
1001 IPC::RequestParser rp{ctx}; 1024 IPC::RequestParser rp{ctx};
1002 log_mode = rp.PopEnum<LogMode>(); 1025 log_mode = rp.PopEnum<LogMode>();
1003 1026
1004 LOG_DEBUG(Service_FS, "called, log_mode={:08X}", static_cast<u32>(log_mode)); 1027 LOG_DEBUG(Service_FS, "called, log_mode={:08X}", log_mode);
1005 1028
1006 IPC::ResponseBuilder rb{ctx, 2}; 1029 IPC::ResponseBuilder rb{ctx, 2};
1007 rb.Push(RESULT_SUCCESS); 1030 rb.Push(RESULT_SUCCESS);
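
The new command 205 handler above follows a common shape: pop the program index, ask the filesystem controller for the patched RomFS of the currently running title, and wrap the result in an IStorage on success. A standalone sketch of that control flow, with deliberately mocked stand-ins (OpenPatchedRomFS, Storage, and the integer result codes are placeholders, not yuzu's types):

#include <cstdint>
#include <cstdio>
#include <memory>
#include <optional>
#include <vector>

// Placeholder for a mounted RomFS image.
struct Storage {
    std::vector<std::uint8_t> bytes;
};

// Stand-in for FileSystemController::OpenPatchedRomFSWithProgramIndex:
// fails when no sub-program exists for the requested index.
std::optional<Storage> OpenPatchedRomFS([[maybe_unused]] std::uint64_t title_id,
                                        std::uint8_t program_index) {
    if (program_index != 0) {
        return std::nullopt; // only the base program exists in this sketch
    }
    return Storage{{0x52, 0x4F, 0x4D}}; // dummy contents
}

// Mirrors the handler's branching: log and bail with an error on failure,
// otherwise hand the opened storage back to the caller.
int HandleOpenDataStorageWithProgramIndex(std::uint64_t title_id, std::uint8_t program_index) {
    auto romfs = OpenPatchedRomFS(title_id, program_index);
    if (!romfs) {
        std::fprintf(stderr, "could not open storage with program_index=%u\n",
                     static_cast<unsigned>(program_index));
        return -1; // stands in for RESULT_UNKNOWN
    }
    const auto storage = std::make_shared<Storage>(std::move(*romfs));
    std::printf("opened storage, size=%zu\n", storage->bytes.size());
    return 0; // stands in for RESULT_SUCCESS
}

int main() {
    HandleOpenDataStorageWithProgramIndex(0x0100000000010000ULL, 0);
    HandleOpenDataStorageWithProgramIndex(0x0100000000010000ULL, 1);
}
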
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index 472286d6e..8ed933279 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -49,6 +49,7 @@ private:
49 void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 49 void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
50 void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); 50 void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx);
51 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 51 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
52 void OpenDataStorageWithProgramIndex(Kernel::HLERequestContext& ctx);
52 void SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); 53 void SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
53 void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); 54 void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
54 void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx); 55 void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index 40a289594..c5b053c31 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -229,8 +229,7 @@ private:
229 break; 229 break;
230 default: 230 default:
231 // HOS seems to not have an error case for an unknown notification 231 // HOS seems to not have an error case for an unknown notification
232 LOG_WARNING(Service_ACC, "Unknown notification {:08X}", 232 LOG_WARNING(Service_ACC, "Unknown notification {:08X}", notification.notification_type);
233 static_cast<u32>(notification.notification_type));
234 break; 233 break;
235 } 234 }
236 235
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 66c4fe60a..d280e7caf 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -116,6 +116,31 @@ u32 Controller_NPad::IndexToNPad(std::size_t index) {
116 } 116 }
117} 117}
118 118
119bool Controller_NPad::IsNpadIdValid(u32 npad_id) {
120 switch (npad_id) {
121 case 0:
122 case 1:
123 case 2:
124 case 3:
125 case 4:
126 case 5:
127 case 6:
128 case 7:
129 case NPAD_UNKNOWN:
130 case NPAD_HANDHELD:
131 return true;
132 default:
133 LOG_ERROR(Service_HID, "Invalid npad id {}", npad_id);
134 return false;
135 }
136}
137
138bool Controller_NPad::IsDeviceHandleValid(const DeviceHandle& device_handle) {
139 return IsNpadIdValid(device_handle.npad_id) &&
140 device_handle.npad_type < NpadType::MaxNpadType &&
141 device_handle.device_index < DeviceIndex::MaxDeviceIndex;
142}
143
119Controller_NPad::Controller_NPad(Core::System& system) : ControllerBase(system), system(system) {} 144Controller_NPad::Controller_NPad(Core::System& system) : ControllerBase(system), system(system) {}
120 145
121Controller_NPad::~Controller_NPad() { 146Controller_NPad::~Controller_NPad() {
@@ -742,6 +767,10 @@ bool Controller_NPad::VibrateControllerAtIndex(std::size_t npad_index, std::size
742 767
743void Controller_NPad::VibrateController(const DeviceHandle& vibration_device_handle, 768void Controller_NPad::VibrateController(const DeviceHandle& vibration_device_handle,
744 const VibrationValue& vibration_value) { 769 const VibrationValue& vibration_value) {
770 if (!IsDeviceHandleValid(vibration_device_handle)) {
771 return;
772 }
773
745 if (!Settings::values.vibration_enabled.GetValue() && !permit_vibration_session_enabled) { 774 if (!Settings::values.vibration_enabled.GetValue() && !permit_vibration_session_enabled) {
746 return; 775 return;
747 } 776 }
@@ -798,12 +827,20 @@ void Controller_NPad::VibrateControllers(const std::vector<DeviceHandle>& vibrat
798 827
799Controller_NPad::VibrationValue Controller_NPad::GetLastVibration( 828Controller_NPad::VibrationValue Controller_NPad::GetLastVibration(
800 const DeviceHandle& vibration_device_handle) const { 829 const DeviceHandle& vibration_device_handle) const {
830 if (!IsDeviceHandleValid(vibration_device_handle)) {
831 return {};
832 }
833
801 const auto npad_index = NPadIdToIndex(vibration_device_handle.npad_id); 834 const auto npad_index = NPadIdToIndex(vibration_device_handle.npad_id);
802 const auto device_index = static_cast<std::size_t>(vibration_device_handle.device_index); 835 const auto device_index = static_cast<std::size_t>(vibration_device_handle.device_index);
803 return latest_vibration_values[npad_index][device_index]; 836 return latest_vibration_values[npad_index][device_index];
804} 837}
805 838
806void Controller_NPad::InitializeVibrationDevice(const DeviceHandle& vibration_device_handle) { 839void Controller_NPad::InitializeVibrationDevice(const DeviceHandle& vibration_device_handle) {
840 if (!IsDeviceHandleValid(vibration_device_handle)) {
841 return;
842 }
843
807 const auto npad_index = NPadIdToIndex(vibration_device_handle.npad_id); 844 const auto npad_index = NPadIdToIndex(vibration_device_handle.npad_id);
808 const auto device_index = static_cast<std::size_t>(vibration_device_handle.device_index); 845 const auto device_index = static_cast<std::size_t>(vibration_device_handle.device_index);
809 InitializeVibrationDeviceAtIndex(npad_index, device_index); 846 InitializeVibrationDeviceAtIndex(npad_index, device_index);
@@ -824,6 +861,10 @@ void Controller_NPad::SetPermitVibrationSession(bool permit_vibration_session) {
824} 861}
825 862
826bool Controller_NPad::IsVibrationDeviceMounted(const DeviceHandle& vibration_device_handle) const { 863bool Controller_NPad::IsVibrationDeviceMounted(const DeviceHandle& vibration_device_handle) const {
864 if (!IsDeviceHandleValid(vibration_device_handle)) {
865 return false;
866 }
867
827 const auto npad_index = NPadIdToIndex(vibration_device_handle.npad_id); 868 const auto npad_index = NPadIdToIndex(vibration_device_handle.npad_id);
828 const auto device_index = static_cast<std::size_t>(vibration_device_handle.device_index); 869 const auto device_index = static_cast<std::size_t>(vibration_device_handle.device_index);
829 return vibration_devices_mounted[npad_index][device_index]; 870 return vibration_devices_mounted[npad_index][device_index];
@@ -1017,7 +1058,7 @@ void Controller_NPad::ClearAllControllers() {
1017} 1058}
1018 1059
1019u32 Controller_NPad::GetAndResetPressState() { 1060u32 Controller_NPad::GetAndResetPressState() {
1020 return std::exchange(press_state, 0); 1061 return press_state.exchange(0);
1021} 1062}
1022 1063
1023bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const { 1064bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const {
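
The guards added above exist because npad_id, npad_type, and device_index arrive from guest IPC and end up as array indices; without validation a malformed handle reads or writes out of bounds. A reduced sketch of the idea, under simplifying assumptions (the real code first maps ids through NPadIdToIndex, and the bounds below are illustrative):

#include <array>
#include <cstdint>
#include <cstdio>

enum class DeviceIndex : std::uint8_t { Left = 0, Right = 1, None = 2, MaxDeviceIndex = 3 };

struct DeviceHandle {
    std::uint32_t npad_id;
    DeviceIndex device_index;
};

constexpr std::size_t NUM_PADS = 10;

bool IsDeviceHandleValid(const DeviceHandle& handle) {
    // Reject anything that would index past the state arrays below.
    return handle.npad_id < NUM_PADS && handle.device_index < DeviceIndex::MaxDeviceIndex;
}

std::array<std::array<float, 3>, NUM_PADS> vibration_values{};

void VibrateController(const DeviceHandle& handle, float amplitude) {
    if (!IsDeviceHandleValid(handle)) {
        return; // drop malformed guest input instead of indexing out of bounds
    }
    vibration_values[handle.npad_id][static_cast<std::size_t>(handle.device_index)] = amplitude;
}

int main() {
    VibrateController(DeviceHandle{99, DeviceIndex::Left}, 1.0f); // ignored
    VibrateController(DeviceHandle{0, DeviceIndex::Left}, 0.5f);  // applied
    std::printf("pad 0 / left amplitude = %.1f\n", vibration_values[0][0]);
}
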
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 96f319294..e2e826623 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
8#include "common/bit_field.h" 9#include "common/bit_field.h"
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "core/frontend/input.h" 11#include "core/frontend/input.h"
@@ -56,12 +57,14 @@ public:
56 JoyconLeft = 6, 57 JoyconLeft = 6,
57 JoyconRight = 7, 58 JoyconRight = 7,
58 Pokeball = 9, 59 Pokeball = 9,
60 MaxNpadType = 10,
59 }; 61 };
60 62
61 enum class DeviceIndex : u8 { 63 enum class DeviceIndex : u8 {
62 Left = 0, 64 Left = 0,
63 Right = 1, 65 Right = 1,
64 None = 2, 66 None = 2,
67 MaxDeviceIndex = 3,
65 }; 68 };
66 69
67 enum class GyroscopeZeroDriftMode : u32 { 70 enum class GyroscopeZeroDriftMode : u32 {
@@ -213,6 +216,8 @@ public:
213 static Settings::ControllerType MapNPadToSettingsType(Controller_NPad::NPadControllerType type); 216 static Settings::ControllerType MapNPadToSettingsType(Controller_NPad::NPadControllerType type);
214 static std::size_t NPadIdToIndex(u32 npad_id); 217 static std::size_t NPadIdToIndex(u32 npad_id);
215 static u32 IndexToNPad(std::size_t index); 218 static u32 IndexToNPad(std::size_t index);
219 static bool IsNpadIdValid(u32 npad_id);
220 static bool IsDeviceHandleValid(const DeviceHandle& device_handle);
216 221
217private: 222private:
218 struct CommonHeader { 223 struct CommonHeader {
@@ -411,7 +416,7 @@ private:
411 bool IsControllerSupported(NPadControllerType controller) const; 416 bool IsControllerSupported(NPadControllerType controller) const;
412 void RequestPadStateUpdate(u32 npad_id); 417 void RequestPadStateUpdate(u32 npad_id);
413 418
414 u32 press_state{}; 419 std::atomic<u32> press_state{};
415 420
416 NpadStyleSet style{}; 421 NpadStyleSet style{};
417 std::array<NPadEntry, 10> shared_memory_entries{}; 422 std::array<NPadEntry, 10> shared_memory_entries{};
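
press_state changes to std::atomic<u32> because GetAndResetPressState must read and clear the accumulated bits in one indivisible step while the timed update callbacks write to them; std::exchange on a plain u32 is a non-atomic read-then-write and can drop a press that lands in between. A minimal illustration of the safe pattern (the producer/consumer split is assumed for demonstration):

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

std::atomic<std::uint32_t> press_state{0};

// Producer: OR in button bits, roughly what the pad update path is assumed to do.
void Press(std::uint32_t bits) {
    press_state.fetch_or(bits, std::memory_order_relaxed);
}

// Consumer: atomically take the accumulated bits and reset them to zero.
std::uint32_t GetAndResetPressState() {
    return press_state.exchange(0, std::memory_order_relaxed);
}

int main() {
    std::thread producer([] {
        for (int i = 0; i < 1000; ++i) {
            Press(1u << (i % 16));
        }
    });
    std::uint32_t seen = 0;
    for (int i = 0; i < 1000; ++i) {
        seen |= GetAndResetPressState();
    }
    producer.join();
    seen |= GetAndResetPressState(); // drain whatever is left
    std::printf("observed button mask: %08X\n", static_cast<unsigned>(seen));
}
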
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index b3c7234e1..8d95f74e6 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -78,11 +78,13 @@ IAppletResource::IAppletResource(Core::System& system_)
78 pad_update_event = Core::Timing::CreateEvent( 78 pad_update_event = Core::Timing::CreateEvent(
79 "HID::UpdatePadCallback", 79 "HID::UpdatePadCallback",
80 [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { 80 [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) {
81 const auto guard = LockService();
81 UpdateControllers(user_data, ns_late); 82 UpdateControllers(user_data, ns_late);
82 }); 83 });
83 motion_update_event = Core::Timing::CreateEvent( 84 motion_update_event = Core::Timing::CreateEvent(
84 "HID::MotionPadCallback", 85 "HID::MotionPadCallback",
85 [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { 86 [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) {
87 const auto guard = LockService();
86 UpdateMotion(user_data, ns_late); 88 UpdateMotion(user_data, ns_late);
87 }); 89 });
88 90
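
The new LockService() guard serializes the timed pad/motion callbacks against IPC handlers that touch the same controller state. The pattern, sketched here with a plain mutex (ServiceFramework's real lock lives in the service base class; the names below are stand-ins):

#include <chrono>
#include <cstdio>
#include <mutex>
#include <thread>

class AppletResource {
public:
    // Callbacks hold the guard for their whole body; C++17's guaranteed
    // copy elision lets a std::scoped_lock be returned by value.
    [[nodiscard]] std::scoped_lock<std::mutex> LockService() {
        return std::scoped_lock{service_mutex};
    }

    void UpdateControllers() {
        const auto guard = LockService();
        ++updates; // shared state mutated only under the lock
    }

    void HandleIpcRequest() {
        const auto guard = LockService();
        std::printf("updates so far: %d\n", updates);
    }

private:
    std::mutex service_mutex;
    int updates = 0;
};

int main() {
    AppletResource resource;
    std::thread timing([&] {
        for (int i = 0; i < 5; ++i) {
            resource.UpdateControllers();
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
        }
    });
    resource.HandleIpcRequest();
    timing.join();
    resource.HandleIpcRequest();
}
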
diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp
index f884b2735..8e49b068c 100644
--- a/src/core/hle/service/lm/lm.cpp
+++ b/src/core/hle/service/lm/lm.cpp
@@ -68,7 +68,7 @@ private:
68 IPC::RequestParser rp{ctx}; 68 IPC::RequestParser rp{ctx};
69 const auto destination = rp.PopEnum<DestinationFlag>(); 69 const auto destination = rp.PopEnum<DestinationFlag>();
70 70
71 LOG_DEBUG(Service_LM, "called, destination={:08X}", static_cast<u32>(destination)); 71 LOG_DEBUG(Service_LM, "called, destination={:08X}", destination);
72 72
73 manager.SetDestination(destination); 73 manager.SetDestination(destination);
74 74
diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp
index b8d627ca8..2dcda16f6 100644
--- a/src/core/hle/service/ncm/ncm.cpp
+++ b/src/core/hle/service/ncm/ncm.cpp
@@ -45,7 +45,7 @@ public:
45 } 45 }
46 46
47private: 47private:
48 FileSys::StorageId storage; 48 [[maybe_unused]] FileSys::StorageId storage;
49}; 49};
50 50
51class IRegisteredLocationResolver final : public ServiceFramework<IRegisteredLocationResolver> { 51class IRegisteredLocationResolver final : public ServiceFramework<IRegisteredLocationResolver> {
diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp
index d33b26129..d16223064 100644
--- a/src/core/hle/service/nim/nim.cpp
+++ b/src/core/hle/service/nim/nim.cpp
@@ -217,7 +217,7 @@ public:
217 {1, nullptr, "RefreshDebugAvailability"}, 217 {1, nullptr, "RefreshDebugAvailability"},
218 {2, nullptr, "ClearDebugResponse"}, 218 {2, nullptr, "ClearDebugResponse"},
219 {3, nullptr, "RegisterDebugResponse"}, 219 {3, nullptr, "RegisterDebugResponse"},
220 {4, nullptr, "IsLargeResourceAvailable"}, 220 {4, &NIM_ECA::IsLargeResourceAvailable, "IsLargeResourceAvailable"},
221 }; 221 };
222 // clang-format on 222 // clang-format on
223 223
@@ -231,6 +231,18 @@ private:
231 rb.Push(RESULT_SUCCESS); 231 rb.Push(RESULT_SUCCESS);
232 rb.PushIpcInterface<IShopServiceAccessServer>(system); 232 rb.PushIpcInterface<IShopServiceAccessServer>(system);
233 } 233 }
234
235 void IsLargeResourceAvailable(Kernel::HLERequestContext& ctx) {
236 IPC::RequestParser rp{ctx};
237
238 const auto unknown{rp.Pop<u64>()};
239
240 LOG_INFO(Service_NIM, "(STUBBED) called, unknown={}", unknown);
241
242 IPC::ResponseBuilder rb{ctx, 3};
243 rb.Push(RESULT_SUCCESS);
244 rb.Push(false);
245 }
234}; 246};
235 247
236class NIM_SHP final : public ServiceFramework<NIM_SHP> { 248class NIM_SHP final : public ServiceFramework<NIM_SHP> {
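
Both of the newly wired stubs, here and in ns:vm below, answer with a success code plus a boolean false payload. A toy version of that response shape, with a fully mocked builder (the real IPC::ResponseBuilder writes into the HLE command buffer, and its word accounting is more involved than this):

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy stand-in for IPC::ResponseBuilder: just collects 32-bit words.
class ResponseBuilder {
public:
    explicit ResponseBuilder(std::size_t expected_words) { words.reserve(expected_words); }
    void Push(std::uint32_t value) { words.push_back(value); }
    void Push(bool value) { Push(static_cast<std::uint32_t>(value)); }
    std::vector<std::uint32_t> words;
};

constexpr std::uint32_t RESULT_SUCCESS = 0;

// Mirrors the stub shape shared by IsLargeResourceAvailable and
// NeedsUpdateVulnerability: report success, answer "false".
void StubQueryReturningFalse(ResponseBuilder& rb) {
    rb.Push(RESULT_SUCCESS);
    rb.Push(false);
}

int main() {
    ResponseBuilder rb{3};
    StubQueryReturningFalse(rb);
    std::printf("response words: %zu\n", rb.words.size());
}
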
diff --git a/src/core/hle/service/ns/ns.cpp b/src/core/hle/service/ns/ns.cpp
index ef7584641..6ccf8995c 100644
--- a/src/core/hle/service/ns/ns.cpp
+++ b/src/core/hle/service/ns/ns.cpp
@@ -673,7 +673,7 @@ public:
673 explicit NS_VM(Core::System& system_) : ServiceFramework{system_, "ns:vm"} { 673 explicit NS_VM(Core::System& system_) : ServiceFramework{system_, "ns:vm"} {
674 // clang-format off 674 // clang-format off
675 static const FunctionInfo functions[] = { 675 static const FunctionInfo functions[] = {
676 {1200, nullptr, "NeedsUpdateVulnerability"}, 676 {1200, &NS_VM::NeedsUpdateVulnerability, "NeedsUpdateVulnerability"},
677 {1201, nullptr, "UpdateSafeSystemVersionForDebug"}, 677 {1201, nullptr, "UpdateSafeSystemVersionForDebug"},
678 {1202, nullptr, "GetSafeSystemVersion"}, 678 {1202, nullptr, "GetSafeSystemVersion"},
679 }; 679 };
@@ -681,6 +681,15 @@ public:
681 681
682 RegisterHandlers(functions); 682 RegisterHandlers(functions);
683 } 683 }
684
685private:
686 void NeedsUpdateVulnerability(Kernel::HLERequestContext& ctx) {
687 LOG_WARNING(Service_NS, "(STUBBED) called");
688
689 IPC::ResponseBuilder rb{ctx, 3};
690 rb.Push(RESULT_SUCCESS);
691 rb.Push(false);
692 }
684}; 693};
685 694
686void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) { 695void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index ccc137e40..71c7587db 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -27,29 +27,11 @@
27 27
28namespace Service::NS { 28namespace Service::NS {
29 29
30enum class FontArchives : u64 {
31 Extension = 0x0100000000000810,
32 Standard = 0x0100000000000811,
33 Korean = 0x0100000000000812,
34 ChineseTraditional = 0x0100000000000813,
35 ChineseSimple = 0x0100000000000814,
36};
37
38struct FontRegion { 30struct FontRegion {
39 u32 offset; 31 u32 offset;
40 u32 size; 32 u32 size;
41}; 33};
42 34
43constexpr std::array<std::pair<FontArchives, const char*>, 7> SHARED_FONTS{
44 std::make_pair(FontArchives::Standard, "nintendo_udsg-r_std_003.bfttf"),
45 std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_org_zh-cn_003.bfttf"),
46 std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_ext_zh-cn_003.bfttf"),
47 std::make_pair(FontArchives::ChineseTraditional, "nintendo_udjxh-db_zh-tw_003.bfttf"),
48 std::make_pair(FontArchives::Korean, "nintendo_udsg-r_ko_003.bfttf"),
49 std::make_pair(FontArchives::Extension, "nintendo_ext_003.bfttf"),
50 std::make_pair(FontArchives::Extension, "nintendo_ext2_003.bfttf"),
51};
52
53// The below data is specific to shared font data dumped from Switch on f/w 2.2 35// The below data is specific to shared font data dumped from Switch on f/w 2.2
54// Virtual address and offsets/sizes likely will vary by dump 36// Virtual address and offsets/sizes likely will vary by dump
55[[maybe_unused]] constexpr VAddr SHARED_FONT_MEM_VADDR{0x00000009d3016000ULL}; 37[[maybe_unused]] constexpr VAddr SHARED_FONT_MEM_VADDR{0x00000009d3016000ULL};
@@ -80,6 +62,18 @@ static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMem
80 offset += transformed_font.size() * sizeof(u32); 62 offset += transformed_font.size() * sizeof(u32);
81} 63}
82 64
65void DecryptSharedFontToTTF(const std::vector<u32>& input, std::vector<u8>& output) {
66 ASSERT_MSG(input[0] == EXPECTED_MAGIC, "Failed to derive key, unexpected magic number");
67
68 const u32 KEY = input[0] ^ EXPECTED_RESULT; // Derive key using an inverse xor
69 std::vector<u32> transformed_font(input.size());
70 // TODO(ogniK): Figure out a better way to do this
71 std::transform(input.begin(), input.end(), transformed_font.begin(),
72 [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); });
73 transformed_font[1] = Common::swap32(transformed_font[1]) ^ KEY; // "re-encrypt" the size
74 std::memcpy(output.data(), transformed_font.data() + 2, (transformed_font.size() - 2) * sizeof(u32));
75}
76
83void EncryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, 77void EncryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output,
84 std::size_t& offset) { 78 std::size_t& offset) {
85 ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, 79 ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE,
@@ -182,21 +176,18 @@ PL_U::PL_U(Core::System& system_)
182 } 176 }
183 177
184 if (!romfs) { 178 if (!romfs) {
185 LOG_ERROR(Service_NS, "Failed to find or synthesize {:016X}! Skipping", 179 LOG_ERROR(Service_NS, "Failed to find or synthesize {:016X}! Skipping", font.first);
186 static_cast<u64>(font.first));
187 continue; 180 continue;
188 } 181 }
189 182
190 const auto extracted_romfs = FileSys::ExtractRomFS(romfs); 183 const auto extracted_romfs = FileSys::ExtractRomFS(romfs);
191 if (!extracted_romfs) { 184 if (!extracted_romfs) {
192 LOG_ERROR(Service_NS, "Failed to extract RomFS for {:016X}! Skipping", 185 LOG_ERROR(Service_NS, "Failed to extract RomFS for {:016X}! Skipping", font.first);
193 static_cast<u64>(font.first));
194 continue; 186 continue;
195 } 187 }
196 const auto font_fp = extracted_romfs->GetFile(font.second); 188 const auto font_fp = extracted_romfs->GetFile(font.second);
197 if (!font_fp) { 189 if (!font_fp) {
198 LOG_ERROR(Service_NS, "{:016X} has no file \"{}\"! Skipping", 190 LOG_ERROR(Service_NS, "{:016X} has no file \"{}\"! Skipping", font.first, font.second);
199 static_cast<u64>(font.first), font.second);
200 continue; 191 continue;
201 } 192 }
202 std::vector<u32> font_data_u32(font_fp->GetSize() / sizeof(u32)); 193 std::vector<u32> font_data_u32(font_fp->GetSize() / sizeof(u32));
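
DecryptSharedFontToTTF leans on XOR being its own inverse: since the first plaintext word of the font container is known, the key falls out as input[0] ^ EXPECTED_RESULT, after which every word is XORed and byte-swapped. A standalone sketch of that recovery; the two constants below are placeholders, not the real magic values:

#include <cstdint>
#include <cstdio>
#include <vector>

// Placeholder constants standing in for the project's EXPECTED_MAGIC /
// EXPECTED_RESULT pair (encrypted and decrypted magic words).
constexpr std::uint32_t EXPECTED_MAGIC = 0xDEADBEEF;  // encrypted magic on disk
constexpr std::uint32_t EXPECTED_RESULT = 0xCAFEF00D; // magic after decryption

constexpr std::uint32_t Swap32(std::uint32_t v) {
    return ((v & 0x000000FFu) << 24) | ((v & 0x0000FF00u) << 8) |
           ((v & 0x00FF0000u) >> 8) | ((v & 0xFF000000u) >> 24);
}

// Since x ^ k ^ k == x, one known plaintext word is enough to recover k.
std::vector<std::uint32_t> Decrypt(const std::vector<std::uint32_t>& input) {
    const std::uint32_t key = input[0] ^ EXPECTED_RESULT;
    std::vector<std::uint32_t> out(input.size());
    for (std::size_t i = 0; i < input.size(); ++i) {
        out[i] = Swap32(input[i] ^ key); // XOR, then byte-swap, as in the hunk above
    }
    return out;
}

int main() {
    // Build a tiny "encrypted" buffer with the implied key, then round-trip it.
    const std::uint32_t key = EXPECTED_MAGIC ^ EXPECTED_RESULT;
    const std::vector<std::uint32_t> plain{EXPECTED_RESULT, 0x00000010u, 0x11223344u};
    std::vector<std::uint32_t> enc(plain.size());
    for (std::size_t i = 0; i < plain.size(); ++i) {
        enc[i] = plain[i] ^ key;
    }
    const auto dec = Decrypt(enc);
    std::printf("magic after decrypt: %08X\n", static_cast<unsigned>(Swap32(dec[0])));
}
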
diff --git a/src/core/hle/service/ns/pl_u.h b/src/core/hle/service/ns/pl_u.h
index 224dcb997..f920c7f69 100644
--- a/src/core/hle/service/ns/pl_u.h
+++ b/src/core/hle/service/ns/pl_u.h
@@ -16,6 +16,25 @@ class FileSystemController;
16 16
17namespace NS { 17namespace NS {
18 18
19enum class FontArchives : u64 {
20 Extension = 0x0100000000000810,
21 Standard = 0x0100000000000811,
22 Korean = 0x0100000000000812,
23 ChineseTraditional = 0x0100000000000813,
24 ChineseSimple = 0x0100000000000814,
25};
26
27constexpr std::array<std::pair<FontArchives, const char*>, 7> SHARED_FONTS{
28 std::make_pair(FontArchives::Standard, "nintendo_udsg-r_std_003.bfttf"),
29 std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_org_zh-cn_003.bfttf"),
30 std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_ext_zh-cn_003.bfttf"),
31 std::make_pair(FontArchives::ChineseTraditional, "nintendo_udjxh-db_zh-tw_003.bfttf"),
32 std::make_pair(FontArchives::Korean, "nintendo_udsg-r_ko_003.bfttf"),
33 std::make_pair(FontArchives::Extension, "nintendo_ext_003.bfttf"),
34 std::make_pair(FontArchives::Extension, "nintendo_ext2_003.bfttf"),
35};
36
37void DecryptSharedFontToTTF(const std::vector<u32>& input, std::vector<u8>& output);
19void EncryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, std::size_t& offset); 38void EncryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, std::size_t& offset);
20 39
21class PL_U final : public ServiceFramework<PL_U> { 40class PL_U final : public ServiceFramework<PL_U> {
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 44a8bc060..5681599ba 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -31,8 +31,8 @@ public:
31 * @param output A buffer where the output data will be written to. 31 * @param output A buffer where the output data will be written to.
32 * @returns The result code of the ioctl. 32 * @returns The result code of the ioctl.
33 */ 33 */
34 virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 34 virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input,
35 IoctlCtrl& ctrl) = 0; 35 std::vector<u8>& output) = 0;
36 36
37 /** 37 /**
38 * Handles an ioctl2 request. 38 * Handles an ioctl2 request.
@@ -43,8 +43,7 @@ public:
43 * @returns The result code of the ioctl. 43 * @returns The result code of the ioctl.
44 */ 44 */
45 virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 45 virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
46 const std::vector<u8>& inline_input, std::vector<u8>& output, 46 const std::vector<u8>& inline_input, std::vector<u8>& output) = 0;
47 IoctlCtrl& ctrl) = 0;
48 47
49 /** 48 /**
50 * Handles an ioctl3 request. 49 * Handles an ioctl3 request.
@@ -55,7 +54,7 @@ public:
55 * @returns The result code of the ioctl. 54 * @returns The result code of the ioctl.
56 */ 55 */
57 virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 56 virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
58 std::vector<u8>& inline_output, IoctlCtrl& ctrl) = 0; 57 std::vector<u8>& inline_output) = 0;
59 58
60protected: 59protected:
61 Core::System& system; 60 Core::System& system;
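
After this change every nvdrv device implements the three-variant interface without the IoctlCtrl parameter, since the deferred-wait bookkeeping (the must_delay path removed from nvhost_ctrl below) no longer flows through the per-device API. A condensed sketch of the resulting contract, with NvResult reduced to the two values used here:

#include <cstdint>
#include <cstdio>
#include <vector>

enum class NvResult : std::uint32_t { Success = 0, NotImplemented = 0xFFFFFFFF };

// Reduced stand-in for the Ioctl command word (group/cmd bit packing elided).
struct Ioctl {
    std::uint32_t raw;
};

// The trimmed interface: ioctl1 is plain in/out, ioctl2 adds an inlined input
// buffer, ioctl3 adds an inlined output buffer.
class nvdevice {
public:
    virtual ~nvdevice() = default;
    virtual NvResult Ioctl1(Ioctl command, const std::vector<std::uint8_t>& input,
                            std::vector<std::uint8_t>& output) = 0;
    virtual NvResult Ioctl2(Ioctl command, const std::vector<std::uint8_t>& input,
                            const std::vector<std::uint8_t>& inline_input,
                            std::vector<std::uint8_t>& output) = 0;
    virtual NvResult Ioctl3(Ioctl command, const std::vector<std::uint8_t>& input,
                            std::vector<std::uint8_t>& output,
                            std::vector<std::uint8_t>& inline_output) = 0;
};

// A device that implements nothing, mirroring the UNIMPLEMENTED_MSG paths.
class null_device final : public nvdevice {
public:
    NvResult Ioctl1(Ioctl command, const std::vector<std::uint8_t>&,
                    std::vector<std::uint8_t>&) override {
        std::fprintf(stderr, "Unimplemented ioctl=%08X\n", static_cast<unsigned>(command.raw));
        return NvResult::NotImplemented;
    }
    NvResult Ioctl2(Ioctl command, const std::vector<std::uint8_t>&,
                    const std::vector<std::uint8_t>&, std::vector<std::uint8_t>&) override {
        std::fprintf(stderr, "Unimplemented ioctl=%08X\n", static_cast<unsigned>(command.raw));
        return NvResult::NotImplemented;
    }
    NvResult Ioctl3(Ioctl command, const std::vector<std::uint8_t>&,
                    std::vector<std::uint8_t>&, std::vector<std::uint8_t>&) override {
        std::fprintf(stderr, "Unimplemented ioctl=%08X\n", static_cast<unsigned>(command.raw));
        return NvResult::NotImplemented;
    }
};

int main() {
    null_device dev;
    std::vector<std::uint8_t> in, out;
    dev.Ioctl1(Ioctl{0x44000001u}, in, out);
}
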
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 170a7c9a0..ce615c758 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -18,21 +18,20 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_de
18 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} 18 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
19nvdisp_disp0::~nvdisp_disp0() = default; 19nvdisp_disp0::~nvdisp_disp0() = default;
20 20
21NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 21NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input,
22 IoctlCtrl& ctrl) { 22 std::vector<u8>& output) {
23 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 23 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
24 return NvResult::NotImplemented; 24 return NvResult::NotImplemented;
25} 25}
26 26
27NvResult nvdisp_disp0::Ioctl2(Ioctl command, const std::vector<u8>& input, 27NvResult nvdisp_disp0::Ioctl2(Ioctl command, const std::vector<u8>& input,
28 const std::vector<u8>& inline_input, std::vector<u8>& output, 28 const std::vector<u8>& inline_input, std::vector<u8>& output) {
29 IoctlCtrl& ctrl) {
30 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 29 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
31 return NvResult::NotImplemented; 30 return NvResult::NotImplemented;
32} 31}
33 32
34NvResult nvdisp_disp0::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 33NvResult nvdisp_disp0::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
35 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 34 std::vector<u8>& inline_output) {
36 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 35 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
37 return NvResult::NotImplemented; 36 return NvResult::NotImplemented;
38} 37}
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index eb7575e40..55a33b7e4 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -20,13 +20,11 @@ public:
20 explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 20 explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
21 ~nvdisp_disp0() override; 21 ~nvdisp_disp0() override;
22 22
23 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 23 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
24 IoctlCtrl& ctrl) override;
25 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 24 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
26 const std::vector<u8>& inline_input, std::vector<u8>& output, 25 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
27 IoctlCtrl& ctrl) override;
28 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 26 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
29 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 27 std::vector<u8>& inline_output) override;
30 28
31 /// Performs a screen flip, drawing the buffer pointed to by the handle. 29 /// Performs a screen flip, drawing the buffer pointed to by the handle.
32 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 30 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 4e0652c39..6b062e10e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -21,8 +21,8 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_
21 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} 21 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
22nvhost_as_gpu::~nvhost_as_gpu() = default; 22nvhost_as_gpu::~nvhost_as_gpu() = default;
23 23
24NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 24NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
25 IoctlCtrl& ctrl) { 25 std::vector<u8>& output) {
26 switch (command.group) { 26 switch (command.group) {
27 case 'A': 27 case 'A':
28 switch (command.cmd) { 28 switch (command.cmd) {
@@ -55,14 +55,13 @@ NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std:
55} 55}
56 56
57NvResult nvhost_as_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, 57NvResult nvhost_as_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
58 const std::vector<u8>& inline_input, std::vector<u8>& output, 58 const std::vector<u8>& inline_input, std::vector<u8>& output) {
59 IoctlCtrl& ctrl) {
60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 59 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
61 return NvResult::NotImplemented; 60 return NvResult::NotImplemented;
62} 61}
63 62
64NvResult nvhost_as_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 63NvResult nvhost_as_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
65 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 64 std::vector<u8>& inline_output) {
66 switch (command.group) { 65 switch (command.group) {
67 case 'A': 66 case 'A':
68 switch (command.cmd) { 67 switch (command.cmd) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 2bd355af9..08035fa0e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -30,13 +30,11 @@ public:
30 explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 30 explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
31 ~nvhost_as_gpu() override; 31 ~nvhost_as_gpu() override;
32 32
33 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 33 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
34 IoctlCtrl& ctrl) override;
35 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 34 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
36 const std::vector<u8>& inline_input, std::vector<u8>& output, 35 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
37 IoctlCtrl& ctrl) override;
38 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 36 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
39 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 37 std::vector<u8>& inline_output) override;
40 38
41private: 39private:
42 class BufferMap final { 40 class BufferMap final {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 92d31b620..fea3b7b9f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -20,8 +20,7 @@ nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
20 : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {} 20 : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
21nvhost_ctrl::~nvhost_ctrl() = default; 21nvhost_ctrl::~nvhost_ctrl() = default;
22 22
23NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 23NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
24 IoctlCtrl& ctrl) {
25 switch (command.group) { 24 switch (command.group) {
26 case 0x0: 25 case 0x0:
27 switch (command.cmd) { 26 switch (command.cmd) {
@@ -30,9 +29,9 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v
30 case 0x1c: 29 case 0x1c:
31 return IocCtrlClearEventWait(input, output); 30 return IocCtrlClearEventWait(input, output);
32 case 0x1d: 31 case 0x1d:
33 return IocCtrlEventWait(input, output, false, ctrl); 32 return IocCtrlEventWait(input, output, false);
34 case 0x1e: 33 case 0x1e:
35 return IocCtrlEventWait(input, output, true, ctrl); 34 return IocCtrlEventWait(input, output, true);
36 case 0x1f: 35 case 0x1f:
37 return IocCtrlEventRegister(input, output); 36 return IocCtrlEventRegister(input, output);
38 case 0x20: 37 case 0x20:
@@ -48,14 +47,13 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v
48} 47}
49 48
50NvResult nvhost_ctrl::Ioctl2(Ioctl command, const std::vector<u8>& input, 49NvResult nvhost_ctrl::Ioctl2(Ioctl command, const std::vector<u8>& input,
51 const std::vector<u8>& inline_input, std::vector<u8>& output, 50 const std::vector<u8>& inline_input, std::vector<u8>& output) {
52 IoctlCtrl& ctrl) {
53 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 51 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
54 return NvResult::NotImplemented; 52 return NvResult::NotImplemented;
55} 53}
56 54
57NvResult nvhost_ctrl::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 55NvResult nvhost_ctrl::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
58 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 56 std::vector<u8>& inline_output) {
59 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 57 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
60 return NvResult::NotImplemented; 58 return NvResult::NotImplemented;
61} 59}
@@ -69,7 +67,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector
69} 67}
70 68
71NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, 69NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
72 bool is_async, IoctlCtrl& ctrl) { 70 bool is_async) {
73 IocCtrlEventWaitParams params{}; 71 IocCtrlEventWaitParams params{};
74 std::memcpy(&params, input.data(), sizeof(params)); 72 std::memcpy(&params, input.data(), sizeof(params));
75 LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}", 73 LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}",
@@ -141,12 +139,6 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
141 params.value |= event_id; 139 params.value |= event_id;
142 event.event.writable->Clear(); 140 event.event.writable->Clear();
143 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); 141 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
144 if (!is_async && ctrl.fresh_call) {
145 ctrl.must_delay = true;
146 ctrl.timeout = params.timeout;
147 ctrl.event_id = event_id;
148 return NvResult::Timeout;
149 }
150 std::memcpy(output.data(), &params, sizeof(params)); 142 std::memcpy(output.data(), &params, sizeof(params));
151 return NvResult::Timeout; 143 return NvResult::Timeout;
152 } 144 }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 107168e21..c5aa1362a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -18,13 +18,11 @@ public:
18 SyncpointManager& syncpoint_manager); 18 SyncpointManager& syncpoint_manager);
19 ~nvhost_ctrl() override; 19 ~nvhost_ctrl() override;
20 20
21 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 21 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
22 IoctlCtrl& ctrl) override;
23 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 22 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
24 const std::vector<u8>& inline_input, std::vector<u8>& output, 23 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
25 IoctlCtrl& ctrl) override;
26 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 24 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
27 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 25 std::vector<u8>& inline_output) override;
28 26
29private: 27private:
30 struct IocSyncptReadParams { 28 struct IocSyncptReadParams {
@@ -123,8 +121,7 @@ private:
123 static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size"); 121 static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size");
124 122
125 NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); 123 NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
126 NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async, 124 NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async);
127 IoctlCtrl& ctrl);
128 NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); 125 NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
129 NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); 126 NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
130 NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); 127 NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index 647f5907e..0320d3ae2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -16,7 +16,7 @@ nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {}
16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; 16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default;
17 17
18NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, 18NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
19 std::vector<u8>& output, IoctlCtrl& ctrl) { 19 std::vector<u8>& output) {
20 switch (command.group) { 20 switch (command.group) {
21 case 'G': 21 case 'G':
22 switch (command.cmd) { 22 switch (command.cmd) {
@@ -48,15 +48,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
48} 48}
49 49
50NvResult nvhost_ctrl_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, 50NvResult nvhost_ctrl_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
51 const std::vector<u8>& inline_input, std::vector<u8>& output, 51 const std::vector<u8>& inline_input, std::vector<u8>& output) {
52 IoctlCtrl& ctrl) {
53 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 52 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
54 return NvResult::NotImplemented; 53 return NvResult::NotImplemented;
55} 54}
56 55
57NvResult nvhost_ctrl_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, 56NvResult nvhost_ctrl_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input,
58 std::vector<u8>& output, std::vector<u8>& inline_output, 57 std::vector<u8>& output, std::vector<u8>& inline_output) {
59 IoctlCtrl& ctrl) {
60 switch (command.group) { 58 switch (command.group) {
61 case 'G': 59 case 'G':
62 switch (command.cmd) { 60 switch (command.cmd) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index c2fffe734..137b88238 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -16,13 +16,11 @@ public:
16 explicit nvhost_ctrl_gpu(Core::System& system); 16 explicit nvhost_ctrl_gpu(Core::System& system);
17 ~nvhost_ctrl_gpu() override; 17 ~nvhost_ctrl_gpu() override;
18 18
19 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 19 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
20 IoctlCtrl& ctrl) override;
21 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 20 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
22 const std::vector<u8>& inline_input, std::vector<u8>& output, 21 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
23 IoctlCtrl& ctrl) override;
24 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 22 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
25 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 23 std::vector<u8>& inline_output) override;
26 24
27private: 25private:
28 struct IoctlGpuCharacteristics { 26 struct IoctlGpuCharacteristics {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index b0c2caba5..af8b3d9f1 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -23,8 +23,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
23 23
24nvhost_gpu::~nvhost_gpu() = default; 24nvhost_gpu::~nvhost_gpu() = default;
25 25
26NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 26NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
27 IoctlCtrl& ctrl) {
28 switch (command.group) { 27 switch (command.group) {
29 case 0x0: 28 case 0x0:
30 switch (command.cmd) { 29 switch (command.cmd) {
@@ -76,8 +75,7 @@ NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve
76}; 75};
77 76
78NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, 77NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
79 const std::vector<u8>& inline_input, std::vector<u8>& output, 78 const std::vector<u8>& inline_input, std::vector<u8>& output) {
80 IoctlCtrl& ctrl) {
81 switch (command.group) { 79 switch (command.group) {
82 case 'H': 80 case 'H':
83 switch (command.cmd) { 81 switch (command.cmd) {
@@ -91,7 +89,7 @@ NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
91} 89}
92 90
93NvResult nvhost_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 91NvResult nvhost_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
94 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 92 std::vector<u8>& inline_output) {
95 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 93 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
96 return NvResult::NotImplemented; 94 return NvResult::NotImplemented;
97} 95}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index aa0048a9d..e0298b4fe 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -26,13 +26,11 @@ public:
26 SyncpointManager& syncpoint_manager); 26 SyncpointManager& syncpoint_manager);
27 ~nvhost_gpu() override; 27 ~nvhost_gpu() override;
28 28
29 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 29 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
30 IoctlCtrl& ctrl) override;
31 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 30 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
32 const std::vector<u8>& inline_input, std::vector<u8>& output, 31 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
33 IoctlCtrl& ctrl) override;
34 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 32 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
35 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 33 std::vector<u8>& inline_output) override;
36 34
37private: 35private:
38 enum class CtxObjects : u32_le { 36 enum class CtxObjects : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index b8328c314..d8735491c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -15,8 +15,8 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_de
15 : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} 15 : nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
16nvhost_nvdec::~nvhost_nvdec() = default; 16nvhost_nvdec::~nvhost_nvdec() = default;
17 17
18NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 18NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
19 IoctlCtrl& ctrl) { 19 std::vector<u8>& output) {
20 switch (command.group) { 20 switch (command.group) {
21 case 0x0: 21 case 0x0:
22 switch (command.cmd) { 22 switch (command.cmd) {
@@ -58,14 +58,13 @@ NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std::
58} 58}
59 59
60NvResult nvhost_nvdec::Ioctl2(Ioctl command, const std::vector<u8>& input, 60NvResult nvhost_nvdec::Ioctl2(Ioctl command, const std::vector<u8>& input,
61 const std::vector<u8>& inline_input, std::vector<u8>& output, 61 const std::vector<u8>& inline_input, std::vector<u8>& output) {
62 IoctlCtrl& ctrl) {
63 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 62 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
64 return NvResult::NotImplemented; 63 return NvResult::NotImplemented;
65} 64}
66 65
67NvResult nvhost_nvdec::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 66NvResult nvhost_nvdec::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
68 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 67 std::vector<u8>& inline_output) {
69 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 68 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
70 return NvResult::NotImplemented; 69 return NvResult::NotImplemented;
71} 70}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 884ed6c5b..79b8b6de1 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -14,13 +14,11 @@ public:
14 explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 14 explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
15 ~nvhost_nvdec() override; 15 ~nvhost_nvdec() override;
16 16
17 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 17 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
18 IoctlCtrl& ctrl) override;
19 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 18 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
20 const std::vector<u8>& inline_input, std::vector<u8>& output, 19 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
21 IoctlCtrl& ctrl) override;
22 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 20 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
23 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 21 std::vector<u8>& inline_output) override;
24}; 22};
25 23
26} // namespace Service::Nvidia::Devices 24} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index ab152bf0e..d9f95ba58 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -18,39 +18,6 @@ public:
     explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
     ~nvhost_nvdec_common() override;
 
-    /**
-     * Handles an ioctl1 request.
-     * @param command The ioctl command id.
-     * @param input A buffer containing the input data for the ioctl.
-     * @param output A buffer where the output data will be written to.
-     * @returns The result code of the ioctl.
-     */
-    virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                            IoctlCtrl& ctrl) = 0;
-
-    /**
-     * Handles an ioctl2 request.
-     * @param command The ioctl command id.
-     * @param input A buffer containing the input data for the ioctl.
-     * @param inline_input A buffer containing the input data for the ioctl which has been inlined.
-     * @param output A buffer where the output data will be written to.
-     * @returns The result code of the ioctl.
-     */
-    virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
-                            const std::vector<u8>& inline_input, std::vector<u8>& output,
-                            IoctlCtrl& ctrl) = 0;
-
-    /**
-     * Handles an ioctl3 request.
-     * @param command The ioctl command id.
-     * @param input A buffer containing the input data for the ioctl.
-     * @param output A buffer where the output data will be written to.
-     * @param inline_output A buffer where the inlined output data will be written to.
-     * @returns The result code of the ioctl.
-     */
-    virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                            std::vector<u8>& inline_output, IoctlCtrl& ctrl) = 0;
-
 protected:
     class BufferMap final {
     public:
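
The removed doc comments still describe the contract the three entry points follow after the signature change; only the IoctlCtrl out-parameter is gone. A minimal sketch of a device implementing the new signatures — the device name and command decoding below are hypothetical, not part of this change:

// Hypothetical device implementing the slimmed-down ioctl interface.
class nvhost_example final : public nvhost_nvdec_common {
public:
    using nvhost_nvdec_common::nvhost_nvdec_common;

    NvResult Ioctl1(Ioctl command, const std::vector<u8>& input,
                    std::vector<u8>& output) override {
        switch (command.group) {
        case 0x0:
            // Decode command.cmd, read parameters from input, fill output.
            return NvResult::Success;
        default:
            UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
            return NvResult::NotImplemented;
        }
    }

    NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
                    const std::vector<u8>& inline_input, std::vector<u8>& output) override {
        return NvResult::NotImplemented;
    }

    NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
                    std::vector<u8>& inline_output) override {
        return NvResult::NotImplemented;
    }
};
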
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 6f4ab0ab3..2d06955c0 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -13,8 +13,8 @@ namespace Service::Nvidia::Devices {
 nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {}
 nvhost_nvjpg::~nvhost_nvjpg() = default;
 
-NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                              IoctlCtrl& ctrl) {
+NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input,
+                              std::vector<u8>& output) {
     switch (command.group) {
     case 'H':
         switch (command.cmd) {
@@ -33,14 +33,13 @@ NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std::
 }
 
 NvResult nvhost_nvjpg::Ioctl2(Ioctl command, const std::vector<u8>& input,
-                              const std::vector<u8>& inline_input, std::vector<u8>& output,
-                              IoctlCtrl& ctrl) {
+                              const std::vector<u8>& inline_input, std::vector<u8>& output) {
     UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
     return NvResult::NotImplemented;
 }
 
 NvResult nvhost_nvjpg::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                              std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
+                              std::vector<u8>& inline_output) {
     UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
     return NvResult::NotImplemented;
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 6fb99d959..43948d18d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -16,13 +16,11 @@ public:
     explicit nvhost_nvjpg(Core::System& system);
     ~nvhost_nvjpg() override;
 
-    NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                    IoctlCtrl& ctrl) override;
+    NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
     NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
-                    const std::vector<u8>& inline_input, std::vector<u8>& output,
-                    IoctlCtrl& ctrl) override;
+                    const std::vector<u8>& inline_input, std::vector<u8>& output) override;
     NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                    std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
+                    std::vector<u8>& inline_output) override;
 
 private:
     struct IoctlSetNvmapFD {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 55a17f423..805fe86ae 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -15,8 +15,7 @@ nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
 
 nvhost_vic::~nvhost_vic() = default;
 
-NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                            IoctlCtrl& ctrl) {
+NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
     switch (command.group) {
     case 0x0:
         switch (command.cmd) {
@@ -51,14 +50,13 @@ NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve
 }
 
 NvResult nvhost_vic::Ioctl2(Ioctl command, const std::vector<u8>& input,
-                            const std::vector<u8>& inline_input, std::vector<u8>& output,
-                            IoctlCtrl& ctrl) {
+                            const std::vector<u8>& inline_input, std::vector<u8>& output) {
     UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
     return NvResult::NotImplemented;
 }
 
 NvResult nvhost_vic::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                            std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
+                            std::vector<u8>& inline_output) {
     UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
     return NvResult::NotImplemented;
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index 7f4858cd4..b2e11f4d4 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -14,12 +14,10 @@ public:
     explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
     ~nvhost_vic();
 
-    NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                    IoctlCtrl& ctrl) override;
+    NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
     NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
-                    const std::vector<u8>& inline_input, std::vector<u8>& output,
-                    IoctlCtrl& ctrl) override;
+                    const std::vector<u8>& inline_input, std::vector<u8>& output) override;
     NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                    std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
+                    std::vector<u8>& inline_output) override;
 };
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 910cfee51..4015a2740 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -19,8 +19,7 @@ nvmap::nvmap(Core::System& system) : nvdevice(system) {
 
 nvmap::~nvmap() = default;
 
-NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                       IoctlCtrl& ctrl) {
+NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
     switch (command.group) {
     case 0x1:
         switch (command.cmd) {
@@ -49,14 +48,13 @@ NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<
 }
 
 NvResult nvmap::Ioctl2(Ioctl command, const std::vector<u8>& input,
-                       const std::vector<u8>& inline_input, std::vector<u8>& output,
-                       IoctlCtrl& ctrl) {
+                       const std::vector<u8>& inline_input, std::vector<u8>& output) {
     UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
     return NvResult::NotImplemented;
 }
 
 NvResult nvmap::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                       std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
+                       std::vector<u8>& inline_output) {
     UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
     return NvResult::NotImplemented;
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index c0c2fa5eb..4484bd79f 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -19,13 +19,11 @@ public:
     explicit nvmap(Core::System& system);
     ~nvmap() override;
 
-    NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                    IoctlCtrl& ctrl) override;
+    NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
     NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
-                    const std::vector<u8>& inline_input, std::vector<u8>& output,
-                    IoctlCtrl& ctrl) override;
+                    const std::vector<u8>& inline_input, std::vector<u8>& output) override;
     NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
-                    std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
+                    std::vector<u8>& inline_output) override;
 
     /// Returns the allocated address of an nvmap object given its handle.
     VAddr GetObjectAddress(u32 handle) const;
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index d72c531f6..cc23b001c 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -61,32 +61,9 @@ void NVDRV::Ioctl1(Kernel::HLERequestContext& ctx) {
     std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
     const auto input_buffer = ctx.ReadBuffer(0);
 
-    IoctlCtrl ctrl{};
-
-    const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer, ctrl);
-    if (ctrl.must_delay) {
-        ctrl.fresh_call = false;
-        ctx.SleepClientThread(
-            "NVServices::DelayedResponse", ctrl.timeout,
-            [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
-                      Kernel::ThreadWakeupReason reason) {
-                IoctlCtrl ctrl2{ctrl};
-                std::vector<u8> tmp_output = output_buffer;
-                const auto nv_result2 = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output, ctrl2);
-
-                if (command.is_out != 0) {
-                    ctx.WriteBuffer(tmp_output);
-                }
-
-                IPC::ResponseBuilder rb{ctx_, 3};
-                rb.Push(RESULT_SUCCESS);
-                rb.PushEnum(nv_result2);
-            },
-            nvdrv->GetEventWriteable(ctrl.event_id));
-    } else {
-        if (command.is_out != 0) {
-            ctx.WriteBuffer(output_buffer);
-        }
+    const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer);
+    if (command.is_out != 0) {
+        ctx.WriteBuffer(output_buffer);
     }
 
     IPC::ResponseBuilder rb{ctx, 3};
@@ -110,36 +87,8 @@ void NVDRV::Ioctl2(Kernel::HLERequestContext& ctx) {
     const auto input_inlined_buffer = ctx.ReadBuffer(1);
     std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
 
-    IoctlCtrl ctrl{};
-
     const auto nv_result =
-        nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer, ctrl);
-    if (ctrl.must_delay) {
-        ctrl.fresh_call = false;
-        ctx.SleepClientThread(
-            "NVServices::DelayedResponse", ctrl.timeout,
-            [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
-                      Kernel::ThreadWakeupReason reason) {
-                IoctlCtrl ctrl2{ctrl};
-                std::vector<u8> tmp_output = output_buffer;
-                const auto nv_result2 = nvdrv->Ioctl2(fd, command, input_buffer,
-                                                      input_inlined_buffer, tmp_output, ctrl2);
-
-                if (command.is_out != 0) {
-                    ctx.WriteBuffer(tmp_output);
-                }
-
-                IPC::ResponseBuilder rb{ctx_, 3};
-                rb.Push(RESULT_SUCCESS);
-                rb.PushEnum(nv_result2);
-            },
-            nvdrv->GetEventWriteable(ctrl.event_id));
-    } else {
-        if (command.is_out != 0) {
-            ctx.WriteBuffer(output_buffer);
-        }
-    }
-
+        nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer);
     if (command.is_out != 0) {
         ctx.WriteBuffer(output_buffer);
     }
@@ -165,36 +114,11 @@ void NVDRV::Ioctl3(Kernel::HLERequestContext& ctx) {
     std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
     std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1));
 
-    IoctlCtrl ctrl{};
     const auto nv_result =
-        nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline, ctrl);
-    if (ctrl.must_delay) {
-        ctrl.fresh_call = false;
-        ctx.SleepClientThread(
-            "NVServices::DelayedResponse", ctrl.timeout,
-            [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
-                      Kernel::ThreadWakeupReason reason) {
-                IoctlCtrl ctrl2{ctrl};
-                std::vector<u8> tmp_output = output_buffer;
-                std::vector<u8> tmp_output2 = output_buffer;
-                const auto nv_result2 =
-                    nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output2, ctrl2);
-
-                if (command.is_out != 0) {
-                    ctx.WriteBuffer(tmp_output, 0);
-                    ctx.WriteBuffer(tmp_output2, 1);
-                }
-
-                IPC::ResponseBuilder rb{ctx_, 3};
-                rb.Push(RESULT_SUCCESS);
-                rb.PushEnum(nv_result2);
-            },
-            nvdrv->GetEventWriteable(ctrl.event_id));
-    } else {
-        if (command.is_out != 0) {
-            ctx.WriteBuffer(output_buffer, 0);
-            ctx.WriteBuffer(output_buffer_inline, 1);
-        }
+        nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
+    if (command.is_out != 0) {
+        ctx.WriteBuffer(output_buffer, 0);
+        ctx.WriteBuffer(output_buffer_inline, 1);
     }
 
     IPC::ResponseBuilder rb{ctx, 3};
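
With IoctlCtrl and the SleepClientThread deferral gone, all three wrappers collapse into the same synchronous shape. A condensed sketch of that common flow (the helper name and the request-parsing lines are assumptions for illustration, mirroring the Ioctl1 handler above, not a function this change adds):

// Condensed form of the post-change wrapper flow (sketch, not verbatim).
void NVDRV::IoctlCommon(Kernel::HLERequestContext& ctx) { // hypothetical helper
    IPC::RequestParser rp{ctx};
    const auto fd = rp.Pop<DeviceFD>();      // assumed parsing, as in Ioctl1/2/3
    const auto command = rp.PopRaw<Ioctl>();

    std::vector<u8> output(ctx.GetWriteBufferSize(0));
    const auto result = nvdrv->Ioctl1(fd, command, ctx.ReadBuffer(0), output);

    if (command.is_out != 0) {
        ctx.WriteBuffer(output); // Only write back when the command declares output.
    }

    IPC::ResponseBuilder rb{ctx, 3};
    rb.Push(RESULT_SUCCESS); // The IPC transport succeeded...
    rb.PushEnum(result);     // ...the NV result travels in the payload.
}
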
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h
index a3c4ecd85..3294bc0e7 100644
--- a/src/core/hle/service/nvdrv/nvdata.h
+++ b/src/core/hle/service/nvdrv/nvdata.h
@@ -97,15 +97,4 @@ union Ioctl {
     BitField<31, 1, u32> is_out;
 };
 
-struct IoctlCtrl {
-    // First call done to the servioce for services that call itself again after a call.
-    bool fresh_call{true};
-    // Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep
-    bool must_delay{};
-    // Timeout for the delay
-    s64 timeout{};
-    // NV Event Id
-    s32 event_id{-1};
-};
-
 } // namespace Service::Nvidia
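
With IoctlCtrl deleted, the Ioctl union itself carries everything the wrappers need. A small sketch of decoding such a word through its BitField members — the raw value is illustrative, and only cmd, group, is_out, and raw are taken from the surrounding diffs:

// Decoding an ioctl command word via its BitField members (illustrative value).
Ioctl command{};
command.raw = 0xC0380101;                       // hypothetical raw command word
const u32 cmd = command.cmd;                    // command number within the group
const u32 group = command.group;                // device/group identifier
const bool wants_output = command.is_out != 0;  // bit 31: output direction flag
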
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 8e0c9f093..e03195afe 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -91,7 +91,7 @@ DeviceFD Module::Open(const std::string& device_name) {
 }
 
 NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
-                        std::vector<u8>& output, IoctlCtrl& ctrl) {
+                        std::vector<u8>& output) {
     if (fd < 0) {
         LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
         return NvResult::InvalidState;
@@ -104,12 +104,11 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input
         return NvResult::NotImplemented;
     }
 
-    return itr->second->Ioctl1(command, input, output, ctrl);
+    return itr->second->Ioctl1(command, input, output);
 }
 
 NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
-                        const std::vector<u8>& inline_input, std::vector<u8>& output,
-                        IoctlCtrl& ctrl) {
+                        const std::vector<u8>& inline_input, std::vector<u8>& output) {
     if (fd < 0) {
         LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
         return NvResult::InvalidState;
@@ -122,11 +121,11 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input
         return NvResult::NotImplemented;
     }
 
-    return itr->second->Ioctl2(command, input, inline_input, output, ctrl);
+    return itr->second->Ioctl2(command, input, inline_input, output);
 }
 
 NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
-                        std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
+                        std::vector<u8>& output, std::vector<u8>& inline_output) {
     if (fd < 0) {
         LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
         return NvResult::InvalidState;
@@ -139,7 +138,7 @@ NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input
         return NvResult::NotImplemented;
     }
 
-    return itr->second->Ioctl3(command, input, output, inline_output, ctrl);
+    return itr->second->Ioctl3(command, input, output, inline_output);
 }
 
 NvResult Module::Close(DeviceFD fd) {
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 5985d2179..144e657e5 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -119,13 +119,13 @@ public:
 
     /// Sends an ioctl command to the specified file descriptor.
     NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
-                    std::vector<u8>& output, IoctlCtrl& ctrl);
+                    std::vector<u8>& output);
 
     NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
-                    const std::vector<u8>& inline_input, std::vector<u8>& output, IoctlCtrl& ctrl);
+                    const std::vector<u8>& inline_input, std::vector<u8>& output);
 
     NvResult Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
-                    std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl);
+                    std::vector<u8>& output, std::vector<u8>& inline_output);
 
     /// Closes a device file descriptor and returns operation success.
     NvResult Close(DeviceFD fd);
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index b89a2d41b..c8c6a4d64 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -22,10 +22,16 @@ BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id)
 BufferQueue::~BufferQueue() = default;
 
 void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) {
+    ASSERT(slot < buffer_slots);
     LOG_WARNING(Service, "Adding graphics buffer {}", slot);
 
-    free_buffers.push_back(slot);
-    queue.push_back({
+    {
+        std::unique_lock lock{queue_mutex};
+        free_buffers.push_back(slot);
+    }
+    condition.notify_one();
+
+    buffers[slot] = {
         .slot = slot,
         .status = Buffer::Status::Free,
         .igbp_buffer = igbp_buffer,
@@ -33,127 +39,139 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
         .crop_rect = {},
         .swap_interval = 0,
         .multi_fence = {},
-    });
+    };
 
     buffer_wait_event.writable->Signal();
 }
 
 std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
                                                                                        u32 height) {
+    // Wait for first request before trying to dequeue
+    {
+        std::unique_lock lock{queue_mutex};
+        condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; });
+    }
 
-    if (free_buffers.empty()) {
+    if (!is_connect) {
+        // Buffer was disconnected while the thread was blocked, this is most likely due to
+        // emulation being stopped
        return std::nullopt;
     }
 
+    std::unique_lock lock{queue_mutex};
+
     auto f_itr = free_buffers.begin();
-    auto itr = queue.end();
+    auto slot = buffers.size();
 
     while (f_itr != free_buffers.end()) {
-        auto slot = *f_itr;
-        itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
-            // Only consider free buffers. Buffers become free once again after they've been
-            // Acquired and Released by the compositor, see the NVFlinger::Compose method.
-            if (buffer.status != Buffer::Status::Free) {
-                return false;
-            }
-
-            if (buffer.slot != slot) {
-                return false;
-            }
-
-            // Make sure that the parameters match.
-            return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
-        });
-
-        if (itr != queue.end()) {
+        const Buffer& buffer = buffers[*f_itr];
+        if (buffer.status == Buffer::Status::Free && buffer.igbp_buffer.width == width &&
+            buffer.igbp_buffer.height == height) {
+            slot = *f_itr;
             free_buffers.erase(f_itr);
             break;
         }
         ++f_itr;
     }
-
-    if (itr == queue.end()) {
+    if (slot == buffers.size()) {
         return std::nullopt;
     }
-
-    itr->status = Buffer::Status::Dequeued;
-    return {{itr->slot, &itr->multi_fence}};
+    buffers[slot].status = Buffer::Status::Dequeued;
+    return {{buffers[slot].slot, &buffers[slot].multi_fence}};
 }
 
 const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
-    auto itr = std::find_if(queue.begin(), queue.end(),
-                            [&](const Buffer& buffer) { return buffer.slot == slot; });
-    ASSERT(itr != queue.end());
-    ASSERT(itr->status == Buffer::Status::Dequeued);
-    return itr->igbp_buffer;
+    ASSERT(slot < buffers.size());
+    ASSERT(buffers[slot].status == Buffer::Status::Dequeued);
+    ASSERT(buffers[slot].slot == slot);
+
+    return buffers[slot].igbp_buffer;
 }
 
 void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
                               const Common::Rectangle<int>& crop_rect, u32 swap_interval,
                               Service::Nvidia::MultiFence& multi_fence) {
-    auto itr = std::find_if(queue.begin(), queue.end(),
-                            [&](const Buffer& buffer) { return buffer.slot == slot; });
-    ASSERT(itr != queue.end());
-    ASSERT(itr->status == Buffer::Status::Dequeued);
-    itr->status = Buffer::Status::Queued;
-    itr->transform = transform;
-    itr->crop_rect = crop_rect;
-    itr->swap_interval = swap_interval;
-    itr->multi_fence = multi_fence;
+    ASSERT(slot < buffers.size());
+    ASSERT(buffers[slot].status == Buffer::Status::Dequeued);
+    ASSERT(buffers[slot].slot == slot);
+
+    buffers[slot].status = Buffer::Status::Queued;
+    buffers[slot].transform = transform;
+    buffers[slot].crop_rect = crop_rect;
+    buffers[slot].swap_interval = swap_interval;
+    buffers[slot].multi_fence = multi_fence;
     queue_sequence.push_back(slot);
 }
 
 void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence) {
-    const auto itr = std::find_if(queue.begin(), queue.end(),
-                                  [slot](const Buffer& buffer) { return buffer.slot == slot; });
-    ASSERT(itr != queue.end());
-    ASSERT(itr->status != Buffer::Status::Free);
-    itr->status = Buffer::Status::Free;
-    itr->multi_fence = multi_fence;
-    itr->swap_interval = 0;
+    ASSERT(slot < buffers.size());
+    ASSERT(buffers[slot].status != Buffer::Status::Free);
+    ASSERT(buffers[slot].slot == slot);
 
-    free_buffers.push_back(slot);
+    buffers[slot].status = Buffer::Status::Free;
+    buffers[slot].multi_fence = multi_fence;
+    buffers[slot].swap_interval = 0;
+
+    {
+        std::unique_lock lock{queue_mutex};
+        free_buffers.push_back(slot);
+    }
+    condition.notify_one();
 
     buffer_wait_event.writable->Signal();
 }
 
 std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
-    auto itr = queue.end();
+    std::size_t buffer_slot = buffers.size();
     // Iterate to find a queued buffer matching the requested slot.
-    while (itr == queue.end() && !queue_sequence.empty()) {
-        const u32 slot = queue_sequence.front();
-        itr = std::find_if(queue.begin(), queue.end(), [&slot](const Buffer& buffer) {
-            return buffer.status == Buffer::Status::Queued && buffer.slot == slot;
-        });
+    while (buffer_slot == buffers.size() && !queue_sequence.empty()) {
+        const auto slot = static_cast<std::size_t>(queue_sequence.front());
+        ASSERT(slot < buffers.size());
+        if (buffers[slot].status == Buffer::Status::Queued) {
+            ASSERT(buffers[slot].slot == slot);
+            buffer_slot = slot;
+        }
        queue_sequence.pop_front();
     }
-    if (itr == queue.end()) {
+    if (buffer_slot == buffers.size()) {
         return std::nullopt;
     }
-    itr->status = Buffer::Status::Acquired;
-    return *itr;
+    buffers[buffer_slot].status = Buffer::Status::Acquired;
+    return {{buffers[buffer_slot]}};
 }
 
 void BufferQueue::ReleaseBuffer(u32 slot) {
-    auto itr = std::find_if(queue.begin(), queue.end(),
-                            [&](const Buffer& buffer) { return buffer.slot == slot; });
-    ASSERT(itr != queue.end());
-    ASSERT(itr->status == Buffer::Status::Acquired);
-    itr->status = Buffer::Status::Free;
-    free_buffers.push_back(slot);
+    ASSERT(slot < buffers.size());
+    ASSERT(buffers[slot].status == Buffer::Status::Acquired);
+    ASSERT(buffers[slot].slot == slot);
+
+    buffers[slot].status = Buffer::Status::Free;
+    {
+        std::unique_lock lock{queue_mutex};
+        free_buffers.push_back(slot);
+    }
+    condition.notify_one();
 
     buffer_wait_event.writable->Signal();
 }
 
-void BufferQueue::Disconnect() {
-    queue.clear();
+void BufferQueue::Connect() {
     queue_sequence.clear();
     id = 1;
     layer_id = 1;
+    is_connect = true;
+}
+
+void BufferQueue::Disconnect() {
+    buffers.fill({});
+    queue_sequence.clear();
+    buffer_wait_event.writable->Signal();
+    is_connect = false;
+    condition.notify_one();
 }
 
 u32 BufferQueue::Query(QueryType type) {
-    LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type));
+    LOG_WARNING(Service, "(STUBBED) called type={}", type);
 
     switch (type) {
     case QueryType::NativeWindowFormat:
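
The new DequeueBuffer is a standard condition-variable consumer: it blocks until a producer pushes a free slot or the queue disconnects. A minimal standalone sketch of the same wait/notify pattern, independent of yuzu's types (all names here are hypothetical):

#include <condition_variable>
#include <list>
#include <mutex>
#include <optional>

class SlotQueue {
public:
    std::optional<int> Pop() {
        std::unique_lock lock{mutex};
        condition.wait(lock, [this] { return !free_slots.empty() || !connected; });
        if (!connected) {
            return std::nullopt; // Woken by Disconnect(), not by new work.
        }
        const int slot = free_slots.front();
        free_slots.pop_front();
        return slot;
    }

    void Push(int slot) {
        {
            std::unique_lock lock{mutex};
            free_slots.push_back(slot);
        }
        condition.notify_one(); // Notify outside the lock to avoid a wasted wakeup.
    }

    void Disconnect() {
        {
            std::unique_lock lock{mutex};
            connected = false;
        }
        condition.notify_all(); // Release every blocked consumer on shutdown.
    }

private:
    std::mutex mutex;
    std::condition_variable condition;
    std::list<int> free_slots;
    bool connected = true;
};
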
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index e7517c7e1..a2f60d9eb 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -4,7 +4,9 @@
 
 #pragma once
 
+#include <condition_variable>
 #include <list>
+#include <mutex>
 #include <optional>
 #include <vector>
 
@@ -21,6 +23,7 @@ class KernelCore;
 
 namespace Service::NVFlinger {
 
+constexpr u32 buffer_slots = 0x40;
 struct IGBPBuffer {
     u32_le magic;
     u32_le width;
@@ -98,6 +101,7 @@ public:
     void CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence);
     std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
     void ReleaseBuffer(u32 slot);
+    void Connect();
     void Disconnect();
     u32 Query(QueryType type);
 
@@ -105,18 +109,28 @@ public:
         return id;
     }
 
+    bool IsConnected() const {
+        return is_connect;
+    }
+
     std::shared_ptr<Kernel::WritableEvent> GetWritableBufferWaitEvent() const;
 
     std::shared_ptr<Kernel::ReadableEvent> GetBufferWaitEvent() const;
 
 private:
-    u32 id;
-    u64 layer_id;
+    BufferQueue(const BufferQueue&) = delete;
+
+    u32 id{};
+    u64 layer_id{};
+    std::atomic_bool is_connect{};
 
     std::list<u32> free_buffers;
-    std::vector<Buffer> queue;
+    std::array<Buffer, buffer_slots> buffers;
     std::list<u32> queue_sequence;
     Kernel::EventPair buffer_wait_event;
+
+    std::mutex queue_mutex;
+    std::condition_variable condition;
 };
 
 } // namespace Service::NVFlinger
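
Swapping the searched std::vector for a slot-indexed std::array of buffer_slots (0x40) entries makes every lookup O(1), and "slot not found" becomes an explicit bounds/state assertion rather than an end-iterator check. A tiny before/after sketch of the access pattern (the GetBuffer helper is hypothetical):

#include <array>
#include <cassert>

struct Buffer { /* slot, status, igbp_buffer, ... */ };
constexpr std::size_t buffer_slots = 0x40;
std::array<Buffer, buffer_slots> buffers;

// Before: every operation searched a vector for the matching slot, e.g.
//   auto itr = std::find_if(queue.begin(), queue.end(),
//                           [&](const Buffer& b) { return b.slot == slot; });
// After: the slot number is the index; validity is asserted, not searched for.
Buffer& GetBuffer(std::size_t slot) { // hypothetical helper
    assert(slot < buffers.size());
    return buffers[slot];
}
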
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 44aa2bdae..4b3581949 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -88,6 +88,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
 }
 
 NVFlinger::~NVFlinger() {
+    for (auto& buffer_queue : buffer_queues) {
+        buffer_queue->Disconnect();
+    }
+
     if (system.IsMulticore()) {
         is_running = false;
         wait_event->Set();
@@ -104,6 +108,8 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
 }
 
 std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
+    const auto guard = Lock();
+
     LOG_DEBUG(Service, "Opening \"{}\" display", name);
 
     // TODO(Subv): Currently we only support the Default display.
@@ -121,6 +127,7 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
 }
 
 std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
+    const auto guard = Lock();
     auto* const display = FindDisplay(display_id);
 
     if (display == nullptr) {
@@ -129,18 +136,22 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
 
     const u64 layer_id = next_layer_id++;
     const u32 buffer_queue_id = next_buffer_queue_id++;
-    buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id);
-    display->CreateLayer(layer_id, buffer_queues.back());
+    buffer_queues.emplace_back(
+        std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id));
+    display->CreateLayer(layer_id, *buffer_queues.back());
     return layer_id;
 }
 
 void NVFlinger::CloseLayer(u64 layer_id) {
+    const auto guard = Lock();
+
     for (auto& display : displays) {
         display.CloseLayer(layer_id);
     }
 }
 
 std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
+    const auto guard = Lock();
     const auto* const layer = FindLayer(display_id, layer_id);
 
     if (layer == nullptr) {
@@ -151,6 +162,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
 }
 
 std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
+    const auto guard = Lock();
     auto* const display = FindDisplay(display_id);
 
     if (display == nullptr) {
@@ -160,20 +172,16 @@ std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id)
     return display->GetVSyncEvent();
 }
 
-BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
+BufferQueue* NVFlinger::FindBufferQueue(u32 id) {
+    const auto guard = Lock();
     const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
-                                  [id](const auto& queue) { return queue.GetId() == id; });
+                                  [id](const auto& queue) { return queue->GetId() == id; });
 
-    ASSERT(itr != buffer_queues.end());
-    return *itr;
-}
-
-const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
-    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
-                                  [id](const auto& queue) { return queue.GetId() == id; });
+    if (itr == buffer_queues.end()) {
+        return nullptr;
+    }
 
-    ASSERT(itr != buffer_queues.end());
-    return *itr;
+    return itr->get();
 }
 
 VI::Display* NVFlinger::FindDisplay(u64 display_id) {
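
Since FindBufferQueue now returns a pointer instead of asserting, callers are expected to handle the miss case (for example, a queue torn down during shutdown). A hedged sketch of the caller-side pattern — the call site below is hypothetical:

// Hypothetical call site: the pointer return makes "queue no longer exists"
// an explicit, handleable case instead of an assertion failure.
if (BufferQueue* const queue = nv_flinger.FindBufferQueue(buffer_queue_id)) {
    queue->QueueBuffer(slot, transform, crop_rect, swap_interval, multi_fence);
} else {
    LOG_ERROR(Service, "Buffer queue {} does not exist", buffer_queue_id);
}
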
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 1ebe949c0..c6765259f 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -75,10 +75,7 @@ public:
     [[nodiscard]] std::shared_ptr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
 
     /// Obtains a buffer queue identified by the ID.
-    [[nodiscard]] BufferQueue& FindBufferQueue(u32 id);
-
-    /// Obtains a buffer queue identified by the ID.
-    [[nodiscard]] const BufferQueue& FindBufferQueue(u32 id) const;
+    [[nodiscard]] BufferQueue* FindBufferQueue(u32 id);
 
     /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
     /// finished.
@@ -86,11 +83,11 @@ public:
 
     [[nodiscard]] s64 GetNextTicks() const;
 
+private:
     [[nodiscard]] std::unique_lock<std::mutex> Lock() const {
         return std::unique_lock{*guard};
     }
 
-private:
     /// Finds the display identified by the specified ID.
     [[nodiscard]] VI::Display* FindDisplay(u64 display_id);
 
@@ -110,7 +107,7 @@ private:
     std::shared_ptr<Nvidia::Module> nvdrv;
 
     std::vector<VI::Display> displays;
-    std::vector<BufferQueue> buffer_queues;
+    std::vector<std::unique_ptr<BufferQueue>> buffer_queues;
 
     /// Id to use for the next layer that is created, this counter is shared among all displays.
     u64 next_layer_id = 1;
diff --git a/src/core/hle/service/pcie/pcie.cpp b/src/core/hle/service/pcie/pcie.cpp
index 80c0fc7ac..f6686fc4d 100644
--- a/src/core/hle/service/pcie/pcie.cpp
+++ b/src/core/hle/service/pcie/pcie.cpp
@@ -48,7 +48,7 @@ public:
 
 class PCIe final : public ServiceFramework<PCIe> {
 public:
-    explicit PCIe(Core::System& system_) : ServiceFramework{system, "pcie"} {
+    explicit PCIe(Core::System& system_) : ServiceFramework{system_, "pcie"} {
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "RegisterClassDriver"},
diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp
index 392fda73e..b417624c9 100644
--- a/src/core/hle/service/prepo/prepo.cpp
+++ b/src/core/hle/service/prepo/prepo.cpp
@@ -65,7 +65,7 @@ private:
         }
 
         LOG_DEBUG(Service_PREPO, "called, type={:02X}, process_id={:016X}, data1_size={:016X}",
-                  static_cast<u8>(Type), process_id, data[0].size());
+                  Type, process_id, data[0].size());
 
         const auto& reporter{system.GetReporter()};
         reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id);
@@ -92,7 +92,7 @@ private:
         LOG_DEBUG(
             Service_PREPO,
             "called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}",
-            static_cast<u8>(Type), user_id[1], user_id[0], process_id, data[0].size());
+            Type, user_id[1], user_id[0], process_id, data[0].size());
 
         const auto& reporter{system.GetReporter()};
         reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id,
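
Passing the enum straight to the logger only compiles if fmt knows how to format the type, so this change presumably relies on a formatter specialization added elsewhere. A generic sketch of such a specialization — the enum name below is hypothetical:

#include <fmt/format.h>

enum class PlayReportType : u8 { Old, Old2, New, System }; // hypothetical enum

// Forward to the integer formatter so specs like "{:02X}" keep working.
template <>
struct fmt::formatter<PlayReportType> : fmt::formatter<u32> {
    template <typename FormatContext>
    auto format(PlayReportType type, FormatContext& ctx) const {
        return fmt::formatter<u32>::format(static_cast<u32>(type), ctx);
    }
};
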
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 360e0bf37..ff2a5b1db 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -95,9 +95,14 @@ ServiceFrameworkBase::ServiceFrameworkBase(Core::System& system_, const char* se
     : system{system_}, service_name{service_name_}, max_sessions{max_sessions_},
       handler_invoker{handler_invoker_} {}
 
-ServiceFrameworkBase::~ServiceFrameworkBase() = default;
+ServiceFrameworkBase::~ServiceFrameworkBase() {
+    // Wait for other threads to release access before destroying
+    const auto guard = LockService();
+}
 
 void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) {
+    const auto guard = LockService();
+
     ASSERT(!port_installed);
 
     auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap();
@@ -106,6 +111,8 @@ void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager)
 }
 
 void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) {
+    const auto guard = LockService();
+
     ASSERT(!port_installed);
 
     auto [server_port, client_port] =
@@ -115,17 +122,6 @@ void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) {
     port_installed = true;
 }
 
-std::shared_ptr<Kernel::ClientPort> ServiceFrameworkBase::CreatePort(Kernel::KernelCore& kernel) {
-    ASSERT(!port_installed);
-
-    auto [server_port, client_port] =
-        Kernel::ServerPort::CreatePortPair(kernel, max_sessions, service_name);
-    auto port = MakeResult(std::move(server_port)).Unwrap();
-    port->SetHleHandler(shared_from_this());
-    port_installed = true;
-    return client_port;
-}
-
 void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n) {
     handlers.reserve(handlers.size() + n);
     for (std::size_t i = 0; i < n; ++i) {
@@ -164,6 +160,8 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
 }
 
 ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& context) {
+    const auto guard = LockService();
+
     switch (context.GetCommandType()) {
     case IPC::CommandType::Close: {
         IPC::ResponseBuilder rb{context, 2};
@@ -181,10 +179,14 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
         break;
     }
     default:
-        UNIMPLEMENTED_MSG("command_type={}", static_cast<int>(context.GetCommandType()));
+        UNIMPLEMENTED_MSG("command_type={}", context.GetCommandType());
     }
 
-    context.WriteToOutgoingCommandBuffer(context.GetThread());
+    // If emulation was shutdown, we are closing service threads, do not write the response back to
+    // memory that may be shutting down as well.
+    if (system.IsPoweredOn()) {
+        context.WriteToOutgoingCommandBuffer(context.GetThread());
+    }
 
     return RESULT_SUCCESS;
 }
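
Every entry point now takes a scoped lock on a per-service spin lock, so request handlers, installation, and destruction serialize against each other. A minimal standalone sketch of the pattern, assuming Common::SpinLock is a BasicLockable wrapper over std::atomic_flag (the stand-in below is an assumption, not yuzu's actual implementation):

#include <atomic>
#include <mutex> // std::scoped_lock

// Stand-in for Common::SpinLock: lock()/unlock() make it usable with scoped_lock.
class SpinLock {
public:
    void lock() {
        while (flag.test_and_set(std::memory_order_acquire)) {
        }
    }
    void unlock() {
        flag.clear(std::memory_order_release);
    }

private:
    std::atomic_flag flag = ATOMIC_FLAG_INIT;
};

class Service {
public:
    void HandleRequest() {
        const auto guard = LockService(); // Serializes against other threads.
        // ... mutate service state safely here ...
    }

    ~Service() {
        const auto guard = LockService(); // Wait for in-flight handlers to drain.
    }

private:
    // Returning scoped_lock by value is legal under C++17 guaranteed copy elision.
    [[nodiscard]] std::scoped_lock<SpinLock> LockService() {
        return std::scoped_lock{lock_service};
    }

    SpinLock lock_service;
};
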
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 62a182310..916445517 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -5,9 +5,11 @@
 #pragma once
 
 #include <cstddef>
+#include <mutex>
 #include <string>
 #include <boost/container/flat_map.hpp>
 #include "common/common_types.h"
+#include "common/spin_lock.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 
@@ -68,11 +70,9 @@ public:
     void InstallAsService(SM::ServiceManager& service_manager);
     /// Creates a port pair and registers it on the kernel's global port registry.
     void InstallAsNamedPort(Kernel::KernelCore& kernel);
-    /// Creates and returns an unregistered port for the service.
-    std::shared_ptr<Kernel::ClientPort> CreatePort(Kernel::KernelCore& kernel);
-
+    /// Invokes a service request routine.
     void InvokeRequest(Kernel::HLERequestContext& ctx);
-
+    /// Handles a synchronization request for the service.
     ResultCode HandleSyncRequest(Kernel::HLERequestContext& context) override;
 
 protected:
@@ -80,6 +80,11 @@ protected:
     template <typename Self>
     using HandlerFnP = void (Self::*)(Kernel::HLERequestContext&);
 
+    /// Used to gain exclusive access to the service members, e.g. from CoreTiming thread.
+    [[nodiscard]] std::scoped_lock<Common::SpinLock> LockService() {
+        return std::scoped_lock{lock_service};
+    }
+
     /// System context that the service operates under.
     Core::System& system;
 
@@ -115,6 +120,9 @@ private:
     /// Function used to safely up-cast pointers to the derived class before invoking a handler.
     InvokerFn* handler_invoker;
     boost::container::flat_map<u32, FunctionInfoBase> handlers;
+
+    /// Used to gain exclusive access to the service members, e.g. from CoreTiming thread.
+    Common::SpinLock lock_service;
 };
 
 /**
diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp
index 19b8f113d..b58b2c8c5 100644
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -34,9 +34,9 @@ void GetFirmwareVersionImpl(Kernel::HLERequestContext& ctx, GetFirmwareVersionTy
     // consistence (currently reports as 5.1.0-0.0)
     const auto archive = FileSys::SystemArchive::SystemVersion();
 
-    const auto early_exit_failure = [&ctx](const std::string& desc, ResultCode code) {
+    const auto early_exit_failure = [&ctx](std::string_view desc, ResultCode code) {
         LOG_ERROR(Service_SET, "General failure while attempting to resolve firmware version ({}).",
-                  desc.c_str());
+                  desc);
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(code);
     };
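
Taking std::string_view avoids constructing a temporary std::string on every call, and fmt formats string_view directly, so the .c_str() hop is unnecessary. A small standalone sketch of the idiom (the function and message are illustrative only):

#include <string>
#include <string_view>

#include <fmt/format.h>

// string_view binds to literals and std::string alike without allocating,
// and fmt accepts it directly - no .c_str() needed.
void ReportFailure(std::string_view desc) {
    fmt::print("General failure while resolving firmware version ({}).\n", desc);
}

// Usage: both calls avoid a copy at the call boundary.
// ReportFailure("system version file missing");
// ReportFailure(std::string{"dynamic description"});
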
diff --git a/src/core/hle/service/sockets/blocking_worker.h b/src/core/hle/service/sockets/blocking_worker.h
deleted file mode 100644
index 2d53e52b6..000000000
--- a/src/core/hle/service/sockets/blocking_worker.h
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright 2020 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <atomic>
-#include <memory>
-#include <string>
-#include <string_view>
-#include <thread>
-#include <variant>
-#include <vector>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/microprofile.h"
-#include "common/thread.h"
-#include "core/core.h"
-#include "core/hle/kernel/hle_ipc.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/writable_event.h"
-
-namespace Service::Sockets {
-
-/**
- * Worker abstraction to execute blocking calls on host without blocking the guest thread
- *
- * @tparam Service Service where the work is executed
- * @tparam Types Types of work to execute
- */
-template <class Service, class... Types>
-class BlockingWorker {
-    using This = BlockingWorker<Service, Types...>;
-    using WorkVariant = std::variant<std::monostate, Types...>;
-
-public:
-    /// Create a new worker
-    static std::unique_ptr<This> Create(Core::System& system, Service* service,
-                                        std::string_view name) {
-        return std::unique_ptr<This>(new This(system, service, name));
-    }
-
-    ~BlockingWorker() {
-        while (!is_available.load(std::memory_order_relaxed)) {
-            // Busy wait until work is finished
-            std::this_thread::yield();
-        }
-        // Monostate means to exit the thread
-        work = std::monostate{};
-        work_event.Set();
-        thread.join();
-    }
-
-    /**
-     * Try to capture the worker to send work after a success
-     * @returns True when the worker has been successfully captured
-     */
-    bool TryCapture() {
-        bool expected = true;
-        return is_available.compare_exchange_weak(expected, false, std::memory_order_relaxed,
-                                                  std::memory_order_relaxed);
-    }
-
-    /**
-     * Send work to this worker abstraction
-     * @see TryCapture must be called before attempting to call this function
-     */
-    template <class Work>
-    void SendWork(Work new_work) {
-        ASSERT_MSG(!is_available, "Trying to send work on a worker that's not captured");
-        work = std::move(new_work);
-        work_event.Set();
-    }
-
-    /// Generate a callback for @see SleepClientThread
-    template <class Work>
-    auto Callback() {
-        return [this](std::shared_ptr<Kernel::Thread>, Kernel::HLERequestContext& ctx,
-                      Kernel::ThreadWakeupReason reason) {
-            ASSERT(reason == Kernel::ThreadWakeupReason::Signal);
-            std::get<Work>(work).Response(ctx);
-            is_available.store(true);
-        };
-    }
-
-    /// Get kernel event that will be signalled by the worker when the host operation finishes
-    std::shared_ptr<Kernel::WritableEvent> KernelEvent() const {
-        return kernel_event;
-    }
-
-private:
-    explicit BlockingWorker(Core::System& system, Service* service, std::string_view name) {
-        auto pair = Kernel::WritableEvent::CreateEventPair(system.Kernel(), std::string(name));
-        kernel_event = std::move(pair.writable);
-        thread = std::thread([this, &system, service, name] { Run(system, service, name); });
-    }
-
-    void Run(Core::System& system, Service* service, std::string_view name) {
-        system.RegisterHostThread();
-
-        const std::string thread_name = fmt::format("yuzu:{}", name);
-        MicroProfileOnThreadCreate(thread_name.c_str());
-        Common::SetCurrentThreadName(thread_name.c_str());
-
-        bool keep_running = true;
-        while (keep_running) {
-            work_event.Wait();
-
-            const auto visit_fn = [service, &keep_running]<typename T>(T&& w) {
-                if constexpr (std::is_same_v<std::decay_t<T>, std::monostate>) {
-                    keep_running = false;
-                } else {
-                    w.Execute(service);
-                }
-            };
-            std::visit(visit_fn, work);
-
-            kernel_event->Signal();
-        }
-    }
-
-    std::thread thread;
-    WorkVariant work;
-    Common::Event work_event;
-    std::shared_ptr<Kernel::WritableEvent> kernel_event;
-    std::atomic_bool is_available{true};
-};
-
-template <class Service, class... Types>
-class BlockingWorkerPool {
-    using Worker = BlockingWorker<Service, Types...>;
-
-public:
-    explicit BlockingWorkerPool(Core::System& system_, Service* service_)
-        : system{system_}, service{service_} {}
-
-    /// Returns a captured worker thread, creating new ones if necessary
-    Worker* CaptureWorker() {
-        for (auto& worker : workers) {
-            if (worker->TryCapture()) {
-                return worker.get();
-            }
-        }
-        auto new_worker = Worker::Create(system, service, fmt::format("BSD:{}", workers.size()));
-        [[maybe_unused]] const bool success = new_worker->TryCapture();
-        ASSERT(success);
-
-        return workers.emplace_back(std::move(new_worker)).get();
-    }
-
-private:
-    Core::System& system;
-    Service* const service;
-
-    std::vector<std::unique_ptr<Worker>> workers;
-};
-
-} // namespace Service::Sockets
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp
index a9875b9a6..2b824059d 100644
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -30,7 +30,7 @@ bool IsConnectionBased(Type type) {
     case Type::DGRAM:
         return false;
     default:
-        UNIMPLEMENTED_MSG("Unimplemented type={}", static_cast<int>(type));
+        UNIMPLEMENTED_MSG("Unimplemented type={}", type);
         return false;
     }
 }
@@ -178,13 +178,12 @@ void BSD::Poll(Kernel::HLERequestContext& ctx) {
 
     LOG_DEBUG(Service, "called. nfds={} timeout={}", nfds, timeout);
 
-    ExecuteWork(ctx, "BSD:Poll", timeout != 0,
-                PollWork{
-                    .nfds = nfds,
-                    .timeout = timeout,
-                    .read_buffer = ctx.ReadBuffer(),
-                    .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
-                });
+    ExecuteWork(ctx, PollWork{
+                         .nfds = nfds,
+                         .timeout = timeout,
+                         .read_buffer = ctx.ReadBuffer(),
+                         .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
+                     });
 }
 
 void BSD::Accept(Kernel::HLERequestContext& ctx) {
@@ -193,11 +192,10 @@ void BSD::Accept(Kernel::HLERequestContext& ctx) {
 
     LOG_DEBUG(Service, "called. fd={}", fd);
 
-    ExecuteWork(ctx, "BSD:Accept", IsBlockingSocket(fd),
-                AcceptWork{
-                    .fd = fd,
-                    .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
-                });
+    ExecuteWork(ctx, AcceptWork{
+                         .fd = fd,
+                         .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
+                     });
 }
 
 void BSD::Bind(Kernel::HLERequestContext& ctx) {
@@ -215,11 +213,10 @@ void BSD::Connect(Kernel::HLERequestContext& ctx) {
 
     LOG_DEBUG(Service, "called. fd={} addrlen={}", fd, ctx.GetReadBufferSize());
 
-    ExecuteWork(ctx, "BSD:Connect", IsBlockingSocket(fd),
-                ConnectWork{
-                    .fd = fd,
-                    .addr = ctx.ReadBuffer(),
-                });
+    ExecuteWork(ctx, ConnectWork{
                         .fd = fd,
                         .addr = ctx.ReadBuffer(),
                     });
 }
 
 void BSD::GetPeerName(Kernel::HLERequestContext& ctx) {
@@ -327,12 +324,11 @@ void BSD::Recv(Kernel::HLERequestContext& ctx) {
 
     LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetWriteBufferSize());
 
-    ExecuteWork(ctx, "BSD:Recv", IsBlockingSocket(fd),
-                RecvWork{
-                    .fd = fd,
-                    .flags = flags,
-                    .message = std::vector<u8>(ctx.GetWriteBufferSize()),
-                });
+    ExecuteWork(ctx, RecvWork{
+                         .fd = fd,
+                         .flags = flags,
+                         .message = std::vector<u8>(ctx.GetWriteBufferSize()),
+                     });
 }
 
 void BSD::RecvFrom(Kernel::HLERequestContext& ctx) {
@@ -344,13 +340,12 @@ void BSD::RecvFrom(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={} addrlen={}", fd, flags,
               ctx.GetWriteBufferSize(0), ctx.GetWriteBufferSize(1));
 
-    ExecuteWork(ctx, "BSD:RecvFrom", IsBlockingSocket(fd),
-                RecvFromWork{
-                    .fd = fd,
-                    .flags = flags,
-                    .message = std::vector<u8>(ctx.GetWriteBufferSize(0)),
-                    .addr = std::vector<u8>(ctx.GetWriteBufferSize(1)),
-                });
+    ExecuteWork(ctx, RecvFromWork{
+                         .fd = fd,
+                         .flags = flags,
+                         .message = std::vector<u8>(ctx.GetWriteBufferSize(0)),
+                         .addr = std::vector<u8>(ctx.GetWriteBufferSize(1)),
+                     });
}
 
 void BSD::Send(Kernel::HLERequestContext& ctx) {
@@ -361,12 +356,11 @@ void BSD::Send(Kernel::HLERequestContext& ctx) {
 
     LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetReadBufferSize());
 
-    ExecuteWork(ctx, "BSD:Send", IsBlockingSocket(fd),
-                SendWork{
-                    .fd = fd,
-                    .flags = flags,
-                    .message = ctx.ReadBuffer(),
-                });
+    ExecuteWork(ctx, SendWork{
+                         .fd = fd,
+                         .flags = flags,
+                         .message = ctx.ReadBuffer(),
+                     });
 }
 
 void BSD::SendTo(Kernel::HLERequestContext& ctx) {
@@ -377,13 +371,12 @@ void BSD::SendTo(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service, "called. fd={} flags=0x{} len={} addrlen={}", fd, flags,
               ctx.GetReadBufferSize(0), ctx.GetReadBufferSize(1));
 
-    ExecuteWork(ctx, "BSD:SendTo", IsBlockingSocket(fd),
+    ExecuteWork(ctx, SendToWork{
381 SendToWork{ 375 .fd = fd,
382 .fd = fd, 376 .flags = flags,
383 .flags = flags, 377 .message = ctx.ReadBuffer(0),
384 .message = ctx.ReadBuffer(0), 378 .addr = ctx.ReadBuffer(1),
385 .addr = ctx.ReadBuffer(1), 379 });
386 });
387} 380}
388 381
389void BSD::Write(Kernel::HLERequestContext& ctx) { 382void BSD::Write(Kernel::HLERequestContext& ctx) {
@@ -392,12 +385,11 @@ void BSD::Write(Kernel::HLERequestContext& ctx) {
392 385
393 LOG_DEBUG(Service, "called. fd={} len={}", fd, ctx.GetReadBufferSize()); 386 LOG_DEBUG(Service, "called. fd={} len={}", fd, ctx.GetReadBufferSize());
394 387
395 ExecuteWork(ctx, "BSD:Write", IsBlockingSocket(fd), 388 ExecuteWork(ctx, SendWork{
396 SendWork{ 389 .fd = fd,
397 .fd = fd, 390 .flags = 0,
398 .flags = 0, 391 .message = ctx.ReadBuffer(),
399 .message = ctx.ReadBuffer(), 392 });
400 });
401} 393}
402 394
403void BSD::Close(Kernel::HLERequestContext& ctx) { 395void BSD::Close(Kernel::HLERequestContext& ctx) {
@@ -410,24 +402,9 @@ void BSD::Close(Kernel::HLERequestContext& ctx) {
410} 402}
411 403
412template <typename Work> 404template <typename Work>
413void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason, 405void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, Work work) {
414 bool is_blocking, Work work) { 406 work.Execute(this);
415 if (!is_blocking) {
416 work.Execute(this);
417 work.Response(ctx);
418 return;
419 }
420
421 // Signal a dummy response to make IPC validation happy
422 // This will be overwritten by the SleepClientThread callback
423 work.Response(ctx); 407 work.Response(ctx);
424
425 auto worker = worker_pool.CaptureWorker();
426
427 ctx.SleepClientThread(std::string(sleep_reason), std::numeric_limits<u64>::max(),
428 worker->Callback<Work>(), worker->KernelEvent());
429
430 worker->SendWork(std::move(work));
431} 408}
432 409
433std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protocol) { 410std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protocol) {
@@ -489,18 +466,18 @@ std::pair<s32, Errno> BSD::PollImpl(std::vector<u8>& write_buffer, std::vector<u
489 } 466 }
490 467
491 for (PollFD& pollfd : fds) { 468 for (PollFD& pollfd : fds) {
492 ASSERT(pollfd.revents == 0); 469 ASSERT(False(pollfd.revents));
493 470
494 if (pollfd.fd > static_cast<s32>(MAX_FD) || pollfd.fd < 0) { 471 if (pollfd.fd > static_cast<s32>(MAX_FD) || pollfd.fd < 0) {
495 LOG_ERROR(Service, "File descriptor handle={} is invalid", pollfd.fd); 472 LOG_ERROR(Service, "File descriptor handle={} is invalid", pollfd.fd);
496 pollfd.revents = 0; 473 pollfd.revents = PollEvents{};
497 return {0, Errno::SUCCESS}; 474 return {0, Errno::SUCCESS};
498 } 475 }
499 476
500 const std::optional<FileDescriptor>& descriptor = file_descriptors[pollfd.fd]; 477 const std::optional<FileDescriptor>& descriptor = file_descriptors[pollfd.fd];
501 if (!descriptor) { 478 if (!descriptor) {
502 LOG_ERROR(Service, "File descriptor handle={} is not allocated", pollfd.fd); 479 LOG_ERROR(Service, "File descriptor handle={} is not allocated", pollfd.fd);
503 pollfd.revents = POLL_NVAL; 480 pollfd.revents = PollEvents::Nval;
504 return {0, Errno::SUCCESS}; 481 return {0, Errno::SUCCESS};
505 } 482 }
506 } 483 }
@@ -510,7 +487,7 @@ std::pair<s32, Errno> BSD::PollImpl(std::vector<u8>& write_buffer, std::vector<u
510 Network::PollFD result; 487 Network::PollFD result;
511 result.socket = file_descriptors[pollfd.fd]->socket.get(); 488 result.socket = file_descriptors[pollfd.fd]->socket.get();
512 result.events = TranslatePollEventsToHost(pollfd.events); 489 result.events = TranslatePollEventsToHost(pollfd.events);
513 result.revents = 0; 490 result.revents = Network::PollEvents{};
514 return result; 491 return result;
515 }); 492 });
516 493
@@ -636,7 +613,7 @@ std::pair<s32, Errno> BSD::FcntlImpl(s32 fd, FcntlCmd cmd, s32 arg) {
636 return {0, Errno::SUCCESS}; 613 return {0, Errno::SUCCESS};
637 } 614 }
638 default: 615 default:
639 UNIMPLEMENTED_MSG("Unimplemented cmd={}", static_cast<int>(cmd)); 616 UNIMPLEMENTED_MSG("Unimplemented cmd={}", cmd);
640 return {-1, Errno::SUCCESS}; 617 return {-1, Errno::SUCCESS};
641 } 618 }
642} 619}
@@ -679,7 +656,7 @@ Errno BSD::SetSockOptImpl(s32 fd, u32 level, OptName optname, size_t optlen, con
679 case OptName::RCVTIMEO: 656 case OptName::RCVTIMEO:
680 return Translate(socket->SetRcvTimeo(value)); 657 return Translate(socket->SetRcvTimeo(value));
681 default: 658 default:
682 UNIMPLEMENTED_MSG("Unimplemented optname={}", static_cast<int>(optname)); 659 UNIMPLEMENTED_MSG("Unimplemented optname={}", optname);
683 return Errno::SUCCESS; 660 return Errno::SUCCESS;
684 } 661 }
685} 662}
@@ -807,18 +784,6 @@ bool BSD::IsFileDescriptorValid(s32 fd) const noexcept {
807 return true; 784 return true;
808} 785}
809 786
810bool BSD::IsBlockingSocket(s32 fd) const noexcept {
811 // Inform invalid sockets as non-blocking
812 // This way we avoid using a worker thread as it will fail without blocking host
813 if (fd > static_cast<s32>(MAX_FD) || fd < 0) {
814 return false;
815 }
816 if (!file_descriptors[fd]) {
817 return false;
818 }
819 return (file_descriptors[fd]->flags & FLAG_O_NONBLOCK) != 0;
820}
821
822void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept { 787void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept {
823 IPC::ResponseBuilder rb{ctx, 4}; 788 IPC::ResponseBuilder rb{ctx, 4};
824 789
@@ -827,8 +792,7 @@ void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) co
827 rb.PushEnum(bsd_errno); 792 rb.PushEnum(bsd_errno);
828} 793}
829 794
830BSD::BSD(Core::System& system_, const char* name) 795BSD::BSD(Core::System& system_, const char* name) : ServiceFramework{system_, name} {
831 : ServiceFramework{system_, name}, worker_pool{system_, this} {
832 // clang-format off 796 // clang-format off
833 static const FunctionInfo functions[] = { 797 static const FunctionInfo functions[] = {
834 {0, &BSD::RegisterClient, "RegisterClient"}, 798 {0, &BSD::RegisterClient, "RegisterClient"},
diff --git a/src/core/hle/service/sockets/bsd.h b/src/core/hle/service/sockets/bsd.h
index f14713fc4..6da0bfeb2 100644
--- a/src/core/hle/service/sockets/bsd.h
+++ b/src/core/hle/service/sockets/bsd.h
@@ -11,7 +11,6 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/kernel/hle_ipc.h" 12#include "core/hle/kernel/hle_ipc.h"
13#include "core/hle/service/service.h" 13#include "core/hle/service/service.h"
14#include "core/hle/service/sockets/blocking_worker.h"
15#include "core/hle/service/sockets/sockets.h" 14#include "core/hle/service/sockets/sockets.h"
16 15
17namespace Core { 16namespace Core {
@@ -138,8 +137,7 @@ private:
138 void Close(Kernel::HLERequestContext& ctx); 137 void Close(Kernel::HLERequestContext& ctx);
139 138
140 template <typename Work> 139 template <typename Work>
141 void ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason, 140 void ExecuteWork(Kernel::HLERequestContext& ctx, Work work);
142 bool is_blocking, Work work);
143 141
144 std::pair<s32, Errno> SocketImpl(Domain domain, Type type, Protocol protocol); 142 std::pair<s32, Errno> SocketImpl(Domain domain, Type type, Protocol protocol);
145 std::pair<s32, Errno> PollImpl(std::vector<u8>& write_buffer, std::vector<u8> read_buffer, 143 std::pair<s32, Errno> PollImpl(std::vector<u8>& write_buffer, std::vector<u8> read_buffer,
@@ -163,15 +161,10 @@ private:
163 161
164 s32 FindFreeFileDescriptorHandle() noexcept; 162 s32 FindFreeFileDescriptorHandle() noexcept;
165 bool IsFileDescriptorValid(s32 fd) const noexcept; 163 bool IsFileDescriptorValid(s32 fd) const noexcept;
166 bool IsBlockingSocket(s32 fd) const noexcept;
167 164
168 void BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept; 165 void BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept;
169 166
170 std::array<std::optional<FileDescriptor>, MAX_FD> file_descriptors; 167 std::array<std::optional<FileDescriptor>, MAX_FD> file_descriptors;
171
172 BlockingWorkerPool<BSD, PollWork, AcceptWork, ConnectWork, RecvWork, RecvFromWork, SendWork,
173 SendToWork>
174 worker_pool;
175}; 168};
176 169
177class BSDCFG final : public ServiceFramework<BSDCFG> { 170class BSDCFG final : public ServiceFramework<BSDCFG> {
diff --git a/src/core/hle/service/sockets/sockets.h b/src/core/hle/service/sockets/sockets.h
index 89a410076..5a65ed2a9 100644
--- a/src/core/hle/service/sockets/sockets.h
+++ b/src/core/hle/service/sockets/sockets.h
@@ -69,10 +69,22 @@ struct SockAddrIn {
69 std::array<u8, 8> zeroes; 69 std::array<u8, 8> zeroes;
70}; 70};
71 71
72enum class PollEvents : u16 {
73 // Using Pascal case because IN is a macro on Windows.
74 In = 1 << 0,
75 Pri = 1 << 1,
76 Out = 1 << 2,
77 Err = 1 << 3,
78 Hup = 1 << 4,
79 Nval = 1 << 5,
80};
81
82DECLARE_ENUM_FLAG_OPERATORS(PollEvents);
83
72struct PollFD { 84struct PollFD {
73 s32 fd; 85 s32 fd;
74 u16 events; 86 PollEvents events;
75 u16 revents; 87 PollEvents revents;
76}; 88};
77 89
78struct Linger { 90struct Linger {
@@ -80,13 +92,6 @@ struct Linger {
80 u32 linger; 92 u32 linger;
81}; 93};
82 94
83constexpr u16 POLL_IN = 0x01;
84constexpr u16 POLL_PRI = 0x02;
85constexpr u16 POLL_OUT = 0x04;
86constexpr u16 POLL_ERR = 0x08;
87constexpr u16 POLL_HUP = 0x10;
88constexpr u16 POLL_NVAL = 0x20;
89
90constexpr u32 FLAG_MSG_DONTWAIT = 0x80; 95constexpr u32 FLAG_MSG_DONTWAIT = 0x80;
91 96
92constexpr u32 FLAG_O_NONBLOCK = 0x800; 97constexpr u32 FLAG_O_NONBLOCK = 0x800;
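
The hunk above swaps the loose POLL_* constants for a scoped PollEvents enum, made usable as a bit set by DECLARE_ENUM_FLAG_OPERATORS together with the True()/False() predicates seen elsewhere in the diff. The sketch below shows how such flag operators can be declared and exercised; the macro body is illustrative, since the project's actual definition lives in common/common_funcs.h and may differ.

#include <cstdint>
#include <type_traits>

// Illustrative flag operators for a scoped enum (not yuzu's exact macro).
#define DECLARE_ENUM_FLAG_OPERATORS(T)                                 \
    constexpr T operator|(T a, T b) {                                  \
        using U = std::underlying_type_t<T>;                           \
        return static_cast<T>(static_cast<U>(a) | static_cast<U>(b));  \
    }                                                                  \
    constexpr T operator&(T a, T b) {                                  \
        using U = std::underlying_type_t<T>;                           \
        return static_cast<T>(static_cast<U>(a) & static_cast<U>(b));  \
    }                                                                  \
    constexpr T operator~(T a) {                                       \
        using U = std::underlying_type_t<T>;                           \
        return static_cast<T>(static_cast<U>(~static_cast<U>(a)));     \
    }                                                                  \
    constexpr T& operator|=(T& a, T b) { return a = a | b; }           \
    constexpr T& operator&=(T& a, T b) { return a = a & b; }

// Predicates in the spirit of the True()/False() helpers used by the diff.
template <typename T>
constexpr bool True(T flags) {
    return static_cast<std::underlying_type_t<T>>(flags) != 0;
}
template <typename T>
constexpr bool False(T flags) {
    return static_cast<std::underlying_type_t<T>>(flags) == 0;
}

enum class Events : std::uint16_t { In = 1 << 0, Out = 1 << 2, Hup = 1 << 4 };
DECLARE_ENUM_FLAG_OPERATORS(Events)

static_assert(True((Events::In | Events::Hup) & Events::In));
static_assert(False((Events::In | Events::Hup) & Events::Out));
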
diff --git a/src/core/hle/service/sockets/sockets_translate.cpp b/src/core/hle/service/sockets/sockets_translate.cpp
index 2e626fd86..c822d21b8 100644
--- a/src/core/hle/service/sockets/sockets_translate.cpp
+++ b/src/core/hle/service/sockets/sockets_translate.cpp
@@ -27,7 +27,7 @@ Errno Translate(Network::Errno value) {
27 case Network::Errno::NOTCONN: 27 case Network::Errno::NOTCONN:
28 return Errno::NOTCONN; 28 return Errno::NOTCONN;
29 default: 29 default:
30 UNIMPLEMENTED_MSG("Unimplemented errno={}", static_cast<int>(value)); 30 UNIMPLEMENTED_MSG("Unimplemented errno={}", value);
31 return Errno::SUCCESS; 31 return Errno::SUCCESS;
32 } 32 }
33} 33}
@@ -41,7 +41,7 @@ Network::Domain Translate(Domain domain) {
41 case Domain::INET: 41 case Domain::INET:
42 return Network::Domain::INET; 42 return Network::Domain::INET;
43 default: 43 default:
44 UNIMPLEMENTED_MSG("Unimplemented domain={}", static_cast<int>(domain)); 44 UNIMPLEMENTED_MSG("Unimplemented domain={}", domain);
45 return {}; 45 return {};
46 } 46 }
47} 47}
@@ -51,7 +51,7 @@ Domain Translate(Network::Domain domain) {
51 case Network::Domain::INET: 51 case Network::Domain::INET:
52 return Domain::INET; 52 return Domain::INET;
53 default: 53 default:
54 UNIMPLEMENTED_MSG("Unimplemented domain={}", static_cast<int>(domain)); 54 UNIMPLEMENTED_MSG("Unimplemented domain={}", domain);
55 return {}; 55 return {};
56 } 56 }
57} 57}
@@ -63,7 +63,7 @@ Network::Type Translate(Type type) {
63 case Type::DGRAM: 63 case Type::DGRAM:
64 return Network::Type::DGRAM; 64 return Network::Type::DGRAM;
65 default: 65 default:
66 UNIMPLEMENTED_MSG("Unimplemented type={}", static_cast<int>(type)); 66 UNIMPLEMENTED_MSG("Unimplemented type={}", type);
67 } 67 }
68} 68}
69 69
@@ -84,48 +84,48 @@ Network::Protocol Translate(Type type, Protocol protocol) {
84 case Protocol::UDP: 84 case Protocol::UDP:
85 return Network::Protocol::UDP; 85 return Network::Protocol::UDP;
86 default: 86 default:
87 UNIMPLEMENTED_MSG("Unimplemented protocol={}", static_cast<int>(protocol)); 87 UNIMPLEMENTED_MSG("Unimplemented protocol={}", protocol);
88 return Network::Protocol::TCP; 88 return Network::Protocol::TCP;
89 } 89 }
90} 90}
91 91
92u16 TranslatePollEventsToHost(u32 flags) { 92Network::PollEvents TranslatePollEventsToHost(PollEvents flags) {
93 u32 result = 0; 93 Network::PollEvents result{};
94 const auto translate = [&result, &flags](u32 from, u32 to) { 94 const auto translate = [&result, &flags](PollEvents from, Network::PollEvents to) {
95 if ((flags & from) != 0) { 95 if (True(flags & from)) {
96 flags &= ~from; 96 flags &= ~from;
97 result |= to; 97 result |= to;
98 } 98 }
99 }; 99 };
100 translate(POLL_IN, Network::POLL_IN); 100 translate(PollEvents::In, Network::PollEvents::In);
101 translate(POLL_PRI, Network::POLL_PRI); 101 translate(PollEvents::Pri, Network::PollEvents::Pri);
102 translate(POLL_OUT, Network::POLL_OUT); 102 translate(PollEvents::Out, Network::PollEvents::Out);
103 translate(POLL_ERR, Network::POLL_ERR); 103 translate(PollEvents::Err, Network::PollEvents::Err);
104 translate(POLL_HUP, Network::POLL_HUP); 104 translate(PollEvents::Hup, Network::PollEvents::Hup);
105 translate(POLL_NVAL, Network::POLL_NVAL); 105 translate(PollEvents::Nval, Network::PollEvents::Nval);
106 106
 107 UNIMPLEMENTED_IF_MSG(flags != 0, "Unimplemented flags={}", flags); 107 UNIMPLEMENTED_IF_MSG(static_cast<u16>(flags) != 0, "Unimplemented flags={}", static_cast<u16>(flags));
108 return static_cast<u16>(result); 108 return result;
109} 109}
110 110
111u16 TranslatePollEventsToGuest(u32 flags) { 111PollEvents TranslatePollEventsToGuest(Network::PollEvents flags) {
112 u32 result = 0; 112 PollEvents result{};
113 const auto translate = [&result, &flags](u32 from, u32 to) { 113 const auto translate = [&result, &flags](Network::PollEvents from, PollEvents to) {
114 if ((flags & from) != 0) { 114 if (True(flags & from)) {
115 flags &= ~from; 115 flags &= ~from;
116 result |= to; 116 result |= to;
117 } 117 }
118 }; 118 };
119 119
120 translate(Network::POLL_IN, POLL_IN); 120 translate(Network::PollEvents::In, PollEvents::In);
121 translate(Network::POLL_PRI, POLL_PRI); 121 translate(Network::PollEvents::Pri, PollEvents::Pri);
122 translate(Network::POLL_OUT, POLL_OUT); 122 translate(Network::PollEvents::Out, PollEvents::Out);
123 translate(Network::POLL_ERR, POLL_ERR); 123 translate(Network::PollEvents::Err, PollEvents::Err);
124 translate(Network::POLL_HUP, POLL_HUP); 124 translate(Network::PollEvents::Hup, PollEvents::Hup);
125 translate(Network::POLL_NVAL, POLL_NVAL); 125 translate(Network::PollEvents::Nval, PollEvents::Nval);
126 126
 127 UNIMPLEMENTED_IF_MSG(flags != 0, "Unimplemented flags={}", flags); 127 UNIMPLEMENTED_IF_MSG(static_cast<u16>(flags) != 0, "Unimplemented flags={}", static_cast<u16>(flags));
128 return static_cast<u16>(result); 128 return result;
129} 129}
130 130
131Network::SockAddrIn Translate(SockAddrIn value) { 131Network::SockAddrIn Translate(SockAddrIn value) {
@@ -157,7 +157,7 @@ Network::ShutdownHow Translate(ShutdownHow how) {
157 case ShutdownHow::RDWR: 157 case ShutdownHow::RDWR:
158 return Network::ShutdownHow::RDWR; 158 return Network::ShutdownHow::RDWR;
159 default: 159 default:
160 UNIMPLEMENTED_MSG("Unimplemented how={}", static_cast<int>(how)); 160 UNIMPLEMENTED_MSG("Unimplemented how={}", how);
161 return {}; 161 return {};
162 } 162 }
163} 163}
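
Both translate functions above follow a consume-and-check idiom: each recognized bit is cleared from the input as it is copied, so the trailing UNIMPLEMENTED_IF_MSG fires only when an unknown bit survives the loop. A standalone sketch of the idiom with plain integers; the flag values are invented.

#include <cassert>
#include <cstdint>

// Only the consume-and-check shape matters here; the bit values are made up.
constexpr std::uint16_t TranslateFlags(std::uint16_t flags) {
    std::uint16_t result = 0;
    const auto translate = [&result, &flags](std::uint16_t from, std::uint16_t to) {
        if ((flags & from) != 0) {
            flags &= static_cast<std::uint16_t>(~from); // consume the handled bit
            result |= to;
        }
    };
    translate(0x01, 0x100); // e.g. guest "in" -> host "in"
    translate(0x04, 0x400); // e.g. guest "out" -> host "out"
    assert(flags == 0);     // any surviving bit is an unimplemented event
    return result;
}

static_assert(TranslateFlags(0x05) == 0x500);
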
diff --git a/src/core/hle/service/sockets/sockets_translate.h b/src/core/hle/service/sockets/sockets_translate.h
index e498913d4..057d1ff22 100644
--- a/src/core/hle/service/sockets/sockets_translate.h
+++ b/src/core/hle/service/sockets/sockets_translate.h
@@ -31,10 +31,10 @@ Network::Type Translate(Type type);
31Network::Protocol Translate(Type type, Protocol protocol); 31Network::Protocol Translate(Type type, Protocol protocol);
32 32
33/// Translate abstract poll event flags to guest poll event flags 33/// Translate abstract poll event flags to guest poll event flags
34u16 TranslatePollEventsToHost(u32 flags); 34Network::PollEvents TranslatePollEventsToHost(PollEvents flags);
35 35
36/// Translate guest poll event flags to abstract poll event flags 36/// Translate guest poll event flags to abstract poll event flags
37u16 TranslatePollEventsToGuest(u32 flags); 37PollEvents TranslatePollEventsToGuest(Network::PollEvents flags);
38 38
39/// Translate guest socket address structure to abstract socket address structure 39/// Translate guest socket address structure to abstract socket address structure
40Network::SockAddrIn Translate(SockAddrIn value); 40Network::SockAddrIn Translate(SockAddrIn value);
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index 7b7ac282d..abc753d5d 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -10,8 +10,8 @@
10#include "core/hle/ipc_helpers.h" 10#include "core/hle/ipc_helpers.h"
11#include "core/hle/kernel/client_port.h" 11#include "core/hle/kernel/client_port.h"
12#include "core/hle/kernel/client_session.h" 12#include "core/hle/kernel/client_session.h"
13#include "core/hle/kernel/k_scheduler.h"
13#include "core/hle/kernel/kernel.h" 14#include "core/hle/kernel/kernel.h"
14#include "core/hle/kernel/scheduler.h"
15#include "core/hle/service/time/interface.h" 15#include "core/hle/service/time/interface.h"
16#include "core/hle/service/time/time.h" 16#include "core/hle/service/time/time.h"
17#include "core/hle/service/time/time_sharedmemory.h" 17#include "core/hle/service/time/time_sharedmemory.h"
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index af5b8b0b9..968cd16b6 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -282,18 +282,24 @@ public:
282 void DeserializeData() override { 282 void DeserializeData() override {
283 [[maybe_unused]] const std::u16string token = ReadInterfaceToken(); 283 [[maybe_unused]] const std::u16string token = ReadInterfaceToken();
284 data = Read<Data>(); 284 data = Read<Data>();
285 buffer = Read<NVFlinger::IGBPBuffer>(); 285 if (data.contains_object != 0) {
286 buffer_container = Read<BufferContainer>();
287 }
286 } 288 }
287 289
288 struct Data { 290 struct Data {
289 u32_le slot; 291 u32_le slot;
290 INSERT_PADDING_WORDS(1); 292 u32_le contains_object;
293 };
294
295 struct BufferContainer {
291 u32_le graphic_buffer_length; 296 u32_le graphic_buffer_length;
292 INSERT_PADDING_WORDS(1); 297 INSERT_PADDING_WORDS(1);
298 NVFlinger::IGBPBuffer buffer{};
293 }; 299 };
294 300
295 Data data; 301 Data data{};
296 NVFlinger::IGBPBuffer buffer; 302 BufferContainer buffer_container{};
297}; 303};
298 304
299class IGBPSetPreallocatedBufferResponseParcel : public Parcel { 305class IGBPSetPreallocatedBufferResponseParcel : public Parcel {
@@ -528,10 +534,9 @@ private:
528 const u32 flags = rp.Pop<u32>(); 534 const u32 flags = rp.Pop<u32>();
529 535
530 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, 536 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
531 static_cast<u32>(transaction), flags); 537 transaction, flags);
532 538
533 const auto guard = nv_flinger.Lock(); 539 auto& buffer_queue = *nv_flinger.FindBufferQueue(id);
534 auto& buffer_queue = nv_flinger.FindBufferQueue(id);
535 540
536 switch (transaction) { 541 switch (transaction) {
537 case TransactionId::Connect: { 542 case TransactionId::Connect: {
@@ -541,13 +546,16 @@ private:
541 Settings::values.resolution_factor.GetValue()), 546 Settings::values.resolution_factor.GetValue()),
542 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * 547 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
543 Settings::values.resolution_factor.GetValue())}; 548 Settings::values.resolution_factor.GetValue())};
549
550 buffer_queue.Connect();
551
544 ctx.WriteBuffer(response.Serialize()); 552 ctx.WriteBuffer(response.Serialize());
545 break; 553 break;
546 } 554 }
547 case TransactionId::SetPreallocatedBuffer: { 555 case TransactionId::SetPreallocatedBuffer: {
548 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; 556 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
549 557
550 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer); 558 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer_container.buffer);
551 559
552 IGBPSetPreallocatedBufferResponseParcel response{}; 560 IGBPSetPreallocatedBufferResponseParcel response{};
553 ctx.WriteBuffer(response.Serialize()); 561 ctx.WriteBuffer(response.Serialize());
@@ -557,40 +565,25 @@ private:
557 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 565 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
558 const u32 width{request.data.width}; 566 const u32 width{request.data.width};
559 const u32 height{request.data.height}; 567 const u32 height{request.data.height};
560 auto result = buffer_queue.DequeueBuffer(width, height); 568
561 569 do {
562 if (result) { 570 if (auto result = buffer_queue.DequeueBuffer(width, height); result) {
563 // Buffer is available 571 // Buffer is available
564 IGBPDequeueBufferResponseParcel response{result->first, *result->second}; 572 IGBPDequeueBufferResponseParcel response{result->first, *result->second};
565 ctx.WriteBuffer(response.Serialize()); 573 ctx.WriteBuffer(response.Serialize());
566 } else { 574 break;
567 // Wait the current thread until a buffer becomes available 575 }
568 ctx.SleepClientThread( 576 } while (buffer_queue.IsConnected());
569 "IHOSBinderDriver::DequeueBuffer", UINT64_MAX, 577
570 [=, this](std::shared_ptr<Kernel::Thread> thread,
571 Kernel::HLERequestContext& ctx, Kernel::ThreadWakeupReason reason) {
572 // Repeat TransactParcel DequeueBuffer when a buffer is available
573 const auto guard = nv_flinger.Lock();
574 auto& buffer_queue = nv_flinger.FindBufferQueue(id);
575 auto result = buffer_queue.DequeueBuffer(width, height);
576 ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer.");
577
578 IGBPDequeueBufferResponseParcel response{result->first, *result->second};
579 ctx.WriteBuffer(response.Serialize());
580 IPC::ResponseBuilder rb{ctx, 2};
581 rb.Push(RESULT_SUCCESS);
582 },
583 buffer_queue.GetWritableBufferWaitEvent());
584 }
585 break; 578 break;
586 } 579 }
587 case TransactionId::RequestBuffer: { 580 case TransactionId::RequestBuffer: {
588 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; 581 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
589 582
590 auto& buffer = buffer_queue.RequestBuffer(request.slot); 583 auto& buffer = buffer_queue.RequestBuffer(request.slot);
591
592 IGBPRequestBufferResponseParcel response{buffer}; 584 IGBPRequestBufferResponseParcel response{buffer};
593 ctx.WriteBuffer(response.Serialize()); 585 ctx.WriteBuffer(response.Serialize());
586
594 break; 587 break;
595 } 588 }
596 case TransactionId::QueueBuffer: { 589 case TransactionId::QueueBuffer: {
@@ -676,7 +669,7 @@ private:
676 669
677 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); 670 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
678 671
679 const auto& buffer_queue = nv_flinger.FindBufferQueue(id); 672 const auto& buffer_queue = *nv_flinger.FindBufferQueue(id);
680 673
681 // TODO(Subv): Find out what this actually is. 674 // TODO(Subv): Find out what this actually is.
682 IPC::ResponseBuilder rb{ctx, 2, 1}; 675 IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -1066,8 +1059,8 @@ private:
1066 const auto scaling_mode = rp.PopEnum<NintendoScaleMode>(); 1059 const auto scaling_mode = rp.PopEnum<NintendoScaleMode>();
1067 const u64 unknown = rp.Pop<u64>(); 1060 const u64 unknown = rp.Pop<u64>();
1068 1061
1069 LOG_DEBUG(Service_VI, "called. scaling_mode=0x{:08X}, unknown=0x{:016X}", 1062 LOG_DEBUG(Service_VI, "called. scaling_mode=0x{:08X}, unknown=0x{:016X}", scaling_mode,
1070 static_cast<u32>(scaling_mode), unknown); 1063 unknown);
1071 1064
1072 IPC::ResponseBuilder rb{ctx, 2}; 1065 IPC::ResponseBuilder rb{ctx, 2};
1073 1066
@@ -1210,7 +1203,7 @@ private:
1210 void ConvertScalingMode(Kernel::HLERequestContext& ctx) { 1203 void ConvertScalingMode(Kernel::HLERequestContext& ctx) {
1211 IPC::RequestParser rp{ctx}; 1204 IPC::RequestParser rp{ctx};
1212 const auto mode = rp.PopEnum<NintendoScaleMode>(); 1205 const auto mode = rp.PopEnum<NintendoScaleMode>();
1213 LOG_DEBUG(Service_VI, "called mode={}", static_cast<u32>(mode)); 1206 LOG_DEBUG(Service_VI, "called mode={}", mode);
1214 1207
1215 const auto converted_mode = ConvertScalingModeImpl(mode); 1208 const auto converted_mode = ConvertScalingModeImpl(mode);
1216 1209
@@ -1230,8 +1223,8 @@ private:
1230 const auto height = rp.Pop<u64>(); 1223 const auto height = rp.Pop<u64>();
1231 LOG_DEBUG(Service_VI, "called width={}, height={}", width, height); 1224 LOG_DEBUG(Service_VI, "called width={}, height={}", width, height);
1232 1225
1233 constexpr std::size_t base_size = 0x20000; 1226 constexpr u64 base_size = 0x20000;
1234 constexpr std::size_t alignment = 0x1000; 1227 constexpr u64 alignment = 0x1000;
1235 const auto texture_size = width * height * 4; 1228 const auto texture_size = width * height * 4;
1236 const auto out_size = (texture_size + base_size - 1) / base_size * base_size; 1229 const auto out_size = (texture_size + base_size - 1) / base_size * base_size;
1237 1230
@@ -1311,7 +1304,7 @@ void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx, Core::System&
1311 const auto policy = rp.PopEnum<Policy>(); 1304 const auto policy = rp.PopEnum<Policy>();
1312 1305
1313 if (!IsValidServiceAccess(permission, policy)) { 1306 if (!IsValidServiceAccess(permission, policy)) {
1314 LOG_ERROR(Service_VI, "Permission denied for policy {}", static_cast<u32>(policy)); 1307 LOG_ERROR(Service_VI, "Permission denied for policy {}", policy);
1315 IPC::ResponseBuilder rb{ctx, 2}; 1308 IPC::ResponseBuilder rb{ctx, 2};
1316 rb.Push(ERR_PERMISSION_DENIED); 1309 rb.Push(ERR_PERMISSION_DENIED);
1317 return; 1310 return;
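
Note how TransactParcel's DequeueBuffer no longer parks the guest thread with SleepClientThread: it now retries the dequeue until a slot frees up or the producer disconnects. A compilable sketch of that retry shape, with BufferQueue as a hypothetical stand-in rather than the real NVFlinger interface:

#include <cstdio>
#include <optional>

// Hypothetical stand-in for yuzu's NVFlinger buffer queue.
struct BufferQueue {
    int free_buffers = 1;
    bool connected = true;

    std::optional<int> DequeueBuffer() {
        if (free_buffers == 0) {
            return std::nullopt;
        }
        return --free_buffers; // hand out a slot index
    }
    bool IsConnected() const {
        return connected;
    }
};

// Shape of the new path: retry until a slot is free or the queue disconnects,
// instead of sleeping the guest thread on a wakeup callback.
bool TryDequeue(BufferQueue& queue) {
    do {
        if (const auto slot = queue.DequeueBuffer()) {
            std::printf("dequeued slot %d\n", *slot);
            return true;
        }
    } while (queue.IsConnected());
    return false; // queue torn down while waiting
}

int main() {
    BufferQueue queue;
    return TryDequeue(queue) ? 0 : 1;
}
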
diff --git a/src/core/loader/deconstructed_rom_directory.h b/src/core/loader/deconstructed_rom_directory.h
index 35d340317..3c968580f 100644
--- a/src/core/loader/deconstructed_rom_directory.h
+++ b/src/core/loader/deconstructed_rom_directory.h
@@ -32,7 +32,7 @@ public:
32 32
33 /** 33 /**
34 * Returns the type of the file 34 * Returns the type of the file
35 * @param file std::shared_ptr<VfsFile> open file 35 * @param file open file
36 * @return FileType found, or FileType::Error if this loader doesn't know it 36 * @return FileType found, or FileType::Error if this loader doesn't know it
37 */ 37 */
38 static FileType IdentifyType(const FileSys::VirtualFile& file); 38 static FileType IdentifyType(const FileSys::VirtualFile& file);
diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h
index 3527933ad..2067932c7 100644
--- a/src/core/loader/elf.h
+++ b/src/core/loader/elf.h
@@ -21,7 +21,7 @@ public:
21 21
22 /** 22 /**
23 * Returns the type of the file 23 * Returns the type of the file
24 * @param file std::shared_ptr<VfsFile> open file 24 * @param file open file
25 * @return FileType found, or FileType::Error if this loader doesn't know it 25 * @return FileType found, or FileType::Error if this loader doesn't know it
26 */ 26 */
27 static FileType IdentifyType(const FileSys::VirtualFile& file); 27 static FileType IdentifyType(const FileSys::VirtualFile& file);
diff --git a/src/core/loader/kip.h b/src/core/loader/kip.h
index dee05a7b5..14a85e295 100644
--- a/src/core/loader/kip.h
+++ b/src/core/loader/kip.h
@@ -23,7 +23,7 @@ public:
23 23
24 /** 24 /**
25 * Returns the type of the file 25 * Returns the type of the file
26 * @param file std::shared_ptr<VfsFile> open file 26 * @param file open file
27 * @return FileType found, or FileType::Error if this loader doesn't know it 27 * @return FileType found, or FileType::Error if this loader doesn't know it
28 */ 28 */
29 static FileType IdentifyType(const FileSys::VirtualFile& file); 29 static FileType IdentifyType(const FileSys::VirtualFile& file);
diff --git a/src/core/loader/nax.h b/src/core/loader/nax.h
index c2b7722b5..a5b5e2ae1 100644
--- a/src/core/loader/nax.h
+++ b/src/core/loader/nax.h
@@ -28,7 +28,7 @@ public:
28 28
29 /** 29 /**
30 * Returns the type of the file 30 * Returns the type of the file
31 * @param file std::shared_ptr<VfsFile> open file 31 * @param file open file
32 * @return FileType found, or FileType::Error if this loader doesn't know it 32 * @return FileType found, or FileType::Error if this loader doesn't know it
33 */ 33 */
34 static FileType IdentifyType(const FileSys::VirtualFile& file); 34 static FileType IdentifyType(const FileSys::VirtualFile& file);
diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h
index 711070294..918792800 100644
--- a/src/core/loader/nca.h
+++ b/src/core/loader/nca.h
@@ -28,7 +28,7 @@ public:
28 28
29 /** 29 /**
30 * Returns the type of the file 30 * Returns the type of the file
31 * @param file std::shared_ptr<VfsFile> open file 31 * @param file open file
32 * @return FileType found, or FileType::Error if this loader doesn't know it 32 * @return FileType found, or FileType::Error if this loader doesn't know it
33 */ 33 */
34 static FileType IdentifyType(const FileSys::VirtualFile& file); 34 static FileType IdentifyType(const FileSys::VirtualFile& file);
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index a2aab2ecc..a82b66221 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -32,7 +32,7 @@ public:
32 32
33 /** 33 /**
34 * Returns the type of the file 34 * Returns the type of the file
35 * @param file std::shared_ptr<VfsFile> open file 35 * @param file open file
36 * @return FileType found, or FileType::Error if this loader doesn't know it 36 * @return FileType found, or FileType::Error if this loader doesn't know it
37 */ 37 */
38 static FileType IdentifyType(const FileSys::VirtualFile& file); 38 static FileType IdentifyType(const FileSys::VirtualFile& file);
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index d331096ae..3af461b5f 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -75,7 +75,7 @@ public:
75 75
76 /** 76 /**
77 * Returns the type of the file 77 * Returns the type of the file
78 * @param file std::shared_ptr<VfsFile> open file 78 * @param file open file
79 * @return FileType found, or FileType::Error if this loader doesn't know it 79 * @return FileType found, or FileType::Error if this loader doesn't know it
80 */ 80 */
81 static FileType IdentifyType(const FileSys::VirtualFile& file); 81 static FileType IdentifyType(const FileSys::VirtualFile& file);
diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h
index f0518ac47..d48d87f2c 100644
--- a/src/core/loader/nsp.h
+++ b/src/core/loader/nsp.h
@@ -34,7 +34,7 @@ public:
34 34
35 /** 35 /**
36 * Returns the type of the file 36 * Returns the type of the file
37 * @param file std::shared_ptr<VfsFile> open file 37 * @param file open file
38 * @return FileType found, or FileType::Error if this loader doesn't know it 38 * @return FileType found, or FileType::Error if this loader doesn't know it
39 */ 39 */
40 static FileType IdentifyType(const FileSys::VirtualFile& file); 40 static FileType IdentifyType(const FileSys::VirtualFile& file);
diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h
index 764dc8328..9f0ceb5ef 100644
--- a/src/core/loader/xci.h
+++ b/src/core/loader/xci.h
@@ -34,7 +34,7 @@ public:
34 34
35 /** 35 /**
36 * Returns the type of the file 36 * Returns the type of the file
37 * @param file std::shared_ptr<VfsFile> open file 37 * @param file open file
38 * @return FileType found, or FileType::Error if this loader doesn't know it 38 * @return FileType found, or FileType::Error if this loader doesn't know it
39 */ 39 */
40 static FileType IdentifyType(const FileSys::VirtualFile& file); 40 static FileType IdentifyType(const FileSys::VirtualFile& file);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index b88aa5c40..54a848936 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <cstring> 6#include <cstring>
7#include <mutex>
7#include <optional> 8#include <optional>
8#include <utility> 9#include <utility>
9 10
@@ -497,7 +498,21 @@ struct Memory::Impl {
497 return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size); 498 return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size);
498 } 499 }
499 500
501 struct PageEntry {
502 u8* const pointer;
503 const Common::PageType attribute;
504 };
505
506 PageEntry SafePageEntry(std::size_t base) const {
507 std::lock_guard lock{rasterizer_cache_guard};
508 return {
509 .pointer = current_page_table->pointers[base],
510 .attribute = current_page_table->attributes[base],
511 };
512 }
513
500 void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { 514 void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
515 std::lock_guard lock{rasterizer_cache_guard};
501 if (vaddr == 0) { 516 if (vaddr == 0) {
502 return; 517 return;
503 } 518 }
@@ -630,16 +645,22 @@ struct Memory::Impl {
630 */ 645 */
631 template <typename T> 646 template <typename T>
632 T Read(const VAddr vaddr) { 647 T Read(const VAddr vaddr) {
633 const u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 648 // Avoid adding any extra logic to this fast-path block
634 if (page_pointer != nullptr) { 649 if (const u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) {
635 // NOTE: Avoid adding any extra logic to this fast-path block
636 T value; 650 T value;
637 std::memcpy(&value, &page_pointer[vaddr], sizeof(T)); 651 std::memcpy(&value, &pointer[vaddr], sizeof(T));
638 return value; 652 return value;
639 } 653 }
640 654
641 const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 655 // Otherwise, we need to grab the page with a lock, in case it is currently being modified
642 switch (type) { 656 const auto entry = SafePageEntry(vaddr >> PAGE_BITS);
657 if (entry.pointer) {
658 T value;
659 std::memcpy(&value, &entry.pointer[vaddr], sizeof(T));
660 return value;
661 }
662
663 switch (entry.attribute) {
643 case Common::PageType::Unmapped: 664 case Common::PageType::Unmapped:
644 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); 665 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
645 return 0; 666 return 0;
@@ -667,20 +688,24 @@ struct Memory::Impl {
667 * @tparam T The data type to write to memory. This type *must* be 688 * @tparam T The data type to write to memory. This type *must* be
668 * trivially copyable, otherwise the behavior of this function 689 * trivially copyable, otherwise the behavior of this function
669 * is undefined. 690 * is undefined.
670 *
671 * @returns The instance of T write to the specified virtual address.
672 */ 691 */
673 template <typename T> 692 template <typename T>
674 void Write(const VAddr vaddr, const T data) { 693 void Write(const VAddr vaddr, const T data) {
675 u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 694 // Avoid adding any extra logic to this fast-path block
676 if (page_pointer != nullptr) { 695 if (u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) {
677 // NOTE: Avoid adding any extra logic to this fast-path block 696 std::memcpy(&pointer[vaddr], &data, sizeof(T));
678 std::memcpy(&page_pointer[vaddr], &data, sizeof(T));
679 return; 697 return;
680 } 698 }
681 699
682 const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 700 // Otherwise, we need to grab the page with a lock, in case it is currently being modified
683 switch (type) { 701 const auto entry = SafePageEntry(vaddr >> PAGE_BITS);
702 if (entry.pointer) {
703 // Memory was mapped, we are done
704 std::memcpy(&entry.pointer[vaddr], &data, sizeof(T));
705 return;
706 }
707
708 switch (entry.attribute) {
684 case Common::PageType::Unmapped: 709 case Common::PageType::Unmapped:
685 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, 710 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
686 static_cast<u32>(data), vaddr); 711 static_cast<u32>(data), vaddr);
@@ -758,6 +783,7 @@ struct Memory::Impl {
758 return true; 783 return true;
759 } 784 }
760 785
786 mutable std::mutex rasterizer_cache_guard;
761 Common::PageTable* current_page_table = nullptr; 787 Common::PageTable* current_page_table = nullptr;
762 Core::System& system; 788 Core::System& system;
763}; 789};
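
The new Read/Write paths form a double-checked pattern: the hot path dereferences the page pointer without locking, and only a miss falls back to SafePageEntry, which re-reads pointer and attribute together under rasterizer_cache_guard so a concurrent RasterizerMarkRegionCached cannot be observed half-applied. A condensed sketch under a simplified single-level page table (not the emulator's real layout):

#include <array>
#include <cstdint>
#include <cstring>
#include <mutex>

enum class PageType { Unmapped, Memory, RasterizerCached };

// Simplified single-level page table; the real one is Common::PageTable.
struct PageTable {
    std::array<std::uint8_t*, 64> pointers{};
    std::array<PageType, 64> attributes{};
};

class MemoryImpl {
public:
    template <typename T>
    T Read(std::size_t page, std::size_t offset) {
        // Fast path: no lock taken, mirroring the real Read<T>.
        if (const std::uint8_t* const pointer = table.pointers[page]) {
            T value;
            std::memcpy(&value, pointer + offset, sizeof(T));
            return value;
        }
        // Slow path: snapshot pointer and attribute together so a concurrent
        // remap cannot be seen half-applied.
        const auto [pointer, attribute] = SafePageEntry(page);
        if (pointer != nullptr) {
            T value;
            std::memcpy(&value, pointer + offset, sizeof(T));
            return value;
        }
        if (attribute == PageType::RasterizerCached) {
            // The real code flushes the GPU cache and reads backing memory.
        }
        return T{}; // unmapped: the real code logs an error and returns 0
    }

private:
    struct PageEntry {
        std::uint8_t* pointer;
        PageType attribute;
    };

    PageEntry SafePageEntry(std::size_t page) const {
        std::lock_guard lock{guard};
        return {table.pointers[page], table.attributes[page]};
    }

    mutable std::mutex guard;
    PageTable table;
};

int main() {
    MemoryImpl memory;
    return static_cast<int>(memory.Read<std::uint32_t>(0, 0)); // unmapped -> 0
}
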
diff --git a/src/core/network/network.cpp b/src/core/network/network.cpp
index 5ef2e8511..681e93468 100644
--- a/src/core/network/network.cpp
+++ b/src/core/network/network.cpp
@@ -11,7 +11,7 @@
11#ifdef _WIN32 11#ifdef _WIN32
12#define _WINSOCK_DEPRECATED_NO_WARNINGS // gethostname 12#define _WINSOCK_DEPRECATED_NO_WARNINGS // gethostname
13#include <winsock2.h> 13#include <winsock2.h>
14#elif __unix__ 14#elif YUZU_UNIX
15#include <errno.h> 15#include <errno.h>
16#include <fcntl.h> 16#include <fcntl.h>
17#include <netdb.h> 17#include <netdb.h>
@@ -54,7 +54,7 @@ constexpr IPv4Address TranslateIPv4(in_addr addr) {
54sockaddr TranslateFromSockAddrIn(SockAddrIn input) { 54sockaddr TranslateFromSockAddrIn(SockAddrIn input) {
55 sockaddr_in result; 55 sockaddr_in result;
56 56
57#ifdef __unix__ 57#if YUZU_UNIX
58 result.sin_len = sizeof(result); 58 result.sin_len = sizeof(result);
59#endif 59#endif
60 60
@@ -63,7 +63,7 @@ sockaddr TranslateFromSockAddrIn(SockAddrIn input) {
63 result.sin_family = AF_INET; 63 result.sin_family = AF_INET;
64 break; 64 break;
65 default: 65 default:
66 UNIMPLEMENTED_MSG("Unhandled sockaddr family={}", static_cast<int>(input.family)); 66 UNIMPLEMENTED_MSG("Unhandled sockaddr family={}", input.family);
67 result.sin_family = AF_INET; 67 result.sin_family = AF_INET;
68 break; 68 break;
69 } 69 }
@@ -99,7 +99,7 @@ bool EnableNonBlock(SOCKET fd, bool enable) {
99 return ioctlsocket(fd, FIONBIO, &value) != SOCKET_ERROR; 99 return ioctlsocket(fd, FIONBIO, &value) != SOCKET_ERROR;
100} 100}
101 101
102#elif __unix__ // ^ _WIN32 v __unix__ 102#elif YUZU_UNIX // ^ _WIN32 v YUZU_UNIX
103 103
104using SOCKET = int; 104using SOCKET = int;
105using WSAPOLLFD = pollfd; 105using WSAPOLLFD = pollfd;
@@ -133,7 +133,7 @@ sockaddr TranslateFromSockAddrIn(SockAddrIn input) {
133 result.sin_family = AF_INET; 133 result.sin_family = AF_INET;
134 break; 134 break;
135 default: 135 default:
136 UNIMPLEMENTED_MSG("Unhandled sockaddr family={}", static_cast<int>(input.family)); 136 UNIMPLEMENTED_MSG("Unhandled sockaddr family={}", input.family);
137 result.sin_family = AF_INET; 137 result.sin_family = AF_INET;
138 break; 138 break;
139 } 139 }
@@ -186,7 +186,7 @@ int TranslateDomain(Domain domain) {
186 case Domain::INET: 186 case Domain::INET:
187 return AF_INET; 187 return AF_INET;
188 default: 188 default:
189 UNIMPLEMENTED_MSG("Unimplemented domain={}", static_cast<int>(domain)); 189 UNIMPLEMENTED_MSG("Unimplemented domain={}", domain);
190 return 0; 190 return 0;
191 } 191 }
192} 192}
@@ -198,7 +198,7 @@ int TranslateType(Type type) {
198 case Type::DGRAM: 198 case Type::DGRAM:
199 return SOCK_DGRAM; 199 return SOCK_DGRAM;
200 default: 200 default:
201 UNIMPLEMENTED_MSG("Unimplemented type={}", static_cast<int>(type)); 201 UNIMPLEMENTED_MSG("Unimplemented type={}", type);
202 return 0; 202 return 0;
203 } 203 }
204} 204}
@@ -210,7 +210,7 @@ int TranslateProtocol(Protocol protocol) {
210 case Protocol::UDP: 210 case Protocol::UDP:
211 return IPPROTO_UDP; 211 return IPPROTO_UDP;
212 default: 212 default:
213 UNIMPLEMENTED_MSG("Unimplemented protocol={}", static_cast<int>(protocol)); 213 UNIMPLEMENTED_MSG("Unimplemented protocol={}", protocol);
214 return 0; 214 return 0;
215 } 215 }
216} 216}
@@ -238,49 +238,49 @@ SockAddrIn TranslateToSockAddrIn(sockaddr input_) {
238 return result; 238 return result;
239} 239}
240 240
241u16 TranslatePollEvents(u32 events) { 241short TranslatePollEvents(PollEvents events) {
242 u32 result = 0; 242 short result = 0;
243 243
244 if ((events & POLL_IN) != 0) { 244 if (True(events & PollEvents::In)) {
245 events &= ~POLL_IN; 245 events &= ~PollEvents::In;
246 result |= POLLIN; 246 result |= POLLIN;
247 } 247 }
248 if ((events & POLL_PRI) != 0) { 248 if (True(events & PollEvents::Pri)) {
249 events &= ~POLL_PRI; 249 events &= ~PollEvents::Pri;
250#ifdef _WIN32 250#ifdef _WIN32
251 LOG_WARNING(Service, "Winsock doesn't support POLLPRI"); 251 LOG_WARNING(Service, "Winsock doesn't support POLLPRI");
252#else 252#else
253 result |= POLL_PRI; 253 result |= POLLPRI;
254#endif 254#endif
255 } 255 }
256 if ((events & POLL_OUT) != 0) { 256 if (True(events & PollEvents::Out)) {
257 events &= ~POLL_OUT; 257 events &= ~PollEvents::Out;
258 result |= POLLOUT; 258 result |= POLLOUT;
259 } 259 }
260 260
 261 UNIMPLEMENTED_IF_MSG(events != 0, "Unhandled guest events=0x{:x}", events); 261 UNIMPLEMENTED_IF_MSG(static_cast<u16>(events) != 0, "Unhandled guest events=0x{:x}", static_cast<u16>(events));
262 262
263 return static_cast<u16>(result); 263 return result;
264} 264}
265 265
266u16 TranslatePollRevents(u32 revents) { 266PollEvents TranslatePollRevents(short revents) {
267 u32 result = 0; 267 PollEvents result{};
268 const auto translate = [&result, &revents](u32 host, u32 guest) { 268 const auto translate = [&result, &revents](short host, PollEvents guest) {
269 if ((revents & host) != 0) { 269 if ((revents & host) != 0) {
270 revents &= ~host; 270 revents &= static_cast<short>(~host);
271 result |= guest; 271 result |= guest;
272 } 272 }
273 }; 273 };
274 274
275 translate(POLLIN, POLL_IN); 275 translate(POLLIN, PollEvents::In);
276 translate(POLLPRI, POLL_PRI); 276 translate(POLLPRI, PollEvents::Pri);
277 translate(POLLOUT, POLL_OUT); 277 translate(POLLOUT, PollEvents::Out);
278 translate(POLLERR, POLL_ERR); 278 translate(POLLERR, PollEvents::Err);
279 translate(POLLHUP, POLL_HUP); 279 translate(POLLHUP, PollEvents::Hup);
280 280
281 UNIMPLEMENTED_IF_MSG(revents != 0, "Unhandled host revents=0x{:x}", revents); 281 UNIMPLEMENTED_IF_MSG(revents != 0, "Unhandled host revents=0x{:x}", revents);
282 282
283 return static_cast<u16>(result); 283 return result;
284} 284}
285 285
286template <typename T> 286template <typename T>
@@ -350,7 +350,7 @@ std::pair<s32, Errno> Poll(std::vector<PollFD>& pollfds, s32 timeout) {
350 } 350 }
351 351
352 for (size_t i = 0; i < num; ++i) { 352 for (size_t i = 0; i < num; ++i) {
353 pollfds[i].revents = TranslatePollRevents(static_cast<u32>(host_pollfds[i].revents)); 353 pollfds[i].revents = TranslatePollRevents(host_pollfds[i].revents);
354 } 354 }
355 355
356 if (result > 0) { 356 if (result > 0) {
@@ -482,7 +482,7 @@ Errno Socket::Shutdown(ShutdownHow how) {
482 host_how = SD_BOTH; 482 host_how = SD_BOTH;
483 break; 483 break;
484 default: 484 default:
485 UNIMPLEMENTED_MSG("Unimplemented flag how={}", static_cast<int>(how)); 485 UNIMPLEMENTED_MSG("Unimplemented flag how={}", how);
486 return Errno::SUCCESS; 486 return Errno::SUCCESS;
487 } 487 }
488 if (shutdown(fd, host_how) != SOCKET_ERROR) { 488 if (shutdown(fd, host_how) != SOCKET_ERROR) {
diff --git a/src/core/network/network.h b/src/core/network/network.h
index 0622e4593..76b2821f2 100644
--- a/src/core/network/network.h
+++ b/src/core/network/network.h
@@ -61,19 +61,25 @@ struct SockAddrIn {
61}; 61};
62 62
 63/// Cross-platform poll fd structure 63/// Cross-platform poll fd structure
64
65enum class PollEvents : u16 {
66 // Using Pascal case because IN is a macro on Windows.
67 In = 1 << 0,
68 Pri = 1 << 1,
69 Out = 1 << 2,
70 Err = 1 << 3,
71 Hup = 1 << 4,
72 Nval = 1 << 5,
73};
74
75DECLARE_ENUM_FLAG_OPERATORS(PollEvents);
76
64struct PollFD { 77struct PollFD {
65 Socket* socket; 78 Socket* socket;
66 u16 events; 79 PollEvents events;
67 u16 revents; 80 PollEvents revents;
68}; 81};
69 82
70constexpr u16 POLL_IN = 1 << 0;
71constexpr u16 POLL_PRI = 1 << 1;
72constexpr u16 POLL_OUT = 1 << 2;
73constexpr u16 POLL_ERR = 1 << 3;
74constexpr u16 POLL_HUP = 1 << 4;
75constexpr u16 POLL_NVAL = 1 << 5;
76
77class NetworkInstance { 83class NetworkInstance {
78public: 84public:
79 explicit NetworkInstance(); 85 explicit NetworkInstance();
diff --git a/src/core/network/sockets.h b/src/core/network/sockets.h
index 7bdff0fe4..a44393325 100644
--- a/src/core/network/sockets.h
+++ b/src/core/network/sockets.h
@@ -9,7 +9,7 @@
9 9
10#if defined(_WIN32) 10#if defined(_WIN32)
11#include <winsock.h> 11#include <winsock.h>
12#elif !defined(__unix__) 12#elif !YUZU_UNIX
13#error "Platform not implemented" 13#error "Platform not implemented"
14#endif 14#endif
15 15
@@ -84,7 +84,7 @@ public:
84 84
85#if defined(_WIN32) 85#if defined(_WIN32)
86 SOCKET fd = INVALID_SOCKET; 86 SOCKET fd = INVALID_SOCKET;
87#elif defined(__unix__) 87#elif YUZU_UNIX
88 int fd = -1; 88 int fd = -1;
89#endif 89#endif
90}; 90};
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index e9997a263..39306509a 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -72,8 +72,6 @@ void LogSettings() {
72 log_setting("DataStorage_UseVirtualSd", values.use_virtual_sd); 72 log_setting("DataStorage_UseVirtualSd", values.use_virtual_sd);
73 log_setting("DataStorage_NandDir", Common::FS::GetUserPath(Common::FS::UserPath::NANDDir)); 73 log_setting("DataStorage_NandDir", Common::FS::GetUserPath(Common::FS::UserPath::NANDDir));
74 log_setting("DataStorage_SdmcDir", Common::FS::GetUserPath(Common::FS::UserPath::SDMCDir)); 74 log_setting("DataStorage_SdmcDir", Common::FS::GetUserPath(Common::FS::UserPath::SDMCDir));
75 log_setting("Debugging_UseGdbstub", values.use_gdbstub);
76 log_setting("Debugging_GdbstubPort", values.gdbstub_port);
77 log_setting("Debugging_ProgramArgs", values.program_args); 75 log_setting("Debugging_ProgramArgs", values.program_args);
78 log_setting("Services_BCATBackend", values.bcat_backend); 76 log_setting("Services_BCATBackend", values.bcat_backend);
79 log_setting("Services_BCATBoxcatLocal", values.bcat_boxcat_local); 77 log_setting("Services_BCATBoxcatLocal", values.bcat_boxcat_local);
@@ -150,9 +148,4 @@ void RestoreGlobalState(bool is_powered_on) {
150 values.motion_enabled.SetGlobal(true); 148 values.motion_enabled.SetGlobal(true);
151} 149}
152 150
153void Sanitize() {
154 values.use_asynchronous_gpu_emulation.SetValue(
155 values.use_asynchronous_gpu_emulation.GetValue() || values.use_multi_core.GetValue());
156}
157
158} // namespace Settings 151} // namespace Settings
diff --git a/src/core/settings.h b/src/core/settings.h
index 8e076f7ef..0cd3c0c84 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -180,6 +180,8 @@ struct Values {
180 std::string motion_device; 180 std::string motion_device;
181 std::string udp_input_servers; 181 std::string udp_input_servers;
182 182
183 bool emulate_analog_keyboard;
184
183 bool mouse_enabled; 185 bool mouse_enabled;
184 std::string mouse_device; 186 std::string mouse_device;
185 MouseButtonsRaw mouse_buttons; 187 MouseButtonsRaw mouse_buttons;
@@ -255,7 +257,4 @@ void LogSettings();
255// Restore the global state of all applicable settings in the Values struct 257// Restore the global state of all applicable settings in the Values struct
256void RestoreGlobalState(bool is_powered_on); 258void RestoreGlobalState(bool is_powered_on);
257 259
258// Fixes settings that are known to cause issues with the emulator
259void Sanitize();
260
261} // namespace Settings 260} // namespace Settings
diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp
index d748c1c04..40b516f85 100755
--- a/src/input_common/analog_from_button.cpp
+++ b/src/input_common/analog_from_button.cpp
@@ -6,6 +6,7 @@
6#include <cmath> 6#include <cmath>
7#include <thread> 7#include <thread>
8#include "common/math_util.h" 8#include "common/math_util.h"
9#include "core/settings.h"
9#include "input_common/analog_from_button.h" 10#include "input_common/analog_from_button.h"
10 11
11namespace InputCommon { 12namespace InputCommon {
@@ -112,7 +113,26 @@ public:
112 } 113 }
113 114
114 std::tuple<float, float> GetStatus() const override { 115 std::tuple<float, float> GetStatus() const override {
115 return std::make_tuple(std::cos(angle) * amplitude, std::sin(angle) * amplitude); 116 if (Settings::values.emulate_analog_keyboard) {
117 return std::make_tuple(std::cos(angle) * amplitude, std::sin(angle) * amplitude);
118 }
119 constexpr float SQRT_HALF = 0.707106781f;
120 int x = 0, y = 0;
121 if (right->GetStatus()) {
122 ++x;
123 }
124 if (left->GetStatus()) {
125 --x;
126 }
127 if (up->GetStatus()) {
128 ++y;
129 }
130 if (down->GetStatus()) {
131 --y;
132 }
133 const float coef = modifier->GetStatus() ? modifier_scale : 1.0f;
134 return std::make_tuple(static_cast<float>(x) * coef * (y == 0 ? 1.0f : SQRT_HALF),
135 static_cast<float>(y) * coef * (x == 0 ? 1.0f : SQRT_HALF));
116 } 136 }
117 137
118 bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override { 138 bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override {
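
With emulate_analog_keyboard disabled, the new GetStatus snaps the stick to one of eight digital directions and scales diagonals by SQRT_HALF (1/sqrt(2), about 0.7071) so the output magnitude stays at 1 rather than sqrt(2). A quick check of that normalization; DigitalToAnalog is a hypothetical standalone helper reusing the same math.

#include <cassert>
#include <cmath>
#include <utility>

// Same diagonal normalization as the digital branch above:
// x and y are each -1, 0 or +1; diagonals are scaled by 1/sqrt(2).
std::pair<float, float> DigitalToAnalog(int x, int y) {
    constexpr float SQRT_HALF = 0.707106781f;
    return {static_cast<float>(x) * (y == 0 ? 1.0f : SQRT_HALF),
            static_cast<float>(y) * (x == 0 ? 1.0f : SQRT_HALF)};
}

int main() {
    const auto [x, y] = DigitalToAnalog(1, 1); // up-right diagonal
    const float magnitude = std::sqrt(x * x + y * y);
    assert(std::abs(magnitude - 1.0f) < 1e-3f); // stays on the unit circle
    return 0;
}
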
diff --git a/src/input_common/gcadapter/gc_poller.cpp b/src/input_common/gcadapter/gc_poller.cpp
index 4d1052414..9670bdeb2 100644
--- a/src/input_common/gcadapter/gc_poller.cpp
+++ b/src/input_common/gcadapter/gc_poller.cpp
@@ -139,10 +139,10 @@ void GCButtonFactory::EndConfiguration() {
139 139
140class GCAnalog final : public Input::AnalogDevice { 140class GCAnalog final : public Input::AnalogDevice {
141public: 141public:
142 explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, 142 explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_,
143 const GCAdapter::Adapter* adapter, float range_) 143 float deadzone_, float range_, const GCAdapter::Adapter* adapter)
144 : port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter), 144 : port(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_),
145 range(range_) {} 145 deadzone(deadzone_), range(range_), gcadapter(adapter) {}
146 146
147 float GetAxis(u32 axis) const { 147 float GetAxis(u32 axis) const {
148 if (gcadapter->DeviceConnected(port)) { 148 if (gcadapter->DeviceConnected(port)) {
@@ -157,7 +157,12 @@ public:
157 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { 157 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
158 float x = GetAxis(analog_axis_x); 158 float x = GetAxis(analog_axis_x);
159 float y = GetAxis(analog_axis_y); 159 float y = GetAxis(analog_axis_y);
160 160 if (invert_x) {
161 x = -x;
162 }
163 if (invert_y) {
164 y = -y;
165 }
161 // Make sure the coordinates are in the unit circle, 166 // Make sure the coordinates are in the unit circle,
162 // otherwise normalize it. 167 // otherwise normalize it.
163 float r = x * x + y * y; 168 float r = x * x + y * y;
@@ -200,9 +205,11 @@ private:
200 const u32 port; 205 const u32 port;
201 const u32 axis_x; 206 const u32 axis_x;
202 const u32 axis_y; 207 const u32 axis_y;
208 const bool invert_x;
209 const bool invert_y;
203 const float deadzone; 210 const float deadzone;
204 const GCAdapter::Adapter* gcadapter;
205 const float range; 211 const float range;
212 const GCAdapter::Adapter* gcadapter;
206 mutable std::mutex mutex; 213 mutable std::mutex mutex;
207}; 214};
208 215
@@ -223,8 +230,13 @@ std::unique_ptr<Input::AnalogDevice> GCAnalogFactory::Create(const Common::Param
223 const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); 230 const auto axis_y = static_cast<u32>(params.Get("axis_y", 1));
224 const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); 231 const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f);
225 const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); 232 const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f);
233 const std::string invert_x_value = params.Get("invert_x", "+");
234 const std::string invert_y_value = params.Get("invert_y", "+");
235 const bool invert_x = invert_x_value == "-";
236 const bool invert_y = invert_y_value == "-";
226 237
227 return std::make_unique<GCAnalog>(port, axis_x, axis_y, deadzone, adapter.get(), range); 238 return std::make_unique<GCAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range,
239 adapter.get());
228} 240}
229 241
230void GCAnalogFactory::BeginConfiguration() { 242void GCAnalogFactory::BeginConfiguration() {
@@ -282,6 +294,8 @@ Common::ParamPackage GCAnalogFactory::GetNextInput() {
282 params.Set("port", controller_number); 294 params.Set("port", controller_number);
283 params.Set("axis_x", analog_x_axis); 295 params.Set("axis_x", analog_x_axis);
284 params.Set("axis_y", analog_y_axis); 296 params.Set("axis_y", analog_y_axis);
297 params.Set("invert_x", "+");
298 params.Set("invert_y", "+");
285 analog_x_axis = -1; 299 analog_x_axis = -1;
286 analog_y_axis = -1; 300 analog_y_axis = -1;
287 controller_number = -1; 301 controller_number = -1;
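
The invert_x/invert_y options travel as ordinary ParamPackage entries, "+" for normal and "-" for inverted, and Create() treats any value other than "-" as non-inverted. A rough sketch of the lookup with a plain map standing in for Common::ParamPackage; the mapping below is invented for illustration.

#include <cstdio>
#include <map>
#include <string>

// Simplified stand-in for Common::ParamPackage::Get(key, default).
std::string Get(const std::map<std::string, std::string>& params,
                const std::string& key, const std::string& fallback) {
    const auto it = params.find(key);
    return it != params.end() ? it->second : fallback;
}

int main() {
    // Hypothetical analog mapping with an inverted Y axis.
    const std::map<std::string, std::string> params{
        {"axis_x", "0"}, {"axis_y", "1"}, {"invert_y", "-"},
    };
    const bool invert_x = Get(params, "invert_x", "+") == "-"; // default: normal
    const bool invert_y = Get(params, "invert_y", "+") == "-";
    std::printf("invert_x=%d invert_y=%d\n", invert_x, invert_y); // prints 0 1
    return 0;
}
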
diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp
index 7445ad3ad..508eb0c7d 100644
--- a/src/input_common/mouse/mouse_poller.cpp
+++ b/src/input_common/mouse/mouse_poller.cpp
@@ -62,10 +62,10 @@ void MouseButtonFactory::EndConfiguration() {
62 62
63class MouseAnalog final : public Input::AnalogDevice { 63class MouseAnalog final : public Input::AnalogDevice {
64public: 64public:
65 explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, float range_, 65 explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_,
66 const MouseInput::Mouse* mouse_input_) 66 float deadzone_, float range_, const MouseInput::Mouse* mouse_input_)
67 : button(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), range(range_), 67 : button(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_),
68 mouse_input(mouse_input_) {} 68 deadzone(deadzone_), range(range_), mouse_input(mouse_input_) {}
69 69
70 float GetAxis(u32 axis) const { 70 float GetAxis(u32 axis) const {
71 std::lock_guard lock{mutex}; 71 std::lock_guard lock{mutex};
@@ -77,6 +77,12 @@ public:
77 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { 77 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
78 float x = GetAxis(analog_axis_x); 78 float x = GetAxis(analog_axis_x);
79 float y = GetAxis(analog_axis_y); 79 float y = GetAxis(analog_axis_y);
80 if (invert_x) {
81 x = -x;
82 }
83 if (invert_y) {
84 y = -y;
85 }
80 86
81 // Make sure the coordinates are in the unit circle, 87 // Make sure the coordinates are in the unit circle,
82 // otherwise normalize it. 88 // otherwise normalize it.
@@ -104,6 +110,8 @@ private:
104 const u32 button; 110 const u32 button;
105 const u32 axis_x; 111 const u32 axis_x;
106 const u32 axis_y; 112 const u32 axis_y;
113 const bool invert_x;
114 const bool invert_y;
107 const float deadzone; 115 const float deadzone;
108 const float range; 116 const float range;
109 const MouseInput::Mouse* mouse_input; 117 const MouseInput::Mouse* mouse_input;
@@ -128,8 +136,13 @@ std::unique_ptr<Input::AnalogDevice> MouseAnalogFactory::Create(
128 const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); 136 const auto axis_y = static_cast<u32>(params.Get("axis_y", 1));
129 const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); 137 const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f);
130 const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); 138 const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f);
139 const std::string invert_x_value = params.Get("invert_x", "+");
140 const std::string invert_y_value = params.Get("invert_y", "+");
141 const bool invert_x = invert_x_value == "-";
142 const bool invert_y = invert_y_value == "-";
131 143
132 return std::make_unique<MouseAnalog>(port, axis_x, axis_y, deadzone, range, mouse_input.get()); 144 return std::make_unique<MouseAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range,
145 mouse_input.get());
133} 146}
134 147
135void MouseAnalogFactory::BeginConfiguration() { 148void MouseAnalogFactory::BeginConfiguration() {
@@ -153,6 +166,8 @@ Common::ParamPackage MouseAnalogFactory::GetNextInput() const {
153 params.Set("port", static_cast<u16>(pad.button)); 166 params.Set("port", static_cast<u16>(pad.button));
154 params.Set("axis_x", 0); 167 params.Set("axis_x", 0);
155 params.Set("axis_y", 1); 168 params.Set("axis_y", 1);
169 params.Set("invert_x", "+");
170 params.Set("invert_y", "+");
156 return params; 171 return params;
157 } 172 }
158 } 173 }
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
index d56b7587b..d32eb732a 100644
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -352,13 +352,20 @@ private:
352class SDLAnalog final : public Input::AnalogDevice { 352class SDLAnalog final : public Input::AnalogDevice {
353public: 353public:
354 explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, 354 explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_,
355 float deadzone_, float range_) 355 bool invert_x_, bool invert_y_, float deadzone_, float range_)
356 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), 356 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_),
357 range(range_) {} 357 invert_y(invert_y_), deadzone(deadzone_), range(range_) {}
358 358
359 std::tuple<float, float> GetStatus() const override { 359 std::tuple<float, float> GetStatus() const override {
360 const auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range); 360 auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range);
361 const float r = std::sqrt((x * x) + (y * y)); 361 const float r = std::sqrt((x * x) + (y * y));
362 if (invert_x) {
363 x = -x;
364 }
365 if (invert_y) {
366 y = -y;
367 }
368
362 if (r > deadzone) { 369 if (r > deadzone) {
363 return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone), 370 return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone),
364 y / r * (r - deadzone) / (1 - deadzone)); 371 y / r * (r - deadzone) / (1 - deadzone));
@@ -386,6 +393,8 @@ private:
386 std::shared_ptr<SDLJoystick> joystick; 393 std::shared_ptr<SDLJoystick> joystick;
387 const int axis_x; 394 const int axis_x;
388 const int axis_y; 395 const int axis_y;
396 const bool invert_x;
397 const bool invert_y;
389 const float deadzone; 398 const float deadzone;
390 const float range; 399 const float range;
391}; 400};
@@ -572,12 +581,17 @@ public:
572 const int axis_y = params.Get("axis_y", 1); 581 const int axis_y = params.Get("axis_y", 1);
573 const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); 582 const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f);
574 const float range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); 583 const float range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f);
584 const std::string invert_x_value = params.Get("invert_x", "+");
585 const std::string invert_y_value = params.Get("invert_y", "+");
586 const bool invert_x = invert_x_value == "-";
587 const bool invert_y = invert_y_value == "-";
575 auto joystick = state.GetSDLJoystickByGUID(guid, port); 588 auto joystick = state.GetSDLJoystickByGUID(guid, port);
576 589
577 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash 590 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
578 joystick->SetAxis(axis_x, 0); 591 joystick->SetAxis(axis_x, 0);
579 joystick->SetAxis(axis_y, 0); 592 joystick->SetAxis(axis_y, 0);
580 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone, range); 593 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, invert_x, invert_y, deadzone,
594 range);
581 } 595 }
582 596
583private: 597private:
@@ -886,6 +900,8 @@ Common::ParamPackage BuildParamPackageForAnalog(int port, const std::string& gui
886 params.Set("guid", guid); 900 params.Set("guid", guid);
887 params.Set("axis_x", axis_x); 901 params.Set("axis_x", axis_x);
888 params.Set("axis_y", axis_y); 902 params.Set("axis_y", axis_y);
903 params.Set("invert_x", "+");
904 params.Set("invert_y", "+");
889 return params; 905 return params;
890} 906}
891} // Anonymous namespace 907} // Anonymous namespace
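SDLAnalog::GetStatus above applies a radial deadzone: the stick vector is rescaled so the output magnitude ramps from 0 at the deadzone boundary to 1 at full deflection rather than snapping. Note that r is computed from the raw values before inversion, which is harmless since sign flips do not change the magnitude. The formula in isolation (a sketch; the inside-deadzone branch, not shown in the hunk, is assumed to return center):

#include <cmath>
#include <utility>

// Radial deadzone: inside the dead radius the stick reads as centered;
// outside it, magnitude is remapped from [deadzone, 1] onto [0, 1].
std::pair<float, float> ApplyRadialDeadzone(float x, float y, float deadzone) {
    const float r = std::sqrt(x * x + y * y);
    if (r <= deadzone) {
        return {0.0f, 0.0f};
    }
    const float scale = (r - deadzone) / (1.0f - deadzone);
    return {x / r * scale, y / r * scale};
}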
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp
index 17a9225d7..412d57896 100644
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -225,6 +225,11 @@ void Client::OnPortInfo([[maybe_unused]] Response::PortInfo data) {
225} 225}
226 226
227void Client::OnPadData(Response::PadData data, std::size_t client) { 227void Client::OnPadData(Response::PadData data, std::size_t client) {
228 // Accept packets only for the correct pad
229 if (static_cast<u8>(clients[client].pad_index) != data.info.id) {
230 return;
231 }
232
228 LOG_TRACE(Input, "PadData packet received"); 233 LOG_TRACE(Input, "PadData packet received");
229 if (data.packet_counter == clients[client].packet_sequence) { 234 if (data.packet_counter == clients[client].packet_sequence) {
230 LOG_WARNING( 235 LOG_WARNING(
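The cast in the new guard exists because the client's configured pad_index is stored wider than the wire format's one-byte pad id; narrowing the stored value makes the comparison exact. Reduced to its essentials (types simplified for the sketch):

#include <cstdint>

struct PadData { std::uint8_t id; };   // pad id as it arrives on the wire
struct ClientState { int pad_index; }; // configured pad, stored wider

// Accept a packet only when it is addressed to this client's pad.
bool AcceptsPacket(const ClientState& client, const PadData& data) {
    return static_cast<std::uint8_t>(client.pad_index) == data.id;
}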
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 47ef30aa9..d80b0b688 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -2,7 +2,6 @@ add_executable(tests
2 common/bit_field.cpp 2 common/bit_field.cpp
3 common/bit_utils.cpp 3 common/bit_utils.cpp
4 common/fibers.cpp 4 common/fibers.cpp
5 common/multi_level_queue.cpp
6 common/param_package.cpp 5 common/param_package.cpp
7 common/ring_buffer.cpp 6 common/ring_buffer.cpp
8 core/arm/arm_test_common.cpp 7 core/arm/arm_test_common.cpp
diff --git a/src/tests/common/multi_level_queue.cpp b/src/tests/common/multi_level_queue.cpp
deleted file mode 100644
index cca7ec7da..000000000
--- a/src/tests/common/multi_level_queue.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
1// Copyright 2019 Yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <catch2/catch.hpp>
6#include <math.h>
7#include "common/common_types.h"
8#include "common/multi_level_queue.h"
9
10namespace Common {
11
12TEST_CASE("MultiLevelQueue", "[common]") {
13 std::array<f32, 8> values = {0.0, 5.0, 1.0, 9.0, 8.0, 2.0, 6.0, 7.0};
14 Common::MultiLevelQueue<f32, 64> mlq;
15 REQUIRE(mlq.empty());
16 mlq.add(values[2], 2);
17 mlq.add(values[7], 7);
18 mlq.add(values[3], 3);
19 mlq.add(values[4], 4);
20 mlq.add(values[0], 0);
21 mlq.add(values[5], 5);
22 mlq.add(values[6], 6);
23 mlq.add(values[1], 1);
24 u32 index = 0;
25 bool all_set = true;
26 for (auto& f : mlq) {
27 all_set &= (f == values[index]);
28 index++;
29 }
30 REQUIRE(all_set);
31 REQUIRE(!mlq.empty());
32 f32 v = 8.0;
33 mlq.add(v, 2);
34 v = -7.0;
35 mlq.add(v, 2, false);
36 REQUIRE(mlq.front(2) == -7.0);
37 mlq.yield(2);
38 REQUIRE(mlq.front(2) == values[2]);
39 REQUIRE(mlq.back(2) == -7.0);
40 REQUIRE(mlq.empty(8));
41 v = 10.0;
42 mlq.add(v, 8);
43 mlq.adjust(v, 8, 9);
44 REQUIRE(mlq.front(9) == v);
45 REQUIRE(mlq.empty(8));
46 REQUIRE(!mlq.empty(9));
47 mlq.adjust(values[0], 0, 9);
48 REQUIRE(mlq.highest_priority_set() == 1);
49 REQUIRE(mlq.lowest_priority_set() == 9);
50 mlq.remove(values[1], 1);
51 REQUIRE(mlq.highest_priority_set() == 2);
52 REQUIRE(mlq.empty(1));
53}
54
55} // namespace Common
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index abcee2a1c..e050f9aed 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -25,6 +25,7 @@ add_library(video_core STATIC
25 command_classes/vic.h 25 command_classes/vic.h
26 compatible_formats.cpp 26 compatible_formats.cpp
27 compatible_formats.h 27 compatible_formats.h
28 delayed_destruction_ring.h
28 dirty_flags.cpp 29 dirty_flags.cpp
29 dirty_flags.h 30 dirty_flags.h
30 dma_pusher.cpp 31 dma_pusher.cpp
@@ -47,6 +48,7 @@ add_library(video_core STATIC
47 engines/shader_bytecode.h 48 engines/shader_bytecode.h
48 engines/shader_header.h 49 engines/shader_header.h
49 engines/shader_type.h 50 engines/shader_type.h
51 framebuffer_config.h
50 macro/macro.cpp 52 macro/macro.cpp
51 macro/macro.h 53 macro/macro.h
52 macro/macro_hle.cpp 54 macro/macro_hle.cpp
@@ -58,10 +60,6 @@ add_library(video_core STATIC
58 fence_manager.h 60 fence_manager.h
59 gpu.cpp 61 gpu.cpp
60 gpu.h 62 gpu.h
61 gpu_asynch.cpp
62 gpu_asynch.h
63 gpu_synch.cpp
64 gpu_synch.h
65 gpu_thread.cpp 63 gpu_thread.cpp
66 gpu_thread.h 64 gpu_thread.h
67 guest_driver.cpp 65 guest_driver.cpp
@@ -84,14 +82,10 @@ add_library(video_core STATIC
84 renderer_opengl/gl_device.h 82 renderer_opengl/gl_device.h
85 renderer_opengl/gl_fence_manager.cpp 83 renderer_opengl/gl_fence_manager.cpp
86 renderer_opengl/gl_fence_manager.h 84 renderer_opengl/gl_fence_manager.h
87 renderer_opengl/gl_framebuffer_cache.cpp
88 renderer_opengl/gl_framebuffer_cache.h
89 renderer_opengl/gl_rasterizer.cpp 85 renderer_opengl/gl_rasterizer.cpp
90 renderer_opengl/gl_rasterizer.h 86 renderer_opengl/gl_rasterizer.h
91 renderer_opengl/gl_resource_manager.cpp 87 renderer_opengl/gl_resource_manager.cpp
92 renderer_opengl/gl_resource_manager.h 88 renderer_opengl/gl_resource_manager.h
93 renderer_opengl/gl_sampler_cache.cpp
94 renderer_opengl/gl_sampler_cache.h
95 renderer_opengl/gl_shader_cache.cpp 89 renderer_opengl/gl_shader_cache.cpp
96 renderer_opengl/gl_shader_cache.h 90 renderer_opengl/gl_shader_cache.h
97 renderer_opengl/gl_shader_decompiler.cpp 91 renderer_opengl/gl_shader_decompiler.cpp
@@ -113,10 +107,68 @@ add_library(video_core STATIC
113 renderer_opengl/maxwell_to_gl.h 107 renderer_opengl/maxwell_to_gl.h
114 renderer_opengl/renderer_opengl.cpp 108 renderer_opengl/renderer_opengl.cpp
115 renderer_opengl/renderer_opengl.h 109 renderer_opengl/renderer_opengl.h
116 renderer_opengl/utils.cpp 110 renderer_opengl/util_shaders.cpp
117 renderer_opengl/utils.h 111 renderer_opengl/util_shaders.h
118 sampler_cache.cpp 112 renderer_vulkan/blit_image.cpp
119 sampler_cache.h 113 renderer_vulkan/blit_image.h
114 renderer_vulkan/fixed_pipeline_state.cpp
115 renderer_vulkan/fixed_pipeline_state.h
116 renderer_vulkan/maxwell_to_vk.cpp
117 renderer_vulkan/maxwell_to_vk.h
118 renderer_vulkan/nsight_aftermath_tracker.cpp
119 renderer_vulkan/nsight_aftermath_tracker.h
120 renderer_vulkan/renderer_vulkan.h
121 renderer_vulkan/renderer_vulkan.cpp
122 renderer_vulkan/vk_blit_screen.cpp
123 renderer_vulkan/vk_blit_screen.h
124 renderer_vulkan/vk_buffer_cache.cpp
125 renderer_vulkan/vk_buffer_cache.h
126 renderer_vulkan/vk_command_pool.cpp
127 renderer_vulkan/vk_command_pool.h
128 renderer_vulkan/vk_compute_pass.cpp
129 renderer_vulkan/vk_compute_pass.h
130 renderer_vulkan/vk_compute_pipeline.cpp
131 renderer_vulkan/vk_compute_pipeline.h
132 renderer_vulkan/vk_descriptor_pool.cpp
133 renderer_vulkan/vk_descriptor_pool.h
134 renderer_vulkan/vk_device.cpp
135 renderer_vulkan/vk_device.h
136 renderer_vulkan/vk_fence_manager.cpp
137 renderer_vulkan/vk_fence_manager.h
138 renderer_vulkan/vk_graphics_pipeline.cpp
139 renderer_vulkan/vk_graphics_pipeline.h
140 renderer_vulkan/vk_master_semaphore.cpp
141 renderer_vulkan/vk_master_semaphore.h
142 renderer_vulkan/vk_memory_manager.cpp
143 renderer_vulkan/vk_memory_manager.h
144 renderer_vulkan/vk_pipeline_cache.cpp
145 renderer_vulkan/vk_pipeline_cache.h
146 renderer_vulkan/vk_query_cache.cpp
147 renderer_vulkan/vk_query_cache.h
148 renderer_vulkan/vk_rasterizer.cpp
149 renderer_vulkan/vk_rasterizer.h
150 renderer_vulkan/vk_resource_pool.cpp
151 renderer_vulkan/vk_resource_pool.h
152 renderer_vulkan/vk_scheduler.cpp
153 renderer_vulkan/vk_scheduler.h
154 renderer_vulkan/vk_shader_decompiler.cpp
155 renderer_vulkan/vk_shader_decompiler.h
156 renderer_vulkan/vk_shader_util.cpp
157 renderer_vulkan/vk_shader_util.h
158 renderer_vulkan/vk_staging_buffer_pool.cpp
159 renderer_vulkan/vk_staging_buffer_pool.h
160 renderer_vulkan/vk_state_tracker.cpp
161 renderer_vulkan/vk_state_tracker.h
162 renderer_vulkan/vk_stream_buffer.cpp
163 renderer_vulkan/vk_stream_buffer.h
164 renderer_vulkan/vk_swapchain.cpp
165 renderer_vulkan/vk_swapchain.h
166 renderer_vulkan/vk_texture_cache.cpp
167 renderer_vulkan/vk_texture_cache.h
168 renderer_vulkan/vk_update_descriptor.cpp
169 renderer_vulkan/vk_update_descriptor.h
170 renderer_vulkan/wrapper.cpp
171 renderer_vulkan/wrapper.h
120 shader_cache.h 172 shader_cache.h
121 shader_notify.cpp 173 shader_notify.cpp
122 shader_notify.h 174 shader_notify.h
@@ -173,19 +225,32 @@ add_library(video_core STATIC
173 shader/transform_feedback.h 225 shader/transform_feedback.h
174 surface.cpp 226 surface.cpp
175 surface.h 227 surface.h
228 texture_cache/accelerated_swizzle.cpp
229 texture_cache/accelerated_swizzle.h
230 texture_cache/decode_bc4.cpp
231 texture_cache/decode_bc4.h
232 texture_cache/descriptor_table.h
233 texture_cache/formatter.cpp
234 texture_cache/formatter.h
176 texture_cache/format_lookup_table.cpp 235 texture_cache/format_lookup_table.cpp
177 texture_cache/format_lookup_table.h 236 texture_cache/format_lookup_table.h
178 texture_cache/surface_base.cpp 237 texture_cache/image_base.cpp
179 texture_cache/surface_base.h 238 texture_cache/image_base.h
180 texture_cache/surface_params.cpp 239 texture_cache/image_info.cpp
181 texture_cache/surface_params.h 240 texture_cache/image_info.h
182 texture_cache/surface_view.cpp 241 texture_cache/image_view_base.cpp
183 texture_cache/surface_view.h 242 texture_cache/image_view_base.h
243 texture_cache/image_view_info.cpp
244 texture_cache/image_view_info.h
245 texture_cache/render_targets.h
246 texture_cache/samples_helper.h
247 texture_cache/slot_vector.h
184 texture_cache/texture_cache.h 248 texture_cache/texture_cache.h
249 texture_cache/types.h
250 texture_cache/util.cpp
251 texture_cache/util.h
185 textures/astc.cpp 252 textures/astc.cpp
186 textures/astc.h 253 textures/astc.h
187 textures/convert.cpp
188 textures/convert.h
189 textures/decoders.cpp 254 textures/decoders.cpp
190 textures/decoders.h 255 textures/decoders.h
191 textures/texture.cpp 256 textures/texture.cpp
@@ -194,75 +259,6 @@ add_library(video_core STATIC
194 video_core.h 259 video_core.h
195) 260)
196 261
197if (ENABLE_VULKAN)
198 target_sources(video_core PRIVATE
199 renderer_vulkan/fixed_pipeline_state.cpp
200 renderer_vulkan/fixed_pipeline_state.h
201 renderer_vulkan/maxwell_to_vk.cpp
202 renderer_vulkan/maxwell_to_vk.h
203 renderer_vulkan/nsight_aftermath_tracker.cpp
204 renderer_vulkan/nsight_aftermath_tracker.h
205 renderer_vulkan/renderer_vulkan.h
206 renderer_vulkan/renderer_vulkan.cpp
207 renderer_vulkan/vk_blit_screen.cpp
208 renderer_vulkan/vk_blit_screen.h
209 renderer_vulkan/vk_buffer_cache.cpp
210 renderer_vulkan/vk_buffer_cache.h
211 renderer_vulkan/vk_command_pool.cpp
212 renderer_vulkan/vk_command_pool.h
213 renderer_vulkan/vk_compute_pass.cpp
214 renderer_vulkan/vk_compute_pass.h
215 renderer_vulkan/vk_compute_pipeline.cpp
216 renderer_vulkan/vk_compute_pipeline.h
217 renderer_vulkan/vk_descriptor_pool.cpp
218 renderer_vulkan/vk_descriptor_pool.h
219 renderer_vulkan/vk_device.cpp
220 renderer_vulkan/vk_device.h
221 renderer_vulkan/vk_fence_manager.cpp
222 renderer_vulkan/vk_fence_manager.h
223 renderer_vulkan/vk_graphics_pipeline.cpp
224 renderer_vulkan/vk_graphics_pipeline.h
225 renderer_vulkan/vk_image.cpp
226 renderer_vulkan/vk_image.h
227 renderer_vulkan/vk_master_semaphore.cpp
228 renderer_vulkan/vk_master_semaphore.h
229 renderer_vulkan/vk_memory_manager.cpp
230 renderer_vulkan/vk_memory_manager.h
231 renderer_vulkan/vk_pipeline_cache.cpp
232 renderer_vulkan/vk_pipeline_cache.h
233 renderer_vulkan/vk_query_cache.cpp
234 renderer_vulkan/vk_query_cache.h
235 renderer_vulkan/vk_rasterizer.cpp
236 renderer_vulkan/vk_rasterizer.h
237 renderer_vulkan/vk_renderpass_cache.cpp
238 renderer_vulkan/vk_renderpass_cache.h
239 renderer_vulkan/vk_resource_pool.cpp
240 renderer_vulkan/vk_resource_pool.h
241 renderer_vulkan/vk_sampler_cache.cpp
242 renderer_vulkan/vk_sampler_cache.h
243 renderer_vulkan/vk_scheduler.cpp
244 renderer_vulkan/vk_scheduler.h
245 renderer_vulkan/vk_shader_decompiler.cpp
246 renderer_vulkan/vk_shader_decompiler.h
247 renderer_vulkan/vk_shader_util.cpp
248 renderer_vulkan/vk_shader_util.h
249 renderer_vulkan/vk_staging_buffer_pool.cpp
250 renderer_vulkan/vk_staging_buffer_pool.h
251 renderer_vulkan/vk_state_tracker.cpp
252 renderer_vulkan/vk_state_tracker.h
253 renderer_vulkan/vk_stream_buffer.cpp
254 renderer_vulkan/vk_stream_buffer.h
255 renderer_vulkan/vk_swapchain.cpp
256 renderer_vulkan/vk_swapchain.h
257 renderer_vulkan/vk_texture_cache.cpp
258 renderer_vulkan/vk_texture_cache.h
259 renderer_vulkan/vk_update_descriptor.cpp
260 renderer_vulkan/vk_update_descriptor.h
261 renderer_vulkan/wrapper.cpp
262 renderer_vulkan/wrapper.h
263 )
264endif()
265
266create_target_directory_groups(video_core) 262create_target_directory_groups(video_core)
267 263
268target_link_libraries(video_core PUBLIC common core) 264target_link_libraries(video_core PUBLIC common core)
@@ -278,12 +274,8 @@ endif()
278 274
279add_dependencies(video_core host_shaders) 275add_dependencies(video_core host_shaders)
280target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) 276target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
281 277target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
282if (ENABLE_VULKAN) 278target_link_libraries(video_core PRIVATE sirit)
283 target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
284 target_compile_definitions(video_core PRIVATE HAS_VULKAN)
285 target_link_libraries(video_core PRIVATE sirit)
286endif()
287 279
288if (ENABLE_NSIGHT_AFTERMATH) 280if (ENABLE_NSIGHT_AFTERMATH)
289 if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) 281 if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})
@@ -297,13 +289,21 @@ if (ENABLE_NSIGHT_AFTERMATH)
297endif() 289endif()
298 290
299if (MSVC) 291if (MSVC)
300 target_compile_options(video_core PRIVATE /we4267) 292 target_compile_options(video_core PRIVATE
293 /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
294 /we4456 # Declaration of 'identifier' hides previous local declaration
295 /we4457 # Declaration of 'identifier' hides function parameter
296 /we4458 # Declaration of 'identifier' hides class member
297 /we4459 # Declaration of 'identifier' hides global declaration
298 /we4715 # 'function' : not all control paths return a value
299 )
301else() 300else()
302 target_compile_options(video_core PRIVATE 301 target_compile_options(video_core PRIVATE
303 -Werror=conversion 302 -Werror=conversion
304 -Wno-error=sign-conversion 303 -Wno-error=sign-conversion
305 -Werror=pessimizing-move 304 -Werror=pessimizing-move
306 -Werror=redundant-move 305 -Werror=redundant-move
306 -Werror=shadow
307 -Werror=switch 307 -Werror=switch
308 -Werror=type-limits 308 -Werror=type-limits
309 -Werror=unused-variable 309 -Werror=unused-variable
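The warning changes at the bottom pair with the renames elsewhere in this diff: /we4456-/we4459 and -Werror=shadow both promote declaration shadowing to a hard error, which is presumably what motivated the offset -> state_offset and frame -> current_frame renames further down. A minimal example of code these flags now reject (illustrative only):

// Rejected under -Werror=shadow (GCC/Clang) or /we4457 (MSVC): the inner
// `frame` hides the function parameter of the same name.
int SumFrames(int frame) {
    int total = 0;
    for (int i = 0; i < frame; ++i) {
        int frame = i; // error: declaration shadows a parameter
        total += frame;
    }
    return total;
}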
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index e64170e66..e9306194a 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -4,34 +4,29 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <unordered_set>
8#include <utility>
9
10#include "common/alignment.h"
11#include "common/common_types.h" 7#include "common/common_types.h"
12#include "video_core/gpu.h"
13 8
14namespace VideoCommon { 9namespace VideoCommon {
15 10
16class BufferBlock { 11class BufferBlock {
17public: 12public:
18 bool Overlaps(VAddr start, VAddr end) const { 13 [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
19 return (cpu_addr < end) && (cpu_addr_end > start); 14 return (cpu_addr < end) && (cpu_addr_end > start);
20 } 15 }
21 16
22 bool IsInside(VAddr other_start, VAddr other_end) const { 17 [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
23 return cpu_addr <= other_start && other_end <= cpu_addr_end; 18 return cpu_addr <= other_start && other_end <= cpu_addr_end;
24 } 19 }
25 20
26 std::size_t Offset(VAddr in_addr) const { 21 [[nodiscard]] std::size_t Offset(VAddr in_addr) const {
27 return static_cast<std::size_t>(in_addr - cpu_addr); 22 return static_cast<std::size_t>(in_addr - cpu_addr);
28 } 23 }
29 24
30 VAddr CpuAddr() const { 25 [[nodiscard]] VAddr CpuAddr() const {
31 return cpu_addr; 26 return cpu_addr;
32 } 27 }
33 28
34 VAddr CpuAddrEnd() const { 29 [[nodiscard]] VAddr CpuAddrEnd() const {
35 return cpu_addr_end; 30 return cpu_addr_end;
36 } 31 }
37 32
@@ -40,11 +35,11 @@ public:
40 cpu_addr_end = new_addr + size; 35 cpu_addr_end = new_addr + size;
41 } 36 }
42 37
43 std::size_t Size() const { 38 [[nodiscard]] std::size_t Size() const {
44 return size; 39 return size;
45 } 40 }
46 41
47 u64 Epoch() const { 42 [[nodiscard]] u64 Epoch() const {
48 return epoch; 43 return epoch;
49 } 44 }
50 45
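[[nodiscard]] on these accessors asks the compiler to warn whenever a caller evaluates one and ignores the result, which for pure getters is almost always a bug. A small illustration:

#include <cstddef>

struct Block {
    [[nodiscard]] std::size_t Size() const { return size; }
    std::size_t size = 0;
};

void Example(const Block& block) {
    block.Size();                           // warning: result discarded
    const std::size_t bytes = block.Size(); // fine: result is consumed
    static_cast<void>(bytes);
}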
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index e7edd733f..83b9ee871 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -118,20 +118,17 @@ public:
118 /// Prepares the buffer cache for data uploading 118 /// Prepares the buffer cache for data uploading
119 /// @param max_size Maximum number of bytes that will be uploaded 119 /// @param max_size Maximum number of bytes that will be uploaded
120 /// @return True when a stream buffer invalidation was required, false otherwise 120 /// @return True when a stream buffer invalidation was required, false otherwise
121 bool Map(std::size_t max_size) { 121 void Map(std::size_t max_size) {
122 std::lock_guard lock{mutex}; 122 std::lock_guard lock{mutex};
123 123
124 bool invalidated; 124 std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
125 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
126 buffer_offset = buffer_offset_base; 125 buffer_offset = buffer_offset_base;
127
128 return invalidated;
129 } 126 }
130 127
131 /// Finishes the upload stream 128 /// Finishes the upload stream
132 void Unmap() { 129 void Unmap() {
133 std::lock_guard lock{mutex}; 130 std::lock_guard lock{mutex};
134 stream_buffer->Unmap(buffer_offset - buffer_offset_base); 131 stream_buffer.Unmap(buffer_offset - buffer_offset_base);
135 } 132 }
136 133
137 /// Function called at the end of each frame, intended for deferred operations 134 /// Function called at the end of each frame, intended for deferred operations
@@ -261,9 +258,9 @@ public:
261protected: 258protected:
262 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, 259 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
263 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 260 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
264 std::unique_ptr<StreamBuffer> stream_buffer_) 261 StreamBuffer& stream_buffer_)
265 : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, 262 : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
266 stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {} 263 stream_buffer{stream_buffer_} {}
267 264
268 ~BufferCache() = default; 265 ~BufferCache() = default;
269 266
@@ -441,7 +438,7 @@ private:
441 438
442 buffer_ptr += size; 439 buffer_ptr += size;
443 buffer_offset += size; 440 buffer_offset += size;
444 return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; 441 return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
445 } 442 }
446 443
447 void AlignBuffer(std::size_t alignment) { 444 void AlignBuffer(std::size_t alignment) {
@@ -545,7 +542,7 @@ private:
545 bool IsRegionWritten(VAddr start, VAddr end) const { 542 bool IsRegionWritten(VAddr start, VAddr end) const {
546 const u64 page_end = end >> WRITE_PAGE_BIT; 543 const u64 page_end = end >> WRITE_PAGE_BIT;
547 for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { 544 for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
548 if (written_pages.count(page_start) > 0) { 545 if (written_pages.contains(page_start)) {
549 return true; 546 return true;
550 } 547 }
551 } 548 }
@@ -567,9 +564,7 @@ private:
567 VideoCore::RasterizerInterface& rasterizer; 564 VideoCore::RasterizerInterface& rasterizer;
568 Tegra::MemoryManager& gpu_memory; 565 Tegra::MemoryManager& gpu_memory;
569 Core::Memory::Memory& cpu_memory; 566 Core::Memory::Memory& cpu_memory;
570 567 StreamBuffer& stream_buffer;
571 std::unique_ptr<StreamBuffer> stream_buffer;
572 BufferType stream_buffer_handle;
573 568
574 u8* buffer_ptr = nullptr; 569 u8* buffer_ptr = nullptr;
575 u64 buffer_offset = 0; 570 u64 buffer_offset = 0;
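The buffer cache change here is an ownership shift: instead of holding the stream buffer in a std::unique_ptr, the cache now borrows one by reference, so a longer-lived object (the backend rasterizer, judging by the constructor change) owns it and must outlive the cache. Sketched in miniature:

struct StreamBuffer {};

class Cache {
public:
    // Non-owning: the caller keeps the StreamBuffer alive for the cache's
    // whole lifetime; a reference member cannot be reseated or nulled.
    explicit Cache(StreamBuffer& stream_buffer_) : stream_buffer{stream_buffer_} {}

private:
    StreamBuffer& stream_buffer;
};

// Usage: owner declared first so it outlives the borrower.
// StreamBuffer stream_buffer;
// Cache cache{stream_buffer};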
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index fe0bcd1d8..ef974b08a 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -84,9 +84,10 @@ private:
84 void FillFreeList(Chunk& chunk); 84 void FillFreeList(Chunk& chunk);
85 85
86 std::vector<MapInterval*> free_list; 86 std::vector<MapInterval*> free_list;
87 std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
88 87
89 Chunk first_chunk; 88 Chunk first_chunk;
89
90 std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
90}; 91};
91 92
92} // namespace VideoCommon 93} // namespace VideoCommon
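The map_interval.h hunk is subtler than it looks: non-static data members are initialized in declaration order, so a default member initializer that takes the address of first_chunk.next should be declared after first_chunk, which is exactly what the reorder does. The safe ordering in isolation:

#include <memory>

struct Chunk {
    std::unique_ptr<Chunk> next;
};

struct Pool {
    // first_chunk is declared (and therefore constructed) first, so the
    // initializer below points at a fully constructed member.
    Chunk first_chunk;
    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
};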
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index b60f86260..e3e7432f7 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -29,8 +29,8 @@
29#include "video_core/memory_manager.h" 29#include "video_core/memory_manager.h"
30 30
31namespace Tegra { 31namespace Tegra {
32CDmaPusher::CDmaPusher(GPU& gpu) 32CDmaPusher::CDmaPusher(GPU& gpu_)
33 : gpu(gpu), nvdec_processor(std::make_shared<Nvdec>(gpu)), 33 : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
34 vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), 34 vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
35 host1x_processor(std::make_unique<Host1x>(gpu)), 35 host1x_processor(std::make_unique<Host1x>(gpu)),
36 nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)), 36 nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)),
@@ -100,11 +100,11 @@ void CDmaPusher::Step() {
100 } 100 }
101} 101}
102 102
103void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { 103void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
104 switch (current_class) { 104 switch (current_class) {
105 case ChClassId::NvDec: 105 case ChClassId::NvDec:
106 ThiStateWrite(nvdec_thi_state, offset, {data}); 106 ThiStateWrite(nvdec_thi_state, state_offset, {data});
107 switch (static_cast<ThiMethod>(offset)) { 107 switch (static_cast<ThiMethod>(state_offset)) {
108 case ThiMethod::IncSyncpt: { 108 case ThiMethod::IncSyncpt: {
109 LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method"); 109 LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
110 const auto syncpoint_id = static_cast<u32>(data & 0xFF); 110 const auto syncpoint_id = static_cast<u32>(data & 0xFF);
@@ -120,16 +120,16 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) {
120 case ThiMethod::SetMethod1: 120 case ThiMethod::SetMethod1:
121 LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", 121 LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
122 static_cast<u32>(nvdec_thi_state.method_0)); 122 static_cast<u32>(nvdec_thi_state.method_0));
123 nvdec_processor->ProcessMethod( 123 nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
124 static_cast<Tegra::Nvdec::Method>(nvdec_thi_state.method_0), {data}); 124 {data});
125 break; 125 break;
126 default: 126 default:
127 break; 127 break;
128 } 128 }
129 break; 129 break;
130 case ChClassId::GraphicsVic: 130 case ChClassId::GraphicsVic:
131 ThiStateWrite(vic_thi_state, static_cast<u32>(offset), {data}); 131 ThiStateWrite(vic_thi_state, static_cast<u32>(state_offset), {data});
132 switch (static_cast<ThiMethod>(offset)) { 132 switch (static_cast<ThiMethod>(state_offset)) {
133 case ThiMethod::IncSyncpt: { 133 case ThiMethod::IncSyncpt: {
134 LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method"); 134 LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
135 const auto syncpoint_id = static_cast<u32>(data & 0xFF); 135 const auto syncpoint_id = static_cast<u32>(data & 0xFF);
@@ -145,8 +145,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) {
145 case ThiMethod::SetMethod1: 145 case ThiMethod::SetMethod1:
146 LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", 146 LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
147 static_cast<u32>(vic_thi_state.method_0), data); 147 static_cast<u32>(vic_thi_state.method_0), data);
148 vic_processor->ProcessMethod(static_cast<Tegra::Vic::Method>(vic_thi_state.method_0), 148 vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), {data});
149 {data});
150 break; 149 break;
151 default: 150 default:
152 break; 151 break;
@@ -155,7 +154,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) {
155 case ChClassId::Host1x: 154 case ChClassId::Host1x:
156 // This device is mainly for syncpoint synchronization 155 // This device is mainly for syncpoint synchronization
157 LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); 156 LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
158 host1x_processor->ProcessMethod(static_cast<Tegra::Host1x::Method>(offset), {data}); 157 host1x_processor->ProcessMethod(static_cast<Host1x::Method>(state_offset), {data});
159 break; 158 break;
160 default: 159 default:
161 UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class)); 160 UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
@@ -163,9 +162,10 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) {
163 } 162 }
164} 163}
165 164
166void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments) { 165void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset,
167 u8* const state_offset = reinterpret_cast<u8*>(&state) + sizeof(u32) * offset; 166 const std::vector<u32>& arguments) {
168 std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size()); 167 u8* const state_offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
168 std::memcpy(state_offset_ptr, arguments.data(), sizeof(u32) * arguments.size());
169} 169}
170 170
171} // namespace Tegra 171} // namespace Tegra
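ThiStateWrite treats the ThiRegisters struct as a flat bank of u32 registers and memcpys the arguments in at a word offset. The same pattern, reduced (bounds checking left to the caller, as in the original):

#include <cstdint>
#include <cstring>
#include <vector>

struct Registers {
    std::uint32_t reg0{};
    std::uint32_t reg1{};
    std::uint32_t reg2{};
};

// Write `arguments` into `state` starting at word index `word_offset`.
void StateWrite(Registers& state, std::uint32_t word_offset,
                const std::vector<std::uint32_t>& arguments) {
    auto* const dst =
        reinterpret_cast<std::uint8_t*>(&state) + sizeof(std::uint32_t) * word_offset;
    std::memcpy(dst, arguments.data(), sizeof(std::uint32_t) * arguments.size());
}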
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 982f309c5..0db1cd646 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -68,8 +68,8 @@ struct ChCommand {
68 std::vector<u32> arguments; 68 std::vector<u32> arguments;
69}; 69};
70 70
71using ChCommandHeaderList = std::vector<Tegra::ChCommandHeader>; 71using ChCommandHeaderList = std::vector<ChCommandHeader>;
72using ChCommandList = std::vector<Tegra::ChCommand>; 72using ChCommandList = std::vector<ChCommand>;
73 73
74struct ThiRegisters { 74struct ThiRegisters {
75 u32_le increment_syncpt{}; 75 u32_le increment_syncpt{};
@@ -96,7 +96,7 @@ enum class ThiMethod : u32 {
96 96
97class CDmaPusher { 97class CDmaPusher {
98public: 98public:
99 explicit CDmaPusher(GPU& gpu); 99 explicit CDmaPusher(GPU& gpu_);
100 ~CDmaPusher(); 100 ~CDmaPusher();
101 101
102 /// Push NVDEC command buffer entries into queue 102 /// Push NVDEC command buffer entries into queue
@@ -109,17 +109,17 @@ public:
109 void Step(); 109 void Step();
110 110
111 /// Invoke command class devices to execute the command based on the current state 111 /// Invoke command class devices to execute the command based on the current state
112 void ExecuteCommand(u32 offset, u32 data); 112 void ExecuteCommand(u32 state_offset, u32 data);
113 113
114private: 114private:
115 /// Write argument values to the ThiRegisters member at the specified offset 115 /// Write argument values to the ThiRegisters member at the specified offset
116 void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments); 116 void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
117 117
118 GPU& gpu; 118 GPU& gpu;
119 119
120 std::shared_ptr<Tegra::Nvdec> nvdec_processor; 120 std::shared_ptr<Nvdec> nvdec_processor;
121 std::unique_ptr<Tegra::Vic> vic_processor; 121 std::unique_ptr<Vic> vic_processor;
122 std::unique_ptr<Tegra::Host1x> host1x_processor; 122 std::unique_ptr<Host1x> host1x_processor;
123 std::unique_ptr<SyncptIncrManager> nvdec_sync; 123 std::unique_ptr<SyncptIncrManager> nvdec_sync;
124 std::unique_ptr<SyncptIncrManager> vic_sync; 124 std::unique_ptr<SyncptIncrManager> vic_sync;
125 ChClassId current_class{}; 125 ChClassId current_class{};
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index f547f5bd4..39bc923a5 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -44,7 +44,7 @@ Codec::~Codec() {
44} 44}
45 45
46void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { 46void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
47 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); 47 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec);
48 current_codec = codec; 48 current_codec = codec;
49} 49}
50 50
@@ -62,7 +62,7 @@ void Codec::Decode() {
62 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { 62 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
63 av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); 63 av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
64 } else { 64 } else {
65 LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast<u32>(current_codec)); 65 LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
66 return; 66 return;
67 } 67 }
68 68
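Dropping the static_cast in these log calls only compiles because the logging layer is fmt-based and the enum type has a formatter; plain fmt will not format an arbitrary enum class on its own. One way to provide that, as a sketch (yuzu's actual formatter is not shown in this diff, and the enum values below are placeholders):

#include <fmt/format.h>

enum class VideoCodec { None, H264, Vp8, Vp9 };

// Trivial fmt formatter that prints the enum as its underlying integer.
template <>
struct fmt::formatter<VideoCodec> {
    constexpr auto parse(fmt::format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(VideoCodec codec, FormatContext& ctx) const {
        return fmt::format_to(ctx.out(), "{}", static_cast<int>(codec));
    }
};

// fmt::print("codec {}\n", VideoCodec::Vp9); // prints "codec 3"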
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 7d8d6ee3c..59e586695 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -233,7 +233,7 @@ constexpr std::array<s32, 254> map_lut{
233} 233}
234} // Anonymous namespace 234} // Anonymous namespace
235 235
236VP9::VP9(GPU& gpu) : gpu(gpu) {} 236VP9::VP9(GPU& gpu_) : gpu{gpu_} {}
237 237
238VP9::~VP9() = default; 238VP9::~VP9() = default;
239 239
@@ -374,43 +374,43 @@ void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
374} 374}
375 375
376Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) { 376Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) {
377 Vp9FrameContainer frame{}; 377 Vp9FrameContainer current_frame{};
378 { 378 {
379 gpu.SyncGuestHost(); 379 gpu.SyncGuestHost();
380 frame.info = GetVp9PictureInfo(state); 380 current_frame.info = GetVp9PictureInfo(state);
381 frame.bit_stream.resize(frame.info.bitstream_size); 381 current_frame.bit_stream.resize(current_frame.info.bitstream_size);
382 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), 382 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
383 frame.info.bitstream_size); 383 current_frame.info.bitstream_size);
384 } 384 }
385 // Buffer two frames, saving the last show frame info 385 // Buffer two frames, saving the last show frame info
386 if (!next_next_frame.bit_stream.empty()) { 386 if (!next_next_frame.bit_stream.empty()) {
387 Vp9FrameContainer temp{ 387 Vp9FrameContainer temp{
388 .info = frame.info, 388 .info = current_frame.info,
389 .bit_stream = std::move(frame.bit_stream), 389 .bit_stream = std::move(current_frame.bit_stream),
390 }; 390 };
391 next_next_frame.info.show_frame = frame.info.last_frame_shown; 391 next_next_frame.info.show_frame = current_frame.info.last_frame_shown;
392 frame.info = next_next_frame.info; 392 current_frame.info = next_next_frame.info;
393 frame.bit_stream = std::move(next_next_frame.bit_stream); 393 current_frame.bit_stream = std::move(next_next_frame.bit_stream);
394 next_next_frame = std::move(temp); 394 next_next_frame = std::move(temp);
395 395
396 if (!next_frame.bit_stream.empty()) { 396 if (!next_frame.bit_stream.empty()) {
397 Vp9FrameContainer temp2{ 397 Vp9FrameContainer temp2{
398 .info = frame.info, 398 .info = current_frame.info,
399 .bit_stream = std::move(frame.bit_stream), 399 .bit_stream = std::move(current_frame.bit_stream),
400 }; 400 };
401 next_frame.info.show_frame = frame.info.last_frame_shown; 401 next_frame.info.show_frame = current_frame.info.last_frame_shown;
402 frame.info = next_frame.info; 402 current_frame.info = next_frame.info;
403 frame.bit_stream = std::move(next_frame.bit_stream); 403 current_frame.bit_stream = std::move(next_frame.bit_stream);
404 next_frame = std::move(temp2); 404 next_frame = std::move(temp2);
405 } else { 405 } else {
406 next_frame.info = frame.info; 406 next_frame.info = current_frame.info;
407 next_frame.bit_stream = std::move(frame.bit_stream); 407 next_frame.bit_stream = std::move(current_frame.bit_stream);
408 } 408 }
409 } else { 409 } else {
410 next_next_frame.info = frame.info; 410 next_next_frame.info = current_frame.info;
411 next_next_frame.bit_stream = std::move(frame.bit_stream); 411 next_next_frame.bit_stream = std::move(current_frame.bit_stream);
412 } 412 }
413 return frame; 413 return current_frame;
414} 414}
415 415
416std::vector<u8> VP9::ComposeCompressedHeader() { 416std::vector<u8> VP9::ComposeCompressedHeader() {
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
index 9ebbbf59e..8396c8105 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -108,7 +108,7 @@ private:
108 108
109class VP9 { 109class VP9 {
110public: 110public:
111 explicit VP9(GPU& gpu); 111 explicit VP9(GPU& gpu_);
112 ~VP9(); 112 ~VP9();
113 113
114 VP9(const VP9&) = delete; 114 VP9(const VP9&) = delete;
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 6cfc193fa..aa8c9f9de 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -9,7 +9,7 @@
9#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/gpu.h" 10#include "video_core/gpu.h"
11#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
12#include "video_core/texture_cache/surface_params.h" 12#include "video_core/textures/decoders.h"
13 13
14extern "C" { 14extern "C" {
15#include <libswscale/swscale.h> 15#include <libswscale/swscale.h>
@@ -27,7 +27,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) {
27} 27}
28 28
29void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) { 29void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
30 LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method)); 30 LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", method);
31 VicStateWrite(static_cast<u32>(method), arguments[0]); 31 VicStateWrite(static_cast<u32>(method), arguments[0]);
32 const u64 arg = static_cast<u64>(arguments[0]) << 8; 32 const u64 arg = static_cast<u64>(arguments[0]) << 8;
33 switch (method) { 33 switch (method) {
@@ -105,9 +105,9 @@ void Vic::Execute() {
105 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, 105 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
106 block_height, 0); 106 block_height, 0);
107 std::vector<u8> swizzled_data(size); 107 std::vector<u8> swizzled_data(size);
108 Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, 108 Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
109 swizzled_data.data(), converted_frame_buffer.get(), 109 frame->width, 4, swizzled_data.data(),
110 false, block_height, 0, 1); 110 converted_frame_buffer.get(), block_height, 0, 0);
111 111
112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); 112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
113 gpu.Maxwell3D().OnMemoryWrite(); 113 gpu.Maxwell3D().OnMemoryWrite();
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index b06c32c84..1619d8664 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -3,9 +3,9 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array> 5#include <array>
6#include <bitset>
7#include <cstddef> 6#include <cstddef>
8 7
8#include "common/common_types.h"
9#include "video_core/compatible_formats.h" 9#include "video_core/compatible_formats.h"
10#include "video_core/surface.h" 10#include "video_core/surface.h"
11 11
@@ -13,23 +13,25 @@ namespace VideoCore::Surface {
13 13
14namespace { 14namespace {
15 15
16using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;
17
16// Compatibility table taken from Table 3.X.2 in: 18// Compatibility table taken from Table 3.X.2 in:
17// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt 19// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
18 20
19constexpr std::array VIEW_CLASS_128_BITS = { 21constexpr std::array VIEW_CLASS_128_BITS{
20 PixelFormat::R32G32B32A32_FLOAT, 22 PixelFormat::R32G32B32A32_FLOAT,
21 PixelFormat::R32G32B32A32_UINT, 23 PixelFormat::R32G32B32A32_UINT,
22 PixelFormat::R32G32B32A32_SINT, 24 PixelFormat::R32G32B32A32_SINT,
23}; 25};
24 26
25constexpr std::array VIEW_CLASS_96_BITS = { 27constexpr std::array VIEW_CLASS_96_BITS{
26 PixelFormat::R32G32B32_FLOAT, 28 PixelFormat::R32G32B32_FLOAT,
27}; 29};
28// Missing formats: 30// Missing formats:
29// PixelFormat::RGB32UI, 31// PixelFormat::RGB32UI,
30// PixelFormat::RGB32I, 32// PixelFormat::RGB32I,
31 33
32constexpr std::array VIEW_CLASS_64_BITS = { 34constexpr std::array VIEW_CLASS_64_BITS{
33 PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, 35 PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
34 PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, 36 PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT,
35 PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, 37 PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
@@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {
38 40
39// TODO: How should we handle 48 bits? 41// TODO: How should we handle 48 bits?
40 42
41constexpr std::array VIEW_CLASS_32_BITS = { 43constexpr std::array VIEW_CLASS_32_BITS{
42 PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, 44 PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
43 PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, 45 PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
44 PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, 46 PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
@@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {
50 52
51// TODO: How should we handle 24 bits? 53// TODO: How should we handle 24 bits?
52 54
53constexpr std::array VIEW_CLASS_16_BITS = { 55constexpr std::array VIEW_CLASS_16_BITS{
54 PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, 56 PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT,
55 PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, 57 PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
56 PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, 58 PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT,
57}; 59};
58 60
59constexpr std::array VIEW_CLASS_8_BITS = { 61constexpr std::array VIEW_CLASS_8_BITS{
60 PixelFormat::R8_UINT, 62 PixelFormat::R8_UINT,
61 PixelFormat::R8_UNORM, 63 PixelFormat::R8_UNORM,
62 PixelFormat::R8_SINT, 64 PixelFormat::R8_SINT,
63 PixelFormat::R8_SNORM, 65 PixelFormat::R8_SNORM,
64}; 66};
65 67
66constexpr std::array VIEW_CLASS_RGTC1_RED = { 68constexpr std::array VIEW_CLASS_RGTC1_RED{
67 PixelFormat::BC4_UNORM, 69 PixelFormat::BC4_UNORM,
68 PixelFormat::BC4_SNORM, 70 PixelFormat::BC4_SNORM,
69}; 71};
70 72
71constexpr std::array VIEW_CLASS_RGTC2_RG = { 73constexpr std::array VIEW_CLASS_RGTC2_RG{
72 PixelFormat::BC5_UNORM, 74 PixelFormat::BC5_UNORM,
73 PixelFormat::BC5_SNORM, 75 PixelFormat::BC5_SNORM,
74}; 76};
75 77
76constexpr std::array VIEW_CLASS_BPTC_UNORM = { 78constexpr std::array VIEW_CLASS_BPTC_UNORM{
77 PixelFormat::BC7_UNORM, 79 PixelFormat::BC7_UNORM,
78 PixelFormat::BC7_SRGB, 80 PixelFormat::BC7_SRGB,
79}; 81};
80 82
81constexpr std::array VIEW_CLASS_BPTC_FLOAT = { 83constexpr std::array VIEW_CLASS_BPTC_FLOAT{
82 PixelFormat::BC6H_SFLOAT, 84 PixelFormat::BC6H_SFLOAT,
83 PixelFormat::BC6H_UFLOAT, 85 PixelFormat::BC6H_UFLOAT,
84}; 86};
85 87
88constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{
89 PixelFormat::ASTC_2D_4X4_UNORM,
90 PixelFormat::ASTC_2D_4X4_SRGB,
91};
92
93constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{
94 PixelFormat::ASTC_2D_5X4_UNORM,
95 PixelFormat::ASTC_2D_5X4_SRGB,
96};
97
98constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{
99 PixelFormat::ASTC_2D_5X5_UNORM,
100 PixelFormat::ASTC_2D_5X5_SRGB,
101};
102
103constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{
104 PixelFormat::ASTC_2D_6X5_UNORM,
105 PixelFormat::ASTC_2D_6X5_SRGB,
106};
107
108constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{
109 PixelFormat::ASTC_2D_6X6_UNORM,
110 PixelFormat::ASTC_2D_6X6_SRGB,
111};
112
113constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{
114 PixelFormat::ASTC_2D_8X5_UNORM,
115 PixelFormat::ASTC_2D_8X5_SRGB,
116};
117
118constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
119 PixelFormat::ASTC_2D_8X8_UNORM,
120 PixelFormat::ASTC_2D_8X8_SRGB,
121};
122
123// Missing formats:
124// PixelFormat::ASTC_2D_10X5_UNORM
125// PixelFormat::ASTC_2D_10X5_SRGB
126
127// Missing formats:
128// PixelFormat::ASTC_2D_10X6_UNORM
129// PixelFormat::ASTC_2D_10X6_SRGB
130
131constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
132 PixelFormat::ASTC_2D_10X8_UNORM,
133 PixelFormat::ASTC_2D_10X8_SRGB,
134};
135
136constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
137 PixelFormat::ASTC_2D_10X10_UNORM,
138 PixelFormat::ASTC_2D_10X10_SRGB,
139};
140
141// Missing formats
142// ASTC_2D_12X10_UNORM,
143// ASTC_2D_12X10_SRGB,
144
145constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
146 PixelFormat::ASTC_2D_12X12_UNORM,
147 PixelFormat::ASTC_2D_12X12_SRGB,
148};
149
86// Compatibility table taken from Table 4.X.1 in: 150// Compatibility table taken from Table 4.X.1 in:
87// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt 151// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
88 152
89constexpr std::array COPY_CLASS_128_BITS = { 153constexpr std::array COPY_CLASS_128_BITS{
90 PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, 154 PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
91 PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, 155 PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM,
92 PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, 156 PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM,
@@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = {
97// PixelFormat::RGBA32I 161// PixelFormat::RGBA32I
98// COMPRESSED_RG_RGTC2 162// COMPRESSED_RG_RGTC2
99 163
100constexpr std::array COPY_CLASS_64_BITS = { 164constexpr std::array COPY_CLASS_64_BITS{
101 PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, 165 PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
102 PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, 166 PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
103 PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, 167 PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT,
@@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = {
110// COMPRESSED_RGBA_S3TC_DXT1_EXT 174// COMPRESSED_RGBA_S3TC_DXT1_EXT
111// COMPRESSED_SIGNED_RED_RGTC1 175// COMPRESSED_SIGNED_RED_RGTC1
112 176
113void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { 177constexpr void Enable(Table& table, size_t format_a, size_t format_b) {
114 compatiblity[format_a][format_b] = true; 178 table[format_a][format_b / 64] |= u64(1) << (format_b % 64);
115 compatiblity[format_b][format_a] = true; 179 table[format_b][format_a / 64] |= u64(1) << (format_a % 64);
116} 180}
117 181
118void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { 182constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) {
119 Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); 183 Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
120} 184}
121 185
122template <typename Range> 186template <typename Range>
123void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { 187constexpr void EnableRange(Table& table, const Range& range) {
124 for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { 188 for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
125 for (auto it_b = it_a; it_b != range.end(); ++it_b) { 189 for (auto it_b = it_a; it_b != range.end(); ++it_b) {
126 Enable(compatibility, *it_a, *it_b); 190 Enable(table, *it_a, *it_b);
127 } 191 }
128 } 192 }
129} 193}
130 194
131} // Anonymous namespace 195constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) {
196 const size_t a = static_cast<size_t>(format_a);
197 const size_t b = static_cast<size_t>(format_b);
198 return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
199}
132 200
133FormatCompatibility::FormatCompatibility() { 201constexpr Table MakeViewTable() {
202 Table view{};
134 for (size_t i = 0; i < MaxPixelFormat; ++i) { 203 for (size_t i = 0; i < MaxPixelFormat; ++i) {
135 // Identity is allowed 204 // Identity is allowed
136 Enable(view, i, i); 205 Enable(view, i, i);
137 } 206 }
138
139 EnableRange(view, VIEW_CLASS_128_BITS); 207 EnableRange(view, VIEW_CLASS_128_BITS);
140 EnableRange(view, VIEW_CLASS_96_BITS); 208 EnableRange(view, VIEW_CLASS_96_BITS);
141 EnableRange(view, VIEW_CLASS_64_BITS); 209 EnableRange(view, VIEW_CLASS_64_BITS);
@@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() {
146 EnableRange(view, VIEW_CLASS_RGTC2_RG); 214 EnableRange(view, VIEW_CLASS_RGTC2_RG);
147 EnableRange(view, VIEW_CLASS_BPTC_UNORM); 215 EnableRange(view, VIEW_CLASS_BPTC_UNORM);
148 EnableRange(view, VIEW_CLASS_BPTC_FLOAT); 216 EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
217 EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA);
218 EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA);
219 EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA);
220 EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA);
221 EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
222 EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
223 EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
224 EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
225 EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
226 EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
227 return view;
228}
149 229
150 copy = view; 230constexpr Table MakeCopyTable() {
231 Table copy = MakeViewTable();
151 EnableRange(copy, COPY_CLASS_128_BITS); 232 EnableRange(copy, COPY_CLASS_128_BITS);
152 EnableRange(copy, COPY_CLASS_64_BITS); 233 EnableRange(copy, COPY_CLASS_64_BITS);
234 return copy;
235}
236
237} // Anonymous namespace
238
239bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) {
240 static constexpr Table TABLE = MakeViewTable();
241 return IsSupported(TABLE, format_a, format_b);
242}
243
244bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
245 static constexpr Table TABLE = MakeCopyTable();
246 return IsSupported(TABLE, format_a, format_b);
153} 247}
154 248
155} // namespace VideoCore::Surface 249} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index 51766349b..b5eb03bea 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -4,31 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <bitset>
9#include <cstddef>
10
11#include "video_core/surface.h" 7#include "video_core/surface.h"
12 8
13namespace VideoCore::Surface { 9namespace VideoCore::Surface {
14 10
15class FormatCompatibility { 11bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b);
16public:
17 using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
18
19 explicit FormatCompatibility();
20
21 bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
22 return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
23 }
24
25 bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
26 return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
27 }
28 12
29private: 13bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
30 Table view;
31 Table copy;
32};
33 14
34} // namespace VideoCore::Surface 15} // namespace VideoCore::Surface
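The replacement for the bitset-based table packs one bit per format pair into rows of u64 words, which is why the whole table can now be built at compile time and queried with a shift and a mask. The indexing scheme on its own:

#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t NUM_FORMATS = 128; // stand-in for MaxPixelFormat

// One row per format; each row stores NUM_FORMATS bits packed into u64 words.
using Row = std::array<std::uint64_t, NUM_FORMATS / 64>;
using BitTable = std::array<Row, NUM_FORMATS>;

constexpr void Enable(BitTable& table, std::size_t a, std::size_t b) {
    table[a][b / 64] |= std::uint64_t{1} << (b % 64); // set bit b in row a
    table[b][a / 64] |= std::uint64_t{1} << (a % 64); // keep the relation symmetric
}

constexpr bool IsSupported(const BitTable& table, std::size_t a, std::size_t b) {
    return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
}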
diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h
new file mode 100644
index 000000000..4f1d29c04
--- /dev/null
+++ b/src/video_core/delayed_destruction_ring.h
@@ -0,0 +1,32 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <utility>
10#include <vector>
11
12namespace VideoCommon {
13
14/// Container to push objects to be destroyed a few ticks in the future
15template <typename T, size_t TICKS_TO_DESTROY>
16class DelayedDestructionRing {
17public:
18 void Tick() {
19 index = (index + 1) % TICKS_TO_DESTROY;
20 elements[index].clear();
21 }
22
23 void Push(T&& object) {
24 elements[index].push_back(std::move(object));
25 }
26
27private:
28 size_t index = 0;
29 std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
30};
31
32} // namespace VideoCommon
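A short usage sketch of the new ring with a placeholder element type (real callers would store host GPU handles rather than strings). An object pushed into the current slot survives until Tick() wraps back around to that slot, i.e. TICKS_TO_DESTROY ticks later:

#include <string>

#include "video_core/delayed_destruction_ring.h"

void Example() {
    VideoCommon::DelayedDestructionRing<std::string, 3> ring;
    ring.Push(std::string{"frame resource"}); // stored in slot 0
    ring.Tick(); // index -> 1, slot 1 cleared; object still alive
    ring.Tick(); // index -> 2, slot 2 cleared; object still alive
    ring.Tick(); // index wraps to 0, slot 0 cleared; object destroyed
}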
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index e16075993..b1eaac00c 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -9,13 +9,16 @@
9#include "video_core/dirty_flags.h" 9#include "video_core/dirty_flags.h"
10 10
11#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) 11#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
12#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32)) 12#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))
13 13
14namespace VideoCommon::Dirty { 14namespace VideoCommon::Dirty {
15 15
16using Tegra::Engines::Maxwell3D; 16using Tegra::Engines::Maxwell3D;
17 17
18void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { 18void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
19 FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
20 FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
21
19 static constexpr std::size_t num_per_rt = NUM(rt[0]); 22 static constexpr std::size_t num_per_rt = NUM(rt[0]);
20 static constexpr std::size_t begin = OFF(rt); 23 static constexpr std::size_t begin = OFF(rt);
21 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; 24 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
23 FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt); 26 FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
24 } 27 }
25 FillBlock(tables[1], begin, num, RenderTargets); 28 FillBlock(tables[1], begin, num, RenderTargets);
29 FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets);
30
31 tables[0][OFF(rt_control)] = RenderTargets;
32 tables[1][OFF(rt_control)] = RenderTargetControl;
26 33
27 static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets}; 34 static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
28 for (std::size_t i = 0; i < std::size(zeta_flags); ++i) { 35 for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 3f6c1d83a..875527ddd 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {
16enum : u8 { 16enum : u8 {
17 NullEntry = 0, 17 NullEntry = 0,
18 18
19 Descriptors,
20
19 RenderTargets, 21 RenderTargets,
22 RenderTargetControl,
20 ColorBuffer0, 23 ColorBuffer0,
21 ColorBuffer1, 24 ColorBuffer1,
22 ColorBuffer2, 25 ColorBuffer2,
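For context on the hunks above: each table maps a Maxwell3D register index to one dirty-flag id, and keeping two tables lets a single write raise two flags at once, which is how rt_control now raises both RenderTargets and the new RenderTargetControl. A simplified model of the mechanism (sizes and names are illustrative; the real tables live in Maxwell3D::DirtyState and are consumed by the register-write path):

#include <array>
#include <bitset>
#include <cstddef>
#include <cstdint>

constexpr std::size_t NUM_REGS = 0x2000; // illustrative size
using Table = std::array<std::uint8_t, NUM_REGS>;

// Same contract as FillBlock above: tag a contiguous register range with a
// single dirty-flag index.
void FillBlock(Table& table, std::size_t begin, std::size_t num, std::uint8_t flag) {
    for (std::size_t i = begin; i < begin + num; ++i) {
        table[i] = flag;
    }
}

// On a register write, each table contributes one flag. Index 0 is
// NullEntry, so untagged registers land on a harmless dummy bit.
void OnRegisterWrite(const std::array<Table, 2>& tables, std::bitset<0x100>& flags,
                     std::size_t reg) {
    flags[tables[0][reg]] = true;
    flags[tables[1][reg]] = true;
}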
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index d8801b1f5..2c8b20024 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -13,7 +13,7 @@
13 13
14namespace Tegra { 14namespace Tegra {
15 15
16DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} 16DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}
17 17
18DmaPusher::~DmaPusher() = default; 18DmaPusher::~DmaPusher() = default;
19 19
@@ -152,7 +152,12 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
152 152
153void DmaPusher::CallMethod(u32 argument) const { 153void DmaPusher::CallMethod(u32 argument) const {
154 if (dma_state.method < non_puller_methods) { 154 if (dma_state.method < non_puller_methods) {
155 gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); 155 gpu.CallMethod(GPU::MethodCall{
156 dma_state.method,
157 argument,
158 dma_state.subchannel,
159 dma_state.method_count,
160 });
156 } else { 161 } else {
157 subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument, 162 subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
158 dma_state.is_last_call); 163 dma_state.is_last_call);
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 96ac267f7..19f286fa7 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -87,11 +87,11 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
87struct CommandList final { 87struct CommandList final {
88 CommandList() = default; 88 CommandList() = default;
89 explicit CommandList(std::size_t size) : command_lists(size) {} 89 explicit CommandList(std::size_t size) : command_lists(size) {}
90 explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list) 90 explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_)
91 : prefetch_command_list{std::move(prefetch_command_list)} {} 91 : prefetch_command_list{std::move(prefetch_command_list_)} {}
92 92
93 std::vector<Tegra::CommandListHeader> command_lists; 93 std::vector<CommandListHeader> command_lists;
94 std::vector<Tegra::CommandHeader> prefetch_command_list; 94 std::vector<CommandHeader> prefetch_command_list;
95}; 95};
96 96
97/** 97/**
@@ -103,7 +103,7 @@ struct CommandList final {
103 */ 103 */
104class DmaPusher final { 104class DmaPusher final {
105public: 105public:
106 explicit DmaPusher(Core::System& system, GPU& gpu); 106 explicit DmaPusher(Core::System& system_, GPU& gpu_);
107 ~DmaPusher(); 107 ~DmaPusher();
108 108
109 void Push(CommandList&& entries) { 109 void Push(CommandList&& entries) {
@@ -112,7 +112,7 @@ public:
112 112
113 void DispatchCalls(); 113 void DispatchCalls();
114 114
115 void BindSubchannel(Tegra::Engines::EngineInterface* engine, u32 subchannel_id) { 115 void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) {
116 subchannels[subchannel_id] = engine; 116 subchannels[subchannel_id] = engine;
117 } 117 }
118 118
@@ -145,7 +145,7 @@ private:
145 145
146 bool ib_enable{true}; ///< IB mode enabled 146 bool ib_enable{true}; ///< IB mode enabled
147 147
148 std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{}; 148 std::array<Engines::EngineInterface*, max_subchannels> subchannels{};
149 149
150 GPU& gpu; 150 GPU& gpu;
151 Core::System& system; 151 Core::System& system;
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index d44ad0cd8..71d7e1473 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -11,16 +11,16 @@
11 11
12namespace Tegra::Engines::Upload { 12namespace Tegra::Engines::Upload {
13 13
14State::State(MemoryManager& memory_manager, Registers& regs) 14State::State(MemoryManager& memory_manager_, Registers& regs_)
15 : regs{regs}, memory_manager{memory_manager} {} 15 : regs{regs_}, memory_manager{memory_manager_} {}
16 16
17State::~State() = default; 17State::~State() = default;
18 18
19void State::ProcessExec(const bool is_linear) { 19void State::ProcessExec(const bool is_linear_) {
20 write_offset = 0; 20 write_offset = 0;
21 copy_size = regs.line_length_in * regs.line_count; 21 copy_size = regs.line_length_in * regs.line_count;
22 inner_buffer.resize(copy_size); 22 inner_buffer.resize(copy_size);
23 this->is_linear = is_linear; 23 is_linear = is_linear_;
24} 24}
25 25
26void State::ProcessData(const u32 data, const bool is_last_call) { 26void State::ProcessData(const u32 data, const bool is_last_call) {
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 462da419e..1c7f1effa 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -54,10 +54,10 @@ struct Registers {
54 54
55class State { 55class State {
56public: 56public:
57 State(MemoryManager& memory_manager, Registers& regs); 57 explicit State(MemoryManager& memory_manager_, Registers& regs_);
58 ~State(); 58 ~State();
59 59
60 void ProcessExec(bool is_linear); 60 void ProcessExec(bool is_linear_);
61 void ProcessData(u32 data, bool is_last_call); 61 void ProcessData(u32 data, bool is_last_call);
62 62
63private: 63private:
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 9409c4075..a01d334ad 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,11 @@
10 10
11namespace Tegra::Engines { 11namespace Tegra::Engines {
12 12
13Fermi2D::Fermi2D() = default; 13Fermi2D::Fermi2D() {
14 // Nvidia's OpenGL driver seems to assume these values
15 regs.src.depth = 1;
16 regs.dst.depth = 1;
17}
14 18
15Fermi2D::~Fermi2D() = default; 19Fermi2D::~Fermi2D() = default;
16 20
@@ -21,79 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
21void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { 25void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
22 ASSERT_MSG(method < Regs::NUM_REGS, 26 ASSERT_MSG(method < Regs::NUM_REGS,
23 "Invalid Fermi2D register, increase the size of the Regs structure"); 27 "Invalid Fermi2D register, increase the size of the Regs structure");
24
25 regs.reg_array[method] = method_argument; 28 regs.reg_array[method] = method_argument;
26 29
27 switch (method) { 30 if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
28 // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, 31 Blit();
29 // so trigger on the second 32-bit write.
30 case FERMI2D_REG_INDEX(blit_src_y) + 1: {
31 HandleSurfaceCopy();
32 break;
33 }
34 } 32 }
35} 33}
36 34
37void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { 35void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
38 for (std::size_t i = 0; i < amount; i++) { 36 for (u32 i = 0; i < amount; ++i) {
39 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); 37 CallMethod(method, base_start[i], methods_pending - i <= 1);
40 } 38 }
41} 39}
42 40
43static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { 41void Fermi2D::Blit() {
44 const u32 line_a = src_2 - src_1; 42 LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
45 const u32 line_b = dst_2 - dst_1; 43 regs.src.Address(), regs.dst.Address());
46 const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
47 return {line_b - (excess * line_b) / line_a, excess};
48}
49
50void Fermi2D::HandleSurfaceCopy() {
51 LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}",
52 static_cast<u32>(regs.operation));
53 44
54 // TODO(Subv): Only raw copies are implemented. 45 UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
55 ASSERT(regs.operation == Operation::SrcCopy); 46 UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
47 UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
48 UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
49 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
56 50
57 const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; 51 const auto& args = regs.pixels_from_memory;
58 const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; 52 const Config config{
59 u32 src_blit_x2, src_blit_y2;
60 if (regs.blit_control.origin == Origin::Corner) {
61 src_blit_x2 =
62 static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
63 src_blit_y2 =
64 static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
65 } else {
66 src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
67 src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
68 }
69 u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
70 u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
71 const auto [new_dst_w, src_excess_x] =
72 DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
73 const auto [new_dst_h, src_excess_y] =
74 DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
75 dst_blit_x2 = new_dst_w + regs.blit_dst_x;
76 src_blit_x2 = src_blit_x2 - src_excess_x;
77 dst_blit_y2 = new_dst_h + regs.blit_dst_y;
78 src_blit_y2 = src_blit_y2 - src_excess_y;
79 const auto [new_src_w, dst_excess_x] =
80 DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
81 const auto [new_src_h, dst_excess_y] =
82 DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
83 src_blit_x2 = new_src_w + src_blit_x1;
84 dst_blit_x2 = dst_blit_x2 - dst_excess_x;
85 src_blit_y2 = new_src_h + src_blit_y1;
86 dst_blit_y2 = dst_blit_y2 - dst_excess_y;
87 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
88 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
89 dst_blit_y2};
90 const Config copy_config{
91 .operation = regs.operation, 53 .operation = regs.operation,
92 .filter = regs.blit_control.filter, 54 .filter = args.sample_mode.filter,
93 .src_rect = src_rect, 55 .dst_x0 = args.dst_x0,
94 .dst_rect = dst_rect, 56 .dst_y0 = args.dst_y0,
57 .dst_x1 = args.dst_x0 + args.dst_width,
58 .dst_y1 = args.dst_y0 + args.dst_height,
59 .src_x0 = static_cast<s32>(args.src_x0 >> 32),
60 .src_y0 = static_cast<s32>(args.src_y0 >> 32),
61 .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
62 .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
95 }; 63 };
96 if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { 64 if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
97 UNIMPLEMENTED(); 65 UNIMPLEMENTED();
98 } 66 }
99} 67}
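The pixels_from_memory source coordinates and deltas are 32.32 fixed point, which is why every src_* field in the Config above ends with a >> 32. A worked example with made-up numbers showing how src_x1 comes out:

#include <cstdint>

// Convert a real coordinate to 32.32 fixed point (illustrative helper).
constexpr std::int64_t ToFixed(double value) {
    return static_cast<std::int64_t>(value * 4294967296.0); // value * 2^32
}

void Example() {
    const std::int32_t dst_width = 100;
    const std::int64_t du_dx = ToFixed(0.5);  // half a source texel per dst pixel
    const std::int64_t src_x0 = ToFixed(8.0); // blit starts at source texel 8
    // Same expression as Config::src_x1 in Blit():
    const auto src_x1 = static_cast<std::int32_t>((du_dx * dst_width + src_x0) >> 32);
    // src_x1 == 58: 100 destination pixels sample source texels [8, 58).
}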
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0909709ec..81522988e 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -53,8 +53,8 @@ public:
53 }; 53 };
54 54
55 enum class Filter : u32 { 55 enum class Filter : u32 {
56 PointSample = 0, // Nearest 56 Point = 0,
57 Linear = 1, 57 Bilinear = 1,
58 }; 58 };
59 59
60 enum class Operation : u32 { 60 enum class Operation : u32 {
@@ -67,88 +67,235 @@ public:
67 BlendPremult = 6, 67 BlendPremult = 6,
68 }; 68 };
69 69
70 struct Regs { 70 enum class MemoryLayout : u32 {
71 static constexpr std::size_t NUM_REGS = 0x258; 71 BlockLinear = 0,
72 Pitch = 1,
73 };
72 74
73 struct Surface { 75 enum class CpuIndexWrap : u32 {
74 RenderTargetFormat format; 76 Wrap = 0,
75 BitField<0, 1, u32> linear; 77 NoWrap = 1,
76 union { 78 };
77 BitField<0, 4, u32> block_width;
78 BitField<4, 4, u32> block_height;
79 BitField<8, 4, u32> block_depth;
80 };
81 u32 depth;
82 u32 layer;
83 u32 pitch;
84 u32 width;
85 u32 height;
86 u32 address_high;
87 u32 address_low;
88
89 GPUVAddr Address() const {
90 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
91 address_low);
92 }
93
94 u32 BlockWidth() const {
95 return block_width.Value();
96 }
97
98 u32 BlockHeight() const {
99 return block_height.Value();
100 }
101
102 u32 BlockDepth() const {
103 return block_depth.Value();
104 }
105 };
106 static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
107 79
80 struct Surface {
81 RenderTargetFormat format;
82 MemoryLayout linear;
108 union { 83 union {
109 struct { 84 BitField<0, 4, u32> block_width;
110 INSERT_UNION_PADDING_WORDS(0x80); 85 BitField<4, 4, u32> block_height;
86 BitField<8, 4, u32> block_depth;
87 };
88 u32 depth;
89 u32 layer;
90 u32 pitch;
91 u32 width;
92 u32 height;
93 u32 addr_upper;
94 u32 addr_lower;
95
96 [[nodiscard]] constexpr GPUVAddr Address() const noexcept {
97 return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
98 }
99 };
100 static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
111 101
112 Surface dst; 102 enum class SectorPromotion : u32 {
103 NoPromotion = 0,
104 PromoteTo2V = 1,
105 PromoteTo2H = 2,
106 PromoteTo4 = 3,
107 };
108
109 enum class NumTpcs : u32 {
110 All = 0,
111 One = 1,
112 };
113 113
114 INSERT_UNION_PADDING_WORDS(2); 114 enum class RenderEnableMode : u32 {
115 False = 0,
116 True = 1,
117 Conditional = 2,
118 RenderIfEqual = 3,
119 RenderIfNotEqual = 4,
120 };
115 121
116 Surface src; 122 enum class ColorKeyFormat : u32 {
123 A16R56G6B5 = 0,
124 A1R5G55B5 = 1,
125 A8R8G8B8 = 2,
126 A2R10G10B10 = 3,
127 Y8 = 4,
128 Y16 = 5,
129 Y32 = 6,
130 };
117 131
118 INSERT_UNION_PADDING_WORDS(0x15); 132 union Beta4 {
133 BitField<0, 8, u32> b;
134 BitField<8, 8, u32> g;
135 BitField<16, 8, u32> r;
136 BitField<24, 8, u32> a;
137 };
119 138
120 Operation operation; 139 struct Point {
140 u32 x;
141 u32 y;
142 };
121 143
122 INSERT_UNION_PADDING_WORDS(0x177); 144 enum class PatternSelect : u32 {
145 MonoChrome8x8 = 0,
146 MonoChrome64x1 = 1,
147 MonoChrome1x64 = 2,
148 Color = 3,
149 };
123 150
151 enum class NotifyType : u32 {
152 WriteOnly = 0,
153 WriteThenAwaken = 1,
154 };
155
156 enum class MonochromePatternColorFormat : u32 {
157 A8X8R8G6B5 = 0,
158 A1R5G5B5 = 1,
159 A8R8G8B8 = 2,
160 A8Y8 = 3,
161 A8X8Y16 = 4,
162 Y32 = 5,
163 };
164
165 enum class MonochromePatternFormat : u32 {
166 CGA6_M1 = 0,
167 LE_M1 = 1,
168 };
169
170 union Regs {
171 static constexpr std::size_t NUM_REGS = 0x258;
172 struct {
173 u32 object;
174 INSERT_UNION_PADDING_WORDS(0x3F);
175 u32 no_operation;
176 NotifyType notify;
177 INSERT_UNION_PADDING_WORDS(0x2);
178 u32 wait_for_idle;
179 INSERT_UNION_PADDING_WORDS(0xB);
180 u32 pm_trigger;
181 INSERT_UNION_PADDING_WORDS(0xF);
182 u32 context_dma_notify;
183 u32 dst_context_dma;
184 u32 src_context_dma;
185 u32 semaphore_context_dma;
186 INSERT_UNION_PADDING_WORDS(0x1C);
187 Surface dst;
188 CpuIndexWrap pixels_from_cpu_index_wrap;
189 u32 kind2d_check_enable;
190 Surface src;
191 SectorPromotion pixels_from_memory_sector_promotion;
192 INSERT_UNION_PADDING_WORDS(0x1);
193 NumTpcs num_tpcs;
194 u32 render_enable_addr_upper;
195 u32 render_enable_addr_lower;
196 RenderEnableMode render_enable_mode;
197 INSERT_UNION_PADDING_WORDS(0x4);
198 u32 clip_x0;
199 u32 clip_y0;
200 u32 clip_width;
201 u32 clip_height;
202 BitField<0, 1, u32> clip_enable;
203 BitField<0, 3, ColorKeyFormat> color_key_format;
204 u32 color_key;
205 BitField<0, 1, u32> color_key_enable;
206 BitField<0, 8, u32> rop;
207 u32 beta1;
208 Beta4 beta4;
209 Operation operation;
210 union {
211 BitField<0, 6, u32> x;
212 BitField<8, 6, u32> y;
213 } pattern_offset;
214 BitField<0, 2, PatternSelect> pattern_select;
215 INSERT_UNION_PADDING_WORDS(0xC);
216 struct {
217 BitField<0, 3, MonochromePatternColorFormat> color_format;
218 BitField<0, 1, MonochromePatternFormat> format;
219 u32 color0;
220 u32 color1;
221 u32 pattern0;
222 u32 pattern1;
223 } monochrome_pattern;
224 struct {
225 std::array<u32, 0x40> X8R8G8B8;
226 std::array<u32, 0x20> R5G6B5;
227 std::array<u32, 0x20> X1R5G5B5;
228 std::array<u32, 0x10> Y8;
229 } color_pattern;
230 INSERT_UNION_PADDING_WORDS(0x10);
231 struct {
232 u32 prim_mode;
233 u32 prim_color_format;
234 u32 prim_color;
235 u32 line_tie_break_bits;
236 INSERT_UNION_PADDING_WORDS(0x14);
237 u32 prim_point_xy;
238 INSERT_UNION_PADDING_WORDS(0x7);
239 std::array<Point, 0x40> prim_point;
240 } render_solid;
241 struct {
242 u32 data_type;
243 u32 color_format;
244 u32 index_format;
245 u32 mono_format;
246 u32 wrap;
247 u32 color0;
248 u32 color1;
249 u32 mono_opacity;
250 INSERT_UNION_PADDING_WORDS(0x6);
251 u32 src_width;
252 u32 src_height;
253 u32 dx_du_frac;
254 u32 dx_du_int;
 255 u32 dy_dv_frac;
256 u32 dy_dv_int;
257 u32 dst_x0_frac;
258 u32 dst_x0_int;
259 u32 dst_y0_frac;
260 u32 dst_y0_int;
261 u32 data;
262 } pixels_from_cpu;
263 INSERT_UNION_PADDING_WORDS(0x3);
264 u32 big_endian_control;
265 INSERT_UNION_PADDING_WORDS(0x3);
266 struct {
267 BitField<0, 3, u32> block_shape;
268 BitField<0, 5, u32> corral_size;
269 BitField<0, 1, u32> safe_overlap;
124 union { 270 union {
125 u32 raw;
126 BitField<0, 1, Origin> origin; 271 BitField<0, 1, Origin> origin;
127 BitField<4, 1, Filter> filter; 272 BitField<4, 1, Filter> filter;
128 } blit_control; 273 } sample_mode;
129
130 INSERT_UNION_PADDING_WORDS(0x8); 274 INSERT_UNION_PADDING_WORDS(0x8);
131 275 s32 dst_x0;
132 u32 blit_dst_x; 276 s32 dst_y0;
133 u32 blit_dst_y; 277 s32 dst_width;
134 u32 blit_dst_width; 278 s32 dst_height;
135 u32 blit_dst_height; 279 s64 du_dx;
136 u64 blit_du_dx; 280 s64 dv_dy;
137 u64 blit_dv_dy; 281 s64 src_x0;
138 u64 blit_src_x; 282 s64 src_y0;
139 u64 blit_src_y; 283 } pixels_from_memory;
140
141 INSERT_UNION_PADDING_WORDS(0x21);
142 };
143 std::array<u32, NUM_REGS> reg_array;
144 }; 284 };
285 std::array<u32, NUM_REGS> reg_array;
145 } regs{}; 286 } regs{};
146 287
147 struct Config { 288 struct Config {
148 Operation operation{}; 289 Operation operation;
149 Filter filter{}; 290 Filter filter;
150 Common::Rectangle<u32> src_rect; 291 s32 dst_x0;
151 Common::Rectangle<u32> dst_rect; 292 s32 dst_y0;
293 s32 dst_x1;
294 s32 dst_y1;
295 s32 src_x0;
296 s32 src_y0;
297 s32 src_x1;
298 s32 src_y1;
152 }; 299 };
153 300
154private: 301private:
@@ -156,25 +303,49 @@ private:
156 303
157 /// Performs the copy from the source surface to the destination surface as configured in the 304 /// Performs the copy from the source surface to the destination surface as configured in the
158 /// registers. 305 /// registers.
159 void HandleSurfaceCopy(); 306 void Blit();
160}; 307};
161 308
162#define ASSERT_REG_POSITION(field_name, position) \ 309#define ASSERT_REG_POSITION(field_name, position) \
163 static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ 310 static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
164 "Field " #field_name " has invalid position") 311 "Field " #field_name " has invalid position")
165 312
166ASSERT_REG_POSITION(dst, 0x80); 313ASSERT_REG_POSITION(object, 0x0);
167ASSERT_REG_POSITION(src, 0x8C); 314ASSERT_REG_POSITION(no_operation, 0x100);
168ASSERT_REG_POSITION(operation, 0xAB); 315ASSERT_REG_POSITION(notify, 0x104);
169ASSERT_REG_POSITION(blit_control, 0x223); 316ASSERT_REG_POSITION(wait_for_idle, 0x110);
170ASSERT_REG_POSITION(blit_dst_x, 0x22c); 317ASSERT_REG_POSITION(pm_trigger, 0x140);
171ASSERT_REG_POSITION(blit_dst_y, 0x22d); 318ASSERT_REG_POSITION(context_dma_notify, 0x180);
172ASSERT_REG_POSITION(blit_dst_width, 0x22e); 319ASSERT_REG_POSITION(dst_context_dma, 0x184);
173ASSERT_REG_POSITION(blit_dst_height, 0x22f); 320ASSERT_REG_POSITION(src_context_dma, 0x188);
174ASSERT_REG_POSITION(blit_du_dx, 0x230); 321ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
175ASSERT_REG_POSITION(blit_dv_dy, 0x232); 322ASSERT_REG_POSITION(dst, 0x200);
176ASSERT_REG_POSITION(blit_src_x, 0x234); 323ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
177ASSERT_REG_POSITION(blit_src_y, 0x236); 324ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
325ASSERT_REG_POSITION(src, 0x230);
326ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
327ASSERT_REG_POSITION(num_tpcs, 0x260);
328ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
329ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
330ASSERT_REG_POSITION(clip_x0, 0x280);
331ASSERT_REG_POSITION(clip_y0, 0x284);
332ASSERT_REG_POSITION(clip_width, 0x288);
333ASSERT_REG_POSITION(clip_height, 0x28c);
334ASSERT_REG_POSITION(clip_enable, 0x290);
335ASSERT_REG_POSITION(color_key_format, 0x294);
336ASSERT_REG_POSITION(color_key, 0x298);
337ASSERT_REG_POSITION(rop, 0x2A0);
338ASSERT_REG_POSITION(beta1, 0x2A4);
339ASSERT_REG_POSITION(beta4, 0x2A8);
340ASSERT_REG_POSITION(operation, 0x2AC);
341ASSERT_REG_POSITION(pattern_offset, 0x2B0);
342ASSERT_REG_POSITION(pattern_select, 0x2B4);
343ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
344ASSERT_REG_POSITION(color_pattern, 0x300);
345ASSERT_REG_POSITION(render_solid, 0x580);
346ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
347ASSERT_REG_POSITION(big_endian_control, 0x870);
348ASSERT_REG_POSITION(pixels_from_memory, 0x880);
178 349
179#undef ASSERT_REG_POSITION 350#undef ASSERT_REG_POSITION
180 351
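Note that ASSERT_REG_POSITION dropped the * 4: Fermi2D register positions are now byte offsets into Regs rather than u32 word indices, which is why dst moved from 0x80 in the old list to 0x200 in the new one. A quick sanity check of the unit change:

#include <cstdint>

// Old word index * sizeof(u32) == new byte offset.
static_assert(0x80 * sizeof(std::uint32_t) == 0x200);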
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 898370739..ba387506e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
58 } 58 }
59} 59}
60 60
61Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
62 const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
63 ASSERT(cbuf_mask[regs.tex_cb_index]);
64
65 const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
66 ASSERT(texinfo.Address() != 0);
67
68 const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
69 ASSERT(address < texinfo.Address() + texinfo.size);
70
71 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
72 return GetTextureInfo(tex_handle);
73}
74
75Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
76 return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
77}
78
79u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { 61u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
80 ASSERT(stage == ShaderType::Compute); 62 ASSERT(stage == ShaderType::Compute);
81 const auto& buffer = launch_description.const_buffer_config[const_buffer]; 63 const auto& buffer = launch_description.const_buffer_config[const_buffer];
@@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
98 80
99SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { 81SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
100 const Texture::TextureHandle tex_handle{handle}; 82 const Texture::TextureHandle tex_handle{handle};
101 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 83 const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
102 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 84 const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
103 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 85
86 SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
87 result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
104 return result; 88 return result;
105} 89}
106 90
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7f2500aab..51a041202 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -209,11 +209,6 @@ public:
209 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, 209 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
210 u32 methods_pending) override; 210 u32 methods_pending) override;
211 211
212 Texture::FullTextureInfo GetTexture(std::size_t offset) const;
213
214 /// Given a texture handle, returns the TSC and TIC entries.
215 Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
216
217 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; 212 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
218 213
219 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; 214 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index dc71b2eec..9911140e9 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,8 +14,8 @@
14 14
15namespace Tegra::Engines { 15namespace Tegra::Engines {
16 16
17KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) 17KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
18 : system{system}, upload_state{memory_manager, regs.upload} {} 18 : system{system_}, upload_state{memory_manager, regs.upload} {}
19 19
20KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
21 21
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5b7f71a00..62483589e 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -35,7 +35,7 @@ namespace Tegra::Engines {
35 35
36class KeplerMemory final : public EngineInterface { 36class KeplerMemory final : public EngineInterface {
37public: 37public:
38 KeplerMemory(Core::System& system, MemoryManager& memory_manager); 38 explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
39 ~KeplerMemory(); 39 ~KeplerMemory();
40 40
41 /// Write the value to the register identified by method. 41 /// Write the value to the register identified by method.
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 6287df633..9be651e24 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cinttypes>
6#include <cstring> 5#include <cstring>
7#include <optional> 6#include <optional>
8#include "common/assert.h" 7#include "common/assert.h"
@@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
227 OnMemoryWrite(); 226 OnMemoryWrite();
228 } 227 }
229 return; 228 return;
229 case MAXWELL3D_REG_INDEX(fragment_barrier):
230 return rasterizer->FragmentBarrier();
231 case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
232 return rasterizer->TiledCacheBarrier();
230 } 233 }
231} 234}
232 235
@@ -359,7 +362,7 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
359} 362}
360 363
361void Maxwell3D::FlushMMEInlineDraw() { 364void Maxwell3D::FlushMMEInlineDraw() {
362 LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), 365 LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
363 regs.vertex_buffer.count); 366 regs.vertex_buffer.count);
364 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); 367 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
365 ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); 368 ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
@@ -504,8 +507,7 @@ void Maxwell3D::ProcessCounterReset() {
504 rasterizer->ResetCounter(QueryType::SamplesPassed); 507 rasterizer->ResetCounter(QueryType::SamplesPassed);
505 break; 508 break;
506 default: 509 default:
507 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", 510 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset);
508 static_cast<int>(regs.counter_reset));
509 break; 511 break;
510 } 512 }
511} 513}
@@ -520,7 +522,7 @@ void Maxwell3D::ProcessSyncPoint() {
520} 522}
521 523
522void Maxwell3D::DrawArrays() { 524void Maxwell3D::DrawArrays() {
523 LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), 525 LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
524 regs.vertex_buffer.count); 526 regs.vertex_buffer.count);
525 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); 527 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
526 528
@@ -558,12 +560,12 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
558 return 0; 560 return 0;
559 case Regs::QuerySelect::SamplesPassed: 561 case Regs::QuerySelect::SamplesPassed:
560 // Deferred. 562 // Deferred.
561 rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, 563 rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed,
562 system.GPU().GetTicks()); 564 system.GPU().GetTicks());
563 return std::nullopt; 565 return std::nullopt;
564 default: 566 default:
565 LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", 567 LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
566 static_cast<u32>(regs.query.query_get.select.Value())); 568 regs.query.query_get.select.Value());
567 return 1; 569 return 1;
568 } 570 }
569} 571}
@@ -640,7 +642,7 @@ void Maxwell3D::FinishCBData() {
640} 642}
641 643
642Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 644Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
643 const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; 645 const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
644 646
645 Texture::TICEntry tic_entry; 647 Texture::TICEntry tic_entry;
646 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); 648 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -649,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
649} 651}
650 652
651Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 653Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
652 const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; 654 const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
653 655
654 Texture::TSCEntry tsc_entry; 656 Texture::TSCEntry tsc_entry;
655 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); 657 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
656 return tsc_entry; 658 return tsc_entry;
657} 659}
658 660
659Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
660 return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
661}
662
663Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
664 const auto stage_index = static_cast<std::size_t>(stage);
665 const auto& shader = state.shader_stages[stage_index];
666 const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
667 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
668
669 const GPUVAddr tex_info_address =
670 tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
671
672 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
673
674 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
675
676 return GetTextureInfo(tex_handle);
677}
678
679u32 Maxwell3D::GetRegisterValue(u32 method) const { 661u32 Maxwell3D::GetRegisterValue(u32 method) const {
680 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); 662 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
681 return regs.reg_array[method]; 663 return regs.reg_array[method];
682} 664}
683 665
684void Maxwell3D::ProcessClearBuffers() { 666void Maxwell3D::ProcessClearBuffers() {
685 ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
686 regs.clear_buffers.R == regs.clear_buffers.B &&
687 regs.clear_buffers.R == regs.clear_buffers.A);
688
689 rasterizer->Clear(); 667 rasterizer->Clear();
690} 668}
691 669
@@ -693,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse
693 ASSERT(stage != ShaderType::Compute); 671 ASSERT(stage != ShaderType::Compute);
694 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; 672 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
695 const auto& buffer = shader_stage.const_buffers[const_buffer]; 673 const auto& buffer = shader_stage.const_buffers[const_buffer];
696 u32 result; 674 return memory_manager.Read<u32>(buffer.address + offset);
697 std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
698 return result;
699} 675}
700 676
701SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { 677SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
@@ -713,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
713 689
714SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { 690SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
715 const Texture::TextureHandle tex_handle{handle}; 691 const Texture::TextureHandle tex_handle{handle};
716 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 692 const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
717 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 693 const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
718 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 694
695 SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
696 result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
719 return result; 697 return result;
720} 698}
721 699
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index b0d9559d0..bf9e07c9b 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -438,16 +438,6 @@ public:
438 DecrWrapOGL = 0x8508, 438 DecrWrapOGL = 0x8508,
439 }; 439 };
440 440
441 enum class MemoryLayout : u32 {
442 Linear = 0,
443 BlockLinear = 1,
444 };
445
446 enum class InvMemoryLayout : u32 {
447 BlockLinear = 0,
448 Linear = 1,
449 };
450
451 enum class CounterReset : u32 { 441 enum class CounterReset : u32 {
452 SampleCnt = 0x01, 442 SampleCnt = 0x01,
453 Unk02 = 0x02, 443 Unk02 = 0x02,
@@ -589,21 +579,31 @@ public:
589 NegativeW = 7, 579 NegativeW = 7,
590 }; 580 };
591 581
582 enum class SamplerIndex : u32 {
583 Independently = 0,
584 ViaHeaderIndex = 1,
585 };
586
587 struct TileMode {
588 union {
589 BitField<0, 4, u32> block_width;
590 BitField<4, 4, u32> block_height;
591 BitField<8, 4, u32> block_depth;
592 BitField<12, 1, u32> is_pitch_linear;
593 BitField<16, 1, u32> is_3d;
594 };
595 };
596 static_assert(sizeof(TileMode) == 4);
597
592 struct RenderTargetConfig { 598 struct RenderTargetConfig {
593 u32 address_high; 599 u32 address_high;
594 u32 address_low; 600 u32 address_low;
595 u32 width; 601 u32 width;
596 u32 height; 602 u32 height;
597 Tegra::RenderTargetFormat format; 603 Tegra::RenderTargetFormat format;
604 TileMode tile_mode;
598 union { 605 union {
599 BitField<0, 3, u32> block_width; 606 BitField<0, 16, u32> depth;
600 BitField<4, 3, u32> block_height;
601 BitField<8, 3, u32> block_depth;
602 BitField<12, 1, InvMemoryLayout> type;
603 BitField<16, 1, u32> is_3d;
604 } memory_layout;
605 union {
606 BitField<0, 16, u32> layers;
607 BitField<16, 1, u32> volume; 607 BitField<16, 1, u32> volume;
608 }; 608 };
609 u32 layer_stride; 609 u32 layer_stride;
@@ -832,7 +832,11 @@ public:
832 832
833 u32 patch_vertices; 833 u32 patch_vertices;
834 834
835 INSERT_UNION_PADDING_WORDS(0xC); 835 INSERT_UNION_PADDING_WORDS(0x4);
836
837 u32 fragment_barrier;
838
839 INSERT_UNION_PADDING_WORDS(0x7);
836 840
837 std::array<ScissorTest, NumViewports> scissor_test; 841 std::array<ScissorTest, NumViewports> scissor_test;
838 842
@@ -842,7 +846,15 @@ public:
842 u32 stencil_back_mask; 846 u32 stencil_back_mask;
843 u32 stencil_back_func_mask; 847 u32 stencil_back_func_mask;
844 848
845 INSERT_UNION_PADDING_WORDS(0xC); 849 INSERT_UNION_PADDING_WORDS(0x5);
850
851 u32 invalidate_texture_data_cache;
852
853 INSERT_UNION_PADDING_WORDS(0x1);
854
855 u32 tiled_cache_barrier;
856
857 INSERT_UNION_PADDING_WORDS(0x4);
846 858
847 u32 color_mask_common; 859 u32 color_mask_common;
848 860
@@ -866,12 +878,7 @@ public:
866 u32 address_high; 878 u32 address_high;
867 u32 address_low; 879 u32 address_low;
868 Tegra::DepthFormat format; 880 Tegra::DepthFormat format;
869 union { 881 TileMode tile_mode;
870 BitField<0, 4, u32> block_width;
871 BitField<4, 4, u32> block_height;
872 BitField<8, 4, u32> block_depth;
873 BitField<20, 1, InvMemoryLayout> type;
874 } memory_layout;
875 u32 layer_stride; 882 u32 layer_stride;
876 883
877 GPUVAddr Address() const { 884 GPUVAddr Address() const {
@@ -880,7 +887,18 @@ public:
880 } 887 }
881 } zeta; 888 } zeta;
882 889
883 INSERT_UNION_PADDING_WORDS(0x41); 890 struct {
891 union {
892 BitField<0, 16, u32> x;
893 BitField<16, 16, u32> width;
894 };
895 union {
896 BitField<0, 16, u32> y;
897 BitField<16, 16, u32> height;
898 };
899 } render_area;
900
901 INSERT_UNION_PADDING_WORDS(0x3F);
884 902
885 union { 903 union {
886 BitField<0, 4, u32> stencil; 904 BitField<0, 4, u32> stencil;
@@ -921,7 +939,7 @@ public:
921 BitField<25, 3, u32> map_7; 939 BitField<25, 3, u32> map_7;
922 }; 940 };
923 941
924 u32 GetMap(std::size_t index) const { 942 u32 Map(std::size_t index) const {
925 const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, 943 const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
926 map_4, map_5, map_6, map_7}; 944 map_4, map_5, map_6, map_7};
927 ASSERT(index < maps.size()); 945 ASSERT(index < maps.size());
@@ -934,11 +952,13 @@ public:
934 u32 zeta_width; 952 u32 zeta_width;
935 u32 zeta_height; 953 u32 zeta_height;
936 union { 954 union {
937 BitField<0, 16, u32> zeta_layers; 955 BitField<0, 16, u32> zeta_depth;
938 BitField<16, 1, u32> zeta_volume; 956 BitField<16, 1, u32> zeta_volume;
939 }; 957 };
940 958
941 INSERT_UNION_PADDING_WORDS(0x26); 959 SamplerIndex sampler_index;
960
961 INSERT_UNION_PADDING_WORDS(0x25);
942 962
943 u32 depth_test_enable; 963 u32 depth_test_enable;
944 964
@@ -964,6 +984,7 @@ public:
964 float b; 984 float b;
965 float a; 985 float a;
966 } blend_color; 986 } blend_color;
987
967 INSERT_UNION_PADDING_WORDS(0x4); 988 INSERT_UNION_PADDING_WORDS(0x4);
968 989
969 struct { 990 struct {
@@ -1001,7 +1022,12 @@ public:
1001 float line_width_smooth; 1022 float line_width_smooth;
1002 float line_width_aliased; 1023 float line_width_aliased;
1003 1024
1004 INSERT_UNION_PADDING_WORDS(0x1F); 1025 INSERT_UNION_PADDING_WORDS(0x1B);
1026
1027 u32 invalidate_sampler_cache_no_wfi;
1028 u32 invalidate_texture_header_cache_no_wfi;
1029
1030 INSERT_UNION_PADDING_WORDS(0x2);
1005 1031
1006 u32 vb_element_base; 1032 u32 vb_element_base;
1007 u32 vb_base_instance; 1033 u32 vb_base_instance;
@@ -1045,13 +1071,13 @@ public:
1045 } condition; 1071 } condition;
1046 1072
1047 struct { 1073 struct {
1048 u32 tsc_address_high; 1074 u32 address_high;
1049 u32 tsc_address_low; 1075 u32 address_low;
1050 u32 tsc_limit; 1076 u32 limit;
1051 1077
1052 GPUVAddr TSCAddress() const { 1078 GPUVAddr Address() const {
1053 return static_cast<GPUVAddr>( 1079 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
1054 (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low); 1080 address_low);
1055 } 1081 }
1056 } tsc; 1082 } tsc;
1057 1083
@@ -1062,13 +1088,13 @@ public:
1062 u32 line_smooth_enable; 1088 u32 line_smooth_enable;
1063 1089
1064 struct { 1090 struct {
1065 u32 tic_address_high; 1091 u32 address_high;
1066 u32 tic_address_low; 1092 u32 address_low;
1067 u32 tic_limit; 1093 u32 limit;
1068 1094
1069 GPUVAddr TICAddress() const { 1095 GPUVAddr Address() const {
1070 return static_cast<GPUVAddr>( 1096 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
1071 (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low); 1097 address_low);
1072 } 1098 }
1073 } tic; 1099 } tic;
1074 1100
@@ -1397,12 +1423,6 @@ public:
1397 1423
1398 void FlushMMEInlineDraw(); 1424 void FlushMMEInlineDraw();
1399 1425
1400 /// Given a texture handle, returns the TSC and TIC entries.
1401 Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
1402
1403 /// Returns the texture information for a specific texture in a specific shader stage.
1404 Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
1405
1406 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; 1426 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
1407 1427
1408 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; 1428 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
@@ -1473,39 +1493,6 @@ private:
1473 1493
1474 void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); 1494 void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
1475 1495
1476 Core::System& system;
1477 MemoryManager& memory_manager;
1478
1479 VideoCore::RasterizerInterface* rasterizer = nullptr;
1480
1481 /// Start offsets of each macro in macro_memory
1482 std::array<u32, 0x80> macro_positions = {};
1483
1484 std::array<bool, Regs::NUM_REGS> mme_inline{};
1485
1486 /// Macro method that is currently being executed / being fed parameters.
1487 u32 executing_macro = 0;
1488 /// Parameters that have been submitted to the macro call so far.
1489 std::vector<u32> macro_params;
1490
1491 /// Interpreter for the macro codes uploaded to the GPU.
1492 std::unique_ptr<MacroEngine> macro_engine;
1493
1494 static constexpr u32 null_cb_data = 0xFFFFFFFF;
1495 struct {
1496 std::array<std::array<u32, 0x4000>, 16> buffer;
1497 u32 current{null_cb_data};
1498 u32 id{null_cb_data};
1499 u32 start_pos{};
1500 u32 counter{};
1501 } cb_data_state;
1502
1503 Upload::State upload_state;
1504
1505 bool execute_on{true};
1506
1507 std::array<u8, Regs::NUM_REGS> dirty_pointers{};
1508
1509 /// Retrieves information about a specific TIC entry from the TIC buffer. 1496 /// Retrieves information about a specific TIC entry from the TIC buffer.
1510 Texture::TICEntry GetTICEntry(u32 tic_index) const; 1497 Texture::TICEntry GetTICEntry(u32 tic_index) const;
1511 1498
@@ -1514,8 +1501,8 @@ private:
1514 1501
1515 /** 1502 /**
1516 * Call a macro on this engine. 1503 * Call a macro on this engine.
1504 *
1517 * @param method Method to call 1505 * @param method Method to call
1518 * @param num_parameters Number of arguments
1519 * @param parameters Arguments to the method call 1506 * @param parameters Arguments to the method call
1520 */ 1507 */
1521 void CallMacroMethod(u32 method, const std::vector<u32>& parameters); 1508 void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
@@ -1564,6 +1551,38 @@ private:
1564 1551
1565 /// Returns a query's value or an empty object if the value will be deferred through a cache. 1552 /// Returns a query's value or an empty object if the value will be deferred through a cache.
1566 std::optional<u64> GetQueryResult(); 1553 std::optional<u64> GetQueryResult();
1554
1555 Core::System& system;
1556 MemoryManager& memory_manager;
1557
1558 VideoCore::RasterizerInterface* rasterizer = nullptr;
1559
1560 /// Start offsets of each macro in macro_memory
1561 std::array<u32, 0x80> macro_positions{};
1562
1563 std::array<bool, Regs::NUM_REGS> mme_inline{};
1564
1565 /// Macro method that is currently being executed / being fed parameters.
1566 u32 executing_macro = 0;
1567 /// Parameters that have been submitted to the macro call so far.
1568 std::vector<u32> macro_params;
1569
1570 /// Interpreter for the macro codes uploaded to the GPU.
1571 std::unique_ptr<MacroEngine> macro_engine;
1572
1573 static constexpr u32 null_cb_data = 0xFFFFFFFF;
1574 struct CBDataState {
1575 std::array<std::array<u32, 0x4000>, 16> buffer;
1576 u32 current{null_cb_data};
1577 u32 id{null_cb_data};
1578 u32 start_pos{};
1579 u32 counter{};
1580 };
1581 CBDataState cb_data_state;
1582
1583 Upload::State upload_state;
1584
1585 bool execute_on{true};
1567}; 1586};
1568 1587
1569#define ASSERT_REG_POSITION(field_name, position) \ 1588#define ASSERT_REG_POSITION(field_name, position) \
@@ -1599,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
1599ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); 1618ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
1600ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); 1619ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
1601ASSERT_REG_POSITION(patch_vertices, 0x373); 1620ASSERT_REG_POSITION(patch_vertices, 0x373);
1621ASSERT_REG_POSITION(fragment_barrier, 0x378);
1602ASSERT_REG_POSITION(scissor_test, 0x380); 1622ASSERT_REG_POSITION(scissor_test, 0x380);
1603ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); 1623ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
1604ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); 1624ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
1605ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); 1625ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
1626ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD);
1627ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);
1606ASSERT_REG_POSITION(color_mask_common, 0x3E4); 1628ASSERT_REG_POSITION(color_mask_common, 0x3E4);
1607ASSERT_REG_POSITION(depth_bounds, 0x3E7); 1629ASSERT_REG_POSITION(depth_bounds, 0x3E7);
1608ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); 1630ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
@@ -1610,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
1610ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); 1632ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
1611ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); 1633ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
1612ASSERT_REG_POSITION(zeta, 0x3F8); 1634ASSERT_REG_POSITION(zeta, 0x3F8);
1635ASSERT_REG_POSITION(render_area, 0x3FD);
1613ASSERT_REG_POSITION(clear_flags, 0x43E); 1636ASSERT_REG_POSITION(clear_flags, 0x43E);
1614ASSERT_REG_POSITION(fill_rectangle, 0x44F); 1637ASSERT_REG_POSITION(fill_rectangle, 0x44F);
1615ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1638ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1618,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
1618ASSERT_REG_POSITION(rt_control, 0x487); 1641ASSERT_REG_POSITION(rt_control, 0x487);
1619ASSERT_REG_POSITION(zeta_width, 0x48a); 1642ASSERT_REG_POSITION(zeta_width, 0x48a);
1620ASSERT_REG_POSITION(zeta_height, 0x48b); 1643ASSERT_REG_POSITION(zeta_height, 0x48b);
1621ASSERT_REG_POSITION(zeta_layers, 0x48c); 1644ASSERT_REG_POSITION(zeta_depth, 0x48c);
1645ASSERT_REG_POSITION(sampler_index, 0x48D);
1622ASSERT_REG_POSITION(depth_test_enable, 0x4B3); 1646ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
1623ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); 1647ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
1624ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); 1648ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1642,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
1642ASSERT_REG_POSITION(screen_y_control, 0x4EB); 1666ASSERT_REG_POSITION(screen_y_control, 0x4EB);
1643ASSERT_REG_POSITION(line_width_smooth, 0x4EC); 1667ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
1644ASSERT_REG_POSITION(line_width_aliased, 0x4ED); 1668ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
1669ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509);
1670ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);
1645ASSERT_REG_POSITION(vb_element_base, 0x50D); 1671ASSERT_REG_POSITION(vb_element_base, 0x50D);
1646ASSERT_REG_POSITION(vb_base_instance, 0x50E); 1672ASSERT_REG_POSITION(vb_base_instance, 0x50E);
1647ASSERT_REG_POSITION(clip_distance_enabled, 0x544); 1673ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 8fa359d0a..ba750748c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -16,8 +16,10 @@ namespace Tegra::Engines {
16 16
17using namespace Texture; 17using namespace Texture;
18 18
19MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) 19MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
20 : system{system}, memory_manager{memory_manager} {} 20 : system{system_}, memory_manager{memory_manager_} {}
21
22MaxwellDMA::~MaxwellDMA() = default;
21 23
22void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { 24void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
23 ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); 25 ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
@@ -94,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() {
94} 96}
95 97
96void MaxwellDMA::CopyBlockLinearToPitch() { 98void MaxwellDMA::CopyBlockLinearToPitch() {
99 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
97 UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); 100 UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
98 UNIMPLEMENTED_IF(regs.src_params.layer != 0); 101 UNIMPLEMENTED_IF(regs.src_params.layer != 0);
99 102
@@ -133,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
133} 136}
134 137
135void MaxwellDMA::CopyPitchToBlockLinear() { 138void MaxwellDMA::CopyPitchToBlockLinear() {
139 UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
140
136 const auto& dst_params = regs.dst_params; 141 const auto& dst_params = regs.dst_params;
137 const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; 142 const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
138 const u32 width = dst_params.width; 143 const u32 width = dst_params.width;
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 50f445efc..3c59eeb13 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -72,11 +72,13 @@ public:
72 72
73 struct RenderEnable { 73 struct RenderEnable {
74 enum class Mode : u32 { 74 enum class Mode : u32 {
75 FALSE = 0, 75 // Note: This uses Pascal case in order to avoid the identifiers
76 TRUE = 1, 76 // FALSE and TRUE, which are reserved on Darwin.
77 CONDITIONAL = 2, 77 False = 0,
78 RENDER_IF_EQUAL = 3, 78 True = 1,
79 RENDER_IF_NOT_EQUAL = 4, 79 Conditional = 2,
80 RenderIfEqual = 3,
81 RenderIfNotEqual = 4,
80 }; 82 };
81 83
82 PackedGPUVAddr address; 84 PackedGPUVAddr address;
@@ -185,8 +187,8 @@ public:
185 }; 187 };
186 static_assert(sizeof(RemapConst) == 12); 188 static_assert(sizeof(RemapConst) == 12);
187 189
188 explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); 190 explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
189 ~MaxwellDMA() = default; 191 ~MaxwellDMA();
190 192
191 /// Write the value to the register identified by method. 193 /// Write the value to the register identified by method.
192 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; 194 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 37d17efdc..8b45f1b62 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1437,8 +1437,7 @@ union Instruction {
1437 return TextureType::TextureCube; 1437 return TextureType::TextureCube;
1438 } 1438 }
1439 1439
1440 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", 1440 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
1441 static_cast<u32>(texture_info.Value()));
1442 UNREACHABLE(); 1441 UNREACHABLE();
1443 return TextureType::Texture1D; 1442 return TextureType::Texture1D;
1444 } 1443 }
@@ -1533,8 +1532,7 @@ union Instruction {
1533 return TextureType::Texture3D; 1532 return TextureType::Texture3D;
1534 } 1533 }
1535 1534
1536 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", 1535 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
1537 static_cast<u32>(texture_info.Value()));
1538 UNREACHABLE(); 1536 UNREACHABLE();
1539 return TextureType::Texture1D; 1537 return TextureType::Texture1D;
1540 } 1538 }
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index de6991ef6..3512283ff 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -9,6 +9,7 @@
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/core.h" 11#include "core/core.h"
12#include "video_core/delayed_destruction_ring.h"
12#include "video_core/gpu.h" 13#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
@@ -17,11 +18,11 @@ namespace VideoCommon {
17 18
18class FenceBase { 19class FenceBase {
19public: 20public:
20 FenceBase(u32 payload, bool is_stubbed) 21 explicit FenceBase(u32 payload_, bool is_stubbed_)
21 : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {} 22 : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
22 23
23 FenceBase(GPUVAddr address, u32 payload, bool is_stubbed) 24 explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_)
24 : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {} 25 : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
25 26
26 GPUVAddr GetAddress() const { 27 GPUVAddr GetAddress() const {
27 return address; 28 return address;
@@ -47,6 +48,11 @@ protected:
47template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> 48template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
48class FenceManager { 49class FenceManager {
49public: 50public:
51 /// Notify the fence manager about a new frame
52 void TickFrame() {
53 delayed_destruction_ring.Tick();
54 }
55
50 void SignalSemaphore(GPUVAddr addr, u32 value) { 56 void SignalSemaphore(GPUVAddr addr, u32 value) {
51 TryReleasePendingFences(); 57 TryReleasePendingFences();
52 const bool should_flush = ShouldFlush(); 58 const bool should_flush = ShouldFlush();
@@ -86,7 +92,7 @@ public:
86 } else { 92 } else {
87 gpu.IncrementSyncPoint(current_fence->GetPayload()); 93 gpu.IncrementSyncPoint(current_fence->GetPayload());
88 } 94 }
89 fences.pop(); 95 PopFence();
90 } 96 }
91 } 97 }
92 98
@@ -132,7 +138,7 @@ private:
132 } else { 138 } else {
133 gpu.IncrementSyncPoint(current_fence->GetPayload()); 139 gpu.IncrementSyncPoint(current_fence->GetPayload());
134 } 140 }
135 fences.pop(); 141 PopFence();
136 } 142 }
137 } 143 }
138 144
@@ -158,7 +164,14 @@ private:
158 query_cache.CommitAsyncFlushes(); 164 query_cache.CommitAsyncFlushes();
159 } 165 }
160 166
167 void PopFence() {
168 delayed_destruction_ring.Push(std::move(fences.front()));
169 fences.pop();
170 }
171
161 std::queue<TFence> fences; 172 std::queue<TFence> fences;
173
174 DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
162}; 175};
163 176
164} // namespace VideoCommon 177} // namespace VideoCommon
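
The fence manager now retires fences through a ring instead of destroying them on the spot: PopFence moves the front of the queue into delayed_destruction_ring, and TickFrame advances the ring once per frame, so a fence's backing objects are freed several frames after the GPU last referenced them. The new video_core/delayed_destruction_ring.h itself is not part of this hunk; a minimal sketch of the idea, under that assumption:

    // Sketch only; the real delayed_destruction_ring.h is not shown in this diff.
    // A ring of N buckets: Push defers destruction of an object until Tick has
    // cycled back around to its bucket (with N = 6 above, six frames later).
    #include <array>
    #include <cstddef>
    #include <utility>
    #include <vector>

    template <typename T, size_t N>
    class DelayedDestructionRing {
    public:
        void Tick() {
            index = (index + 1) % N;
            elements[index].clear(); // destroys objects pushed N ticks ago
        }

        void Push(T&& object) {
            elements[index].push_back(std::move(object));
        }

    private:
        size_t index = 0;
        std::array<std::vector<T>, N> elements;
    };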
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
new file mode 100644
index 000000000..b86c3a757
--- /dev/null
+++ b/src/video_core/framebuffer_config.h
@@ -0,0 +1,31 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Tegra {
8
9/**
10 * Struct describing framebuffer configuration
11 */
12struct FramebufferConfig {
13 enum class PixelFormat : u32 {
14 A8B8G8R8_UNORM = 1,
15 RGB565_UNORM = 4,
16 B8G8R8A8_UNORM = 5,
17 };
18
19 VAddr address{};
20 u32 offset{};
21 u32 width{};
22 u32 height{};
23 u32 stride{};
24 PixelFormat pixel_format{};
25
26 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
27 TransformFlags transform_flags{};
28 Common::Rectangle<int> crop_rect;
29};
30
31} // namespace Tegra
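
Moving FramebufferConfig out of gpu.h into this small standalone header is what lets gpu_thread.h, further down, drop its include of gpu.h: gpu.h now owns a GPUThread::ThreadManager, so the old include direction would have been circular. The members also gain value-initializers, so a default-constructed config is zeroed.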
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index e91f52938..6ab06775f 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -10,6 +10,7 @@
10#include "core/core_timing.h" 10#include "core/core_timing.h"
11#include "core/core_timing_util.h" 11#include "core/core_timing_util.h"
12#include "core/frontend/emu_window.h" 12#include "core/frontend/emu_window.h"
13#include "core/hardware_interrupt_manager.h"
13#include "core/memory.h" 14#include "core/memory.h"
14#include "core/settings.h" 15#include "core/settings.h"
15#include "video_core/engines/fermi_2d.h" 16#include "video_core/engines/fermi_2d.h"
@@ -36,7 +37,8 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
36 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, 37 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
37 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, 38 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
38 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, 39 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
39 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} 40 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
41 gpu_thread{system_, is_async_} {}
40 42
41GPU::~GPU() = default; 43GPU::~GPU() = default;
42 44
@@ -198,10 +200,6 @@ void GPU::SyncGuestHost() {
198 renderer->Rasterizer().SyncGuestHost(); 200 renderer->Rasterizer().SyncGuestHost();
199} 201}
200 202
201void GPU::OnCommandListEnd() {
202 renderer->Rasterizer().ReleaseFences();
203}
204
205enum class GpuSemaphoreOperation { 203enum class GpuSemaphoreOperation {
206 AcquireEqual = 0x1, 204 AcquireEqual = 0x1,
207 WriteLong = 0x2, 205 WriteLong = 0x2,
@@ -232,8 +230,12 @@ void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32
232 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); 230 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
233 } else { 231 } else {
234 for (std::size_t i = 0; i < amount; i++) { 232 for (std::size_t i = 0; i < amount; i++) {
235 CallPullerMethod( 233 CallPullerMethod(MethodCall{
236 {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)}); 234 method,
235 base_start[i],
236 subchannel,
237 methods_pending - static_cast<u32>(i),
238 });
237 } 239 }
238 } 240 }
239} 241}
@@ -295,8 +297,7 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
295 break; 297 break;
296 } 298 }
297 default: 299 default:
298 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", 300 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
299 static_cast<u32>(method));
300 break; 301 break;
301 } 302 }
302} 303}
@@ -375,7 +376,7 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
375 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); 376 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
376 break; 377 break;
377 default: 378 default:
378 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id)); 379 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
379 } 380 }
380} 381}
381 382
@@ -388,8 +389,7 @@ void GPU::ProcessFenceActionMethod() {
388 IncrementSyncPoint(regs.fence_action.syncpoint_id); 389 IncrementSyncPoint(regs.fence_action.syncpoint_id);
389 break; 390 break;
390 default: 391 default:
391 UNIMPLEMENTED_MSG("Unimplemented operation {}", 392 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
392 static_cast<u32>(regs.fence_action.op.Value()));
393 } 393 }
394} 394}
395 395
@@ -459,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() {
459 } 459 }
460} 460}
461 461
462void GPU::Start() {
463 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
464 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
465 cpu_context->MakeCurrent();
466}
467
468void GPU::ObtainContext() {
469 cpu_context->MakeCurrent();
470}
471
472void GPU::ReleaseContext() {
473 cpu_context->DoneCurrent();
474}
475
476void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
477 gpu_thread.SubmitList(std::move(entries));
478}
479
480void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
481 if (!use_nvdec) {
482 return;
483 }
484    // This condition fires when a video stream ends; clear all intermediary data
485 if (entries[0].raw == 0xDEADB33F) {
486 cdma_pusher.reset();
487 return;
488 }
489 if (!cdma_pusher) {
490 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
491 }
492
493    // SubmitCommandBuffer would make the nvdec operations async; this is not currently working
494 // TODO(ameerj): RE proper async nvdec operation
495 // gpu_thread.SubmitCommandBuffer(std::move(entries));
496
497 cdma_pusher->Push(std::move(entries));
498 cdma_pusher->DispatchCalls();
499}
500
501void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
502 gpu_thread.SwapBuffers(framebuffer);
503}
504
505void GPU::FlushRegion(VAddr addr, u64 size) {
506 gpu_thread.FlushRegion(addr, size);
507}
508
509void GPU::InvalidateRegion(VAddr addr, u64 size) {
510 gpu_thread.InvalidateRegion(addr, size);
511}
512
513void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
514 gpu_thread.FlushAndInvalidateRegion(addr, size);
515}
516
517void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
518 auto& interrupt_manager = system.InterruptManager();
519 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
520}
521
522void GPU::WaitIdle() const {
523 gpu_thread.WaitIdle();
524}
525
526void GPU::OnCommandListEnd() {
527 if (is_async) {
528 // This command only applies to asynchronous GPU mode
529 gpu_thread.OnCommandListEnd();
530 }
531}
532
462} // namespace Tegra 533} // namespace Tegra
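
With the synchronous and asynchronous subclasses gone (see the deletions below), all of this behavior lives on Tegra::GPU itself and is selected by the is_async flag at construction time. A sketch of the resulting call site; the exact settings accessor is an assumption, since no caller appears in this diff:

    // Hypothetical caller sketch: one GPU type for both modes.
    const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); // assumed accessor
    auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
    gpu->BindRenderer(std::move(renderer));
    gpu->Start(); // starts the GPU thread and makes the shared CPU context current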
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 21410e125..d81e38680 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,6 +15,8 @@
15#include "core/hle/service/nvflinger/buffer_queue.h" 15#include "core/hle/service/nvflinger/buffer_queue.h"
16#include "video_core/cdma_pusher.h" 16#include "video_core/cdma_pusher.h"
17#include "video_core/dma_pusher.h" 17#include "video_core/dma_pusher.h"
18#include "video_core/framebuffer_config.h"
19#include "video_core/gpu_thread.h"
18 20
19using CacheAddr = std::uintptr_t; 21using CacheAddr = std::uintptr_t;
20[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { 22[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
@@ -101,28 +103,6 @@ enum class DepthFormat : u32 {
101struct CommandListHeader; 103struct CommandListHeader;
102class DebugContext; 104class DebugContext;
103 105
104/**
105 * Struct describing framebuffer configuration
106 */
107struct FramebufferConfig {
108 enum class PixelFormat : u32 {
109 A8B8G8R8_UNORM = 1,
110 RGB565_UNORM = 4,
111 B8G8R8A8_UNORM = 5,
112 };
113
114 VAddr address;
115 u32 offset;
116 u32 width;
117 u32 height;
118 u32 stride;
119 PixelFormat pixel_format;
120
121 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
122 TransformFlags transform_flags;
123 Common::Rectangle<int> crop_rect;
124};
125
126namespace Engines { 106namespace Engines {
127class Fermi2D; 107class Fermi2D;
128class Maxwell3D; 108class Maxwell3D;
@@ -141,7 +121,7 @@ enum class EngineID {
141 121
142class MemoryManager; 122class MemoryManager;
143 123
144class GPU { 124class GPU final {
145public: 125public:
146 struct MethodCall { 126 struct MethodCall {
147 u32 method{}; 127 u32 method{};
@@ -149,17 +129,17 @@ public:
149 u32 subchannel{}; 129 u32 subchannel{};
150 u32 method_count{}; 130 u32 method_count{};
151 131
152 MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0) 132 explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
153 : method(method), argument(argument), subchannel(subchannel), 133 : method(method_), argument(argument_), subchannel(subchannel_),
154 method_count(method_count) {} 134 method_count(method_count_) {}
155 135
156 [[nodiscard]] bool IsLastCall() const { 136 [[nodiscard]] bool IsLastCall() const {
157 return method_count <= 1; 137 return method_count <= 1;
158 } 138 }
159 }; 139 };
160 140
161 explicit GPU(Core::System& system, bool is_async, bool use_nvdec); 141 explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
162 virtual ~GPU(); 142 ~GPU();
163 143
164 /// Binds a renderer to the GPU. 144 /// Binds a renderer to the GPU.
165 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); 145 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
@@ -176,7 +156,7 @@ public:
176 /// Synchronizes CPU writes with Host GPU memory. 156 /// Synchronizes CPU writes with Host GPU memory.
177 void SyncGuestHost(); 157 void SyncGuestHost();
178 /// Signal the ending of command list. 158 /// Signal the ending of command list.
179 virtual void OnCommandListEnd(); 159 void OnCommandListEnd();
180 160
181 /// Request a host GPU memory flush from the CPU. 161 /// Request a host GPU memory flush from the CPU.
182 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); 162 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
@@ -240,7 +220,7 @@ public:
240 } 220 }
241 221
242 // Waits for the GPU to finish working 222 // Waits for the GPU to finish working
243 virtual void WaitIdle() const = 0; 223 void WaitIdle() const;
244 224
245 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. 225 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
246 void WaitFence(u32 syncpoint_id, u32 value); 226 void WaitFence(u32 syncpoint_id, u32 value);
@@ -330,34 +310,34 @@ public:
330 /// Performs any additional setup necessary in order to begin GPU emulation. 310 /// Performs any additional setup necessary in order to begin GPU emulation.
331 /// This can be used to launch any necessary threads and register any necessary 311 /// This can be used to launch any necessary threads and register any necessary
332 /// core timing events. 312 /// core timing events.
333 virtual void Start() = 0; 313 void Start();
334 314
335 /// Obtain the CPU Context 315 /// Obtain the CPU Context
336 virtual void ObtainContext() = 0; 316 void ObtainContext();
337 317
338 /// Release the CPU Context 318 /// Release the CPU Context
339 virtual void ReleaseContext() = 0; 319 void ReleaseContext();
340 320
341 /// Push GPU command entries to be processed 321 /// Push GPU command entries to be processed
342 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; 322 void PushGPUEntries(Tegra::CommandList&& entries);
343 323
344 /// Push GPU command buffer entries to be processed 324 /// Push GPU command buffer entries to be processed
345 virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0; 325 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);
346 326
347 /// Swap buffers (render frame) 327 /// Swap buffers (render frame)
348 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; 328 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
349 329
350 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 330 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
351 virtual void FlushRegion(VAddr addr, u64 size) = 0; 331 void FlushRegion(VAddr addr, u64 size);
352 332
353 /// Notify rasterizer that any caches of the specified region should be invalidated 333 /// Notify rasterizer that any caches of the specified region should be invalidated
354 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 334 void InvalidateRegion(VAddr addr, u64 size);
355 335
356 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 336 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
357 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 337 void FlushAndInvalidateRegion(VAddr addr, u64 size);
358 338
359protected: 339protected:
360 virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; 340 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
361 341
362private: 342private:
363 void ProcessBindMethod(const MethodCall& method_call); 343 void ProcessBindMethod(const MethodCall& method_call);
@@ -414,8 +394,8 @@ private:
414 std::condition_variable sync_cv; 394 std::condition_variable sync_cv;
415 395
416 struct FlushRequest { 396 struct FlushRequest {
417 FlushRequest(u64 fence, VAddr addr, std::size_t size) 397 explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
418 : fence{fence}, addr{addr}, size{size} {} 398 : fence{fence_}, addr{addr_}, size{size_} {}
419 u64 fence; 399 u64 fence;
420 VAddr addr; 400 VAddr addr;
421 std::size_t size; 401 std::size_t size;
@@ -427,6 +407,9 @@ private:
427 std::mutex flush_request_mutex; 407 std::mutex flush_request_mutex;
428 408
429 const bool is_async; 409 const bool is_async;
410
411 VideoCommon::GPUThread::ThreadManager gpu_thread;
412 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
430}; 413};
431 414
432#define ASSERT_REG_POSITION(field_name, position) \ 415#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
deleted file mode 100644
index a9baaf7ef..000000000
--- a/src/video_core/gpu_asynch.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/hardware_interrupt_manager.h"
7#include "video_core/gpu_asynch.h"
8#include "video_core/gpu_thread.h"
9#include "video_core/renderer_base.h"
10
11namespace VideoCommon {
12
13GPUAsynch::GPUAsynch(Core::System& system, bool use_nvdec)
14 : GPU{system, true, use_nvdec}, gpu_thread{system} {}
15
16GPUAsynch::~GPUAsynch() = default;
17
18void GPUAsynch::Start() {
19 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
20 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
21 cpu_context->MakeCurrent();
22}
23
24void GPUAsynch::ObtainContext() {
25 cpu_context->MakeCurrent();
26}
27
28void GPUAsynch::ReleaseContext() {
29 cpu_context->DoneCurrent();
30}
31
32void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
33 gpu_thread.SubmitList(std::move(entries));
34}
35
36void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
37 if (!use_nvdec) {
38 return;
39 }
40 // This condition fires when a video stream ends, clear all intermediary data
41 if (entries[0].raw == 0xDEADB33F) {
42 cdma_pusher.reset();
43 return;
44 }
45 if (!cdma_pusher) {
46 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
47 }
48
49 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
50 // TODO(ameerj): RE proper async nvdec operation
51 // gpu_thread.SubmitCommandBuffer(std::move(entries));
52
53 cdma_pusher->Push(std::move(entries));
54 cdma_pusher->DispatchCalls();
55}
56
57void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
58 gpu_thread.SwapBuffers(framebuffer);
59}
60
61void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
62 gpu_thread.FlushRegion(addr, size);
63}
64
65void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
66 gpu_thread.InvalidateRegion(addr, size);
67}
68
69void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
70 gpu_thread.FlushAndInvalidateRegion(addr, size);
71}
72
73void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
74 auto& interrupt_manager = system.InterruptManager();
75 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
76}
77
78void GPUAsynch::WaitIdle() const {
79 gpu_thread.WaitIdle();
80}
81
82void GPUAsynch::OnCommandListEnd() {
83 gpu_thread.OnCommandListEnd();
84}
85
86} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
deleted file mode 100644
index 0c0872e73..000000000
--- a/src/video_core/gpu_asynch.h
+++ /dev/null
@@ -1,47 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8#include "video_core/gpu_thread.h"
9
10namespace Core::Frontend {
11class GraphicsContext;
12}
13
14namespace VideoCore {
15class RendererBase;
16} // namespace VideoCore
17
18namespace VideoCommon {
19
20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch final : public Tegra::GPU {
22public:
23 explicit GPUAsynch(Core::System& system, bool use_nvdec);
24 ~GPUAsynch() override;
25
26 void Start() override;
27 void ObtainContext() override;
28 void ReleaseContext() override;
29 void PushGPUEntries(Tegra::CommandList&& entries) override;
30 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
31 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
32 void FlushRegion(VAddr addr, u64 size) override;
33 void InvalidateRegion(VAddr addr, u64 size) override;
34 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
35 void WaitIdle() const override;
36
37 void OnCommandListEnd() override;
38
39protected:
40 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
41
42private:
43 GPUThread::ThreadManager gpu_thread;
44 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
45};
46
47} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
deleted file mode 100644
index ecf7bbdf3..000000000
--- a/src/video_core/gpu_synch.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_synch.h"
6#include "video_core/renderer_base.h"
7
8namespace VideoCommon {
9
10GPUSynch::GPUSynch(Core::System& system, bool use_nvdec) : GPU{system, false, use_nvdec} {}
11
12GPUSynch::~GPUSynch() = default;
13
14void GPUSynch::Start() {}
15
16void GPUSynch::ObtainContext() {
17 renderer->Context().MakeCurrent();
18}
19
20void GPUSynch::ReleaseContext() {
21 renderer->Context().DoneCurrent();
22}
23
24void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
25 dma_pusher->Push(std::move(entries));
26 dma_pusher->DispatchCalls();
27}
28
29void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
30 if (!use_nvdec) {
31 return;
32 }
33 // This condition fires when a video stream ends, clears all intermediary data
34 if (entries[0].raw == 0xDEADB33F) {
35 cdma_pusher.reset();
36 return;
37 }
38 if (!cdma_pusher) {
39 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
40 }
41 cdma_pusher->Push(std::move(entries));
42 cdma_pusher->DispatchCalls();
43}
44
45void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
46 renderer->SwapBuffers(framebuffer);
47}
48
49void GPUSynch::FlushRegion(VAddr addr, u64 size) {
50 renderer->Rasterizer().FlushRegion(addr, size);
51}
52
53void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
54 renderer->Rasterizer().InvalidateRegion(addr, size);
55}
56
57void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
58 renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
59}
60
61} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
deleted file mode 100644
index 9d778c71a..000000000
--- a/src/video_core/gpu_synch.h
+++ /dev/null
@@ -1,41 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8
9namespace Core::Frontend {
10class GraphicsContext;
11}
12
13namespace VideoCore {
14class RendererBase;
15} // namespace VideoCore
16
17namespace VideoCommon {
18
19/// Implementation of GPU interface that runs the GPU synchronously
20class GPUSynch final : public Tegra::GPU {
21public:
22 explicit GPUSynch(Core::System& system, bool use_nvdec);
23 ~GPUSynch() override;
24
25 void Start() override;
26 void ObtainContext() override;
27 void ReleaseContext() override;
28 void PushGPUEntries(Tegra::CommandList&& entries) override;
29 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
30 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
31 void FlushRegion(VAddr addr, u64 size) override;
32 void InvalidateRegion(VAddr addr, u64 size) override;
33 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
34 void WaitIdle() const override {}
35
36protected:
37 void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
38 [[maybe_unused]] u32 value) const override {}
39};
40
41} // namespace VideoCommon
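
The four files above are deleted outright: the GPUAsynch/GPUSynch split is gone, every former virtual now lives directly on Tegra::GPU (see gpu.cpp above), and the synchronous case is handled inside ThreadManager by blocking the caller after each pushed command, as the next diff shows.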
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 4b8f58283..7e490bcc3 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "common/scope_exit.h"
7#include "common/thread.h" 8#include "common/thread.h"
8#include "core/core.h" 9#include "core/core.h"
9#include "core/frontend/emu_window.h" 10#include "core/frontend/emu_window.h"
@@ -21,6 +22,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
21 SynchState& state, Tegra::CDmaPusher& cdma_pusher) { 22 SynchState& state, Tegra::CDmaPusher& cdma_pusher) {
22 std::string name = "yuzu:GPU"; 23 std::string name = "yuzu:GPU";
23 MicroProfileOnThreadCreate(name.c_str()); 24 MicroProfileOnThreadCreate(name.c_str());
25 SCOPE_EXIT({ MicroProfileOnThreadExit(); });
26
24 Common::SetCurrentThreadName(name.c_str()); 27 Common::SetCurrentThreadName(name.c_str());
25 Common::SetCurrentThreadPriority(Common::ThreadPriority::High); 28 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
26 system.RegisterHostThread(); 29 system.RegisterHostThread();
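
The SCOPE_EXIT added above guarantees MicroProfileOnThreadExit runs on every path out of RunThread, including the early return on EndProcessingCommand later in the loop. A minimal sketch of the idiom, assuming common/scope_exit.h follows the conventional RAII-guard pattern (its implementation is not part of this diff):

    // Sketch only; the real macro also generates a unique variable name.
    template <typename Fn>
    struct ScopeExitGuard {
        Fn fn;
        ~ScopeExitGuard() { fn(); } // fires on normal exit, early return, or exception
    };
    template <typename Fn>
    ScopeExitGuard(Fn) -> ScopeExitGuard<Fn>;

    #define SCOPE_EXIT(body) ScopeExitGuard scope_exit_guard{[&]() body}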
@@ -39,23 +42,23 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
39 CommandDataContainer next; 42 CommandDataContainer next;
40 while (state.is_running) { 43 while (state.is_running) {
41 next = state.queue.PopWait(); 44 next = state.queue.PopWait();
42 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { 45 if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
43 dma_pusher.Push(std::move(submit_list->entries)); 46 dma_pusher.Push(std::move(submit_list->entries));
44 dma_pusher.DispatchCalls(); 47 dma_pusher.DispatchCalls();
45 } else if (const auto command_list = std::get_if<SubmitChCommandEntries>(&next.data)) { 48 } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) {
46 // NVDEC 49 // NVDEC
47 cdma_pusher.Push(std::move(command_list->entries)); 50 cdma_pusher.Push(std::move(command_list->entries));
48 cdma_pusher.DispatchCalls(); 51 cdma_pusher.DispatchCalls();
49 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { 52 } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
50 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 53 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
51 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { 54 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
52 renderer.Rasterizer().ReleaseFences(); 55 renderer.Rasterizer().ReleaseFences();
53 } else if (std::holds_alternative<GPUTickCommand>(next.data)) { 56 } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
54 system.GPU().TickWork(); 57 system.GPU().TickWork();
55 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { 58 } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
56 renderer.Rasterizer().FlushRegion(data->addr, data->size); 59 renderer.Rasterizer().FlushRegion(flush->addr, flush->size);
57 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { 60 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
58 renderer.Rasterizer().OnCPUWrite(data->addr, data->size); 61 renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size);
59 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { 62 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
60 return; 63 return;
61 } else { 64 } else {
@@ -65,7 +68,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
65 } 68 }
66} 69}
67 70
68ThreadManager::ThreadManager(Core::System& system) : system{system} {} 71ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
72 : system{system_}, is_async{is_async_} {}
69 73
70ThreadManager::~ThreadManager() { 74ThreadManager::~ThreadManager() {
71 if (!thread.joinable()) { 75 if (!thread.joinable()) {
@@ -97,19 +101,30 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
97} 101}
98 102
99void ThreadManager::FlushRegion(VAddr addr, u64 size) { 103void ThreadManager::FlushRegion(VAddr addr, u64 size) {
100 if (!Settings::IsGPULevelHigh()) { 104 if (!is_async) {
105        // Always flush in synchronous GPU mode
101 PushCommand(FlushRegionCommand(addr, size)); 106 PushCommand(FlushRegionCommand(addr, size));
102 return; 107 return;
103 } 108 }
104 if (!Settings::IsGPULevelExtreme()) { 109
105 return; 110 // Asynchronous GPU mode
106 } 111 switch (Settings::values.gpu_accuracy.GetValue()) {
107 if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { 112 case Settings::GPUAccuracy::Normal:
113 PushCommand(FlushRegionCommand(addr, size));
114 break;
115 case Settings::GPUAccuracy::High:
116 // TODO(bunnei): Is this right? Preserving existing behavior for now
117 break;
118 case Settings::GPUAccuracy::Extreme: {
108 auto& gpu = system.GPU(); 119 auto& gpu = system.GPU();
109 u64 fence = gpu.RequestFlush(addr, size); 120 u64 fence = gpu.RequestFlush(addr, size);
110 PushCommand(GPUTickCommand()); 121 PushCommand(GPUTickCommand());
111 while (fence > gpu.CurrentFlushRequestFence()) { 122 while (fence > gpu.CurrentFlushRequestFence()) {
112 } 123 }
124 break;
125 }
126 default:
127 UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue());
113 } 128 }
114} 129}
115 130
@@ -123,7 +138,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
123} 138}
124 139
125void ThreadManager::WaitIdle() const { 140void ThreadManager::WaitIdle() const {
126 while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { 141 while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
142 system.IsPoweredOn()) {
127 } 143 }
128} 144}
129 145
@@ -134,6 +150,12 @@ void ThreadManager::OnCommandListEnd() {
134u64 ThreadManager::PushCommand(CommandData&& command_data) { 150u64 ThreadManager::PushCommand(CommandData&& command_data) {
135 const u64 fence{++state.last_fence}; 151 const u64 fence{++state.last_fence};
136 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 152 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
153
154 if (!is_async) {
155 // In synchronous GPU mode, block the caller until the command has executed
156 WaitIdle();
157 }
158
137 return fence; 159 return fence;
138} 160}
139 161
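
Two behavioral points are worth teasing apart here. First, FlushRegion now keys directly off gpu_accuracy rather than the IsGPULevelHigh/Extreme helpers: Normal enqueues an ordinary flush, High deliberately flushes nothing (preserving the old early-return, per the TODO), and Extreme performs a synchronous round-trip by requesting a flush fence, waking the GPU thread with a GPUTickCommand, and spinning until CurrentFlushRequestFence catches up. Second, PushCommand's new WaitIdle call is what lets a single code path serve both modes: in synchronous mode every pushed command blocks until the GPU thread drains it, so callers observe the same ordering the old GPUSynch gave them. WaitIdle itself now also bails out once the system powers off, so shutdown cannot hang on an unsignaled fence.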
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 32a34e3a7..2775629e7 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,8 +10,9 @@
10#include <optional> 10#include <optional>
11#include <thread> 11#include <thread>
12#include <variant> 12#include <variant>
13
13#include "common/threadsafe_queue.h" 14#include "common/threadsafe_queue.h"
14#include "video_core/gpu.h" 15#include "video_core/framebuffer_config.h"
15 16
16namespace Tegra { 17namespace Tegra {
17struct FramebufferConfig; 18struct FramebufferConfig;
@@ -25,6 +26,10 @@ class GraphicsContext;
25class System; 26class System;
26} // namespace Core 27} // namespace Core
27 28
29namespace VideoCore {
30class RendererBase;
31} // namespace VideoCore
32
28namespace VideoCommon::GPUThread { 33namespace VideoCommon::GPUThread {
29 34
30/// Command to signal to the GPU thread that processing has ended 35/// Command to signal to the GPU thread that processing has ended
@@ -32,30 +37,30 @@ struct EndProcessingCommand final {};
32 37
33/// Command to signal to the GPU thread that a command list is ready for processing 38/// Command to signal to the GPU thread that a command list is ready for processing
34struct SubmitListCommand final { 39struct SubmitListCommand final {
35 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} 40 explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
36 41
37 Tegra::CommandList entries; 42 Tegra::CommandList entries;
38}; 43};
39 44
40/// Command to signal to the GPU thread that a cdma command list is ready for processing 45/// Command to signal to the GPU thread that a cdma command list is ready for processing
41struct SubmitChCommandEntries final { 46struct SubmitChCommandEntries final {
42 explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries) 47 explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries_)
43 : entries{std::move(entries)} {} 48 : entries{std::move(entries_)} {}
44 49
45 Tegra::ChCommandHeaderList entries; 50 Tegra::ChCommandHeaderList entries;
46}; 51};
47 52
48/// Command to signal to the GPU thread that a swap buffers is pending 53/// Command to signal to the GPU thread that a swap buffers is pending
49struct SwapBuffersCommand final { 54struct SwapBuffersCommand final {
50 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) 55 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer_)
51 : framebuffer{std::move(framebuffer)} {} 56 : framebuffer{std::move(framebuffer_)} {}
52 57
53 std::optional<Tegra::FramebufferConfig> framebuffer; 58 std::optional<Tegra::FramebufferConfig> framebuffer;
54}; 59};
55 60
56/// Command to signal to the GPU thread to flush a region 61/// Command to signal to the GPU thread to flush a region
57struct FlushRegionCommand final { 62struct FlushRegionCommand final {
58 explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} 63 explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
59 64
60 VAddr addr; 65 VAddr addr;
61 u64 size; 66 u64 size;
@@ -63,7 +68,7 @@ struct FlushRegionCommand final {
63 68
64/// Command to signal to the GPU thread to invalidate a region 69/// Command to signal to the GPU thread to invalidate a region
65struct InvalidateRegionCommand final { 70struct InvalidateRegionCommand final {
66 explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} 71 explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
67 72
68 VAddr addr; 73 VAddr addr;
69 u64 size; 74 u64 size;
@@ -71,8 +76,8 @@ struct InvalidateRegionCommand final {
71 76
72/// Command to signal to the GPU thread to flush and invalidate a region 77/// Command to signal to the GPU thread to flush and invalidate a region
73struct FlushAndInvalidateRegionCommand final { 78struct FlushAndInvalidateRegionCommand final {
74 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) 79 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_)
75 : addr{addr}, size{size} {} 80 : addr{addr_}, size{size_} {}
76 81
77 VAddr addr; 82 VAddr addr;
78 u64 size; 83 u64 size;
@@ -92,8 +97,8 @@ using CommandData =
92struct CommandDataContainer { 97struct CommandDataContainer {
93 CommandDataContainer() = default; 98 CommandDataContainer() = default;
94 99
95 CommandDataContainer(CommandData&& data, u64 next_fence) 100 explicit CommandDataContainer(CommandData&& data_, u64 next_fence_)
96 : data{std::move(data)}, fence{next_fence} {} 101 : data{std::move(data_)}, fence{next_fence_} {}
97 102
98 CommandData data; 103 CommandData data;
99 u64 fence{}; 104 u64 fence{};
@@ -112,7 +117,7 @@ struct SynchState final {
112/// Class used to manage the GPU thread 117/// Class used to manage the GPU thread
113class ThreadManager final { 118class ThreadManager final {
114public: 119public:
115 explicit ThreadManager(Core::System& system); 120 explicit ThreadManager(Core::System& system_, bool is_async_);
116 ~ThreadManager(); 121 ~ThreadManager();
117 122
118 /// Creates and starts the GPU thread. 123 /// Creates and starts the GPU thread.
@@ -146,11 +151,11 @@ private:
146 /// Pushes a command to be executed by the GPU thread 151 /// Pushes a command to be executed by the GPU thread
147 u64 PushCommand(CommandData&& command_data); 152 u64 PushCommand(CommandData&& command_data);
148 153
149private:
150 SynchState state; 154 SynchState state;
151 Core::System& system; 155 Core::System& system;
152 std::thread thread; 156 std::thread thread;
153 std::thread::id thread_id; 157 std::thread::id thread_id;
158 const bool is_async;
154}; 159};
155 160
156} // namespace VideoCommon::GPUThread 161} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index 99450777e..21e569ba1 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -19,8 +19,8 @@ namespace VideoCore {
19class GuestDriverProfile { 19class GuestDriverProfile {
20public: 20public:
21 explicit GuestDriverProfile() = default; 21 explicit GuestDriverProfile() = default;
22 explicit GuestDriverProfile(std::optional<u32> texture_handler_size) 22 explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
23 : texture_handler_size{texture_handler_size} {} 23 : texture_handler_size{texture_handler_size_} {}
24 24
25 void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); 25 void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
26 26
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index c157724a9..4c7399d5a 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -1,8 +1,26 @@
1set(SHADER_SOURCES 1set(SHADER_FILES
2 block_linear_unswizzle_2d.comp
3 block_linear_unswizzle_3d.comp
4 convert_depth_to_float.frag
5 convert_float_to_depth.frag
6 full_screen_triangle.vert
7 opengl_copy_bc4.comp
2 opengl_present.frag 8 opengl_present.frag
3 opengl_present.vert 9 opengl_present.vert
10 pitch_unswizzle.comp
11 vulkan_blit_color_float.frag
12 vulkan_blit_depth_stencil.frag
13 vulkan_present.frag
14 vulkan_present.vert
15 vulkan_quad_array.comp
16 vulkan_quad_indexed.comp
17 vulkan_uint8.comp
4) 18)
5 19
20find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
21
22set(GLSL_FLAGS "")
23
6set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) 24set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
7set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) 25set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
8set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) 26set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
@@ -10,27 +28,44 @@ set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
10set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) 28set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
11set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) 29set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
12 30
13foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) 31foreach(FILENAME IN ITEMS ${SHADER_FILES})
14 string(REPLACE "." "_" SHADER_NAME ${FILENAME}) 32 string(REPLACE "." "_" SHADER_NAME ${FILENAME})
15 set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) 33 set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
16 set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) 34    # Skip generating source headers on Vulkan-exclusive files
17 add_custom_command( 35 if (NOT ${FILENAME} MATCHES "vulkan.*")
18 OUTPUT 36 set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
19 ${HEADER_FILE} 37 add_custom_command(
20 COMMAND 38 OUTPUT
21 ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} 39 ${SOURCE_HEADER_FILE}
22 MAIN_DEPENDENCY 40 COMMAND
23 ${SOURCE_FILE} 41 ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE}
24 DEPENDS 42 MAIN_DEPENDENCY
25 ${INPUT_FILE} 43 ${SOURCE_FILE}
26 # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified 44 DEPENDS
27 ) 45 ${INPUT_FILE}
28 set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) 46 # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified
47 )
48 set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
49 endif()
50    # Skip compiling OpenGL-exclusive files to SPIR-V
51 if (NOT ${FILENAME} MATCHES "opengl.*")
52 string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME)
53 set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h)
54 add_custom_command(
55 OUTPUT
56 ${SPIRV_HEADER_FILE}
57 COMMAND
58 ${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
59 MAIN_DEPENDENCY
60 ${SOURCE_FILE}
61 )
62 set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE})
63 endif()
29endforeach() 64endforeach()
30 65
31add_custom_target(host_shaders 66add_custom_target(host_shaders
32 DEPENDS 67 DEPENDS
33 ${SHADER_HEADERS} 68 ${SHADER_HEADERS}
34 SOURCES 69 SOURCES
35 ${SHADER_SOURCES} 70 ${SHADER_FILES}
36) 71)
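
Host shader handling now splits per API: string-source headers are still generated for everything except vulkan.* files, while everything except opengl.* files is additionally compiled to SPIR-V, so the build now hard-requires glslangValidator on the host (find_program ... REQUIRED). The --variable-name flag makes glslangValidator emit a C header holding the SPIR-V words as a uint32_t array named after the upper-cased shader name, e.g. vulkan_present.frag yields VULKAN_PRESENT_FRAG_SPV. A consumer sketch, with the surrounding Vulkan boilerplate as an assumption (no consumer appears in this diff):

    // Sketch: turning a generated SPIR-V header into a shader module. The
    // helper name and omitted error handling are hypothetical.
    #include <vulkan/vulkan.h>
    #include "video_core/host_shaders/vulkan_present_frag_spv.h"

    VkShaderModule CreatePresentFragModule(VkDevice device) {
        VkShaderModuleCreateInfo ci{};
        ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
        ci.codeSize = sizeof(VULKAN_PRESENT_FRAG_SPV); // byte size of the word array
        ci.pCode = VULKAN_PRESENT_FRAG_SPV;
        VkShaderModule module = VK_NULL_HANDLE;
        vkCreateShaderModule(device, &ci, nullptr, &module);
        return module;
    }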
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp
new file mode 100644
index 000000000..a131be79e
--- /dev/null
+++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp
@@ -0,0 +1,122 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 430
6
7#ifdef VULKAN
8
9#extension GL_EXT_shader_16bit_storage : require
10#extension GL_EXT_shader_8bit_storage : require
11#define HAS_EXTENDED_TYPES 1
12#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
13#define END_PUSH_CONSTANTS };
14#define UNIFORM(n)
15#define BINDING_SWIZZLE_BUFFER 0
16#define BINDING_INPUT_BUFFER 1
17#define BINDING_OUTPUT_IMAGE 2
18
19#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
20
21#extension GL_NV_gpu_shader5 : enable
22#ifdef GL_NV_gpu_shader5
23#define HAS_EXTENDED_TYPES 1
24#else
25#define HAS_EXTENDED_TYPES 0
26#endif
27#define BEGIN_PUSH_CONSTANTS
28#define END_PUSH_CONSTANTS
29#define UNIFORM(n) layout (location = n) uniform
30#define BINDING_SWIZZLE_BUFFER 0
31#define BINDING_INPUT_BUFFER 1
32#define BINDING_OUTPUT_IMAGE 0
33
34#endif
35
36BEGIN_PUSH_CONSTANTS
37UNIFORM(0) uvec3 origin;
38UNIFORM(1) ivec3 destination;
39UNIFORM(2) uint bytes_per_block_log2;
40UNIFORM(3) uint layer_stride;
41UNIFORM(4) uint block_size;
42UNIFORM(5) uint x_shift;
43UNIFORM(6) uint block_height;
44UNIFORM(7) uint block_height_mask;
45END_PUSH_CONSTANTS
46
47layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
48 uint swizzle_table[];
49};
50
51#if HAS_EXTENDED_TYPES
52layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
53layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
54#endif
55layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
56layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
57layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };
58
59layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image;
60
61layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
62
63const uint GOB_SIZE_X = 64;
64const uint GOB_SIZE_Y = 8;
65const uint GOB_SIZE_Z = 1;
66const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
67
68const uint GOB_SIZE_X_SHIFT = 6;
69const uint GOB_SIZE_Y_SHIFT = 3;
70const uint GOB_SIZE_Z_SHIFT = 0;
71const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
72
73const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
74
75uint SwizzleOffset(uvec2 pos) {
76 pos = pos & SWIZZLE_MASK;
77 return swizzle_table[pos.y * 64 + pos.x];
78}
79
80uvec4 ReadTexel(uint offset) {
81 switch (bytes_per_block_log2) {
82#if HAS_EXTENDED_TYPES
83 case 0:
84 return uvec4(u8data[offset], 0, 0, 0);
85 case 1:
86 return uvec4(u16data[offset / 2], 0, 0, 0);
87#else
88 case 0:
89 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
90 case 1:
91 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
92#endif
93 case 2:
94 return uvec4(u32data[offset / 4], 0, 0, 0);
95 case 3:
96 return uvec4(u64data[offset / 8], 0, 0);
97 case 4:
98 return u128data[offset / 16];
99 }
100 return uvec4(0);
101}
102
103void main() {
104 uvec3 pos = gl_GlobalInvocationID + origin;
105 pos.x <<= bytes_per_block_log2;
106
107 // Read as soon as possible due to its latency
108 const uint swizzle = SwizzleOffset(pos.xy);
109
110 const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
111
112 uint offset = 0;
113 offset += pos.z * layer_stride;
114 offset += (block_y >> block_height) * block_size;
115 offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
116 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
117 offset += swizzle;
118
119 const uvec4 texel = ReadTexel(offset);
120 const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
121 imageStore(output_image, coord, texel);
122}
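
The address math in main() is the standard Maxwell block-linear layout: bytes are grouped into 64x8-byte GOBs, GOBs are stacked 2^block_height high into blocks, and the low bits within a GOB are interleaved via the uploaded swizzle table (block_height, x_shift and friends arrive pre-shifted as logarithms, which is why the shader only shifts and masks). The same computation expressed on the CPU, as a sketch; SwizzleOffset stands in for the 64x8 swizzle_table lookup:

    // CPU mirror of the shader's offset computation (sketch).
    #include <cstdint>

    constexpr uint32_t GOB_SIZE_X_SHIFT = 6; // 64 bytes per GOB row
    constexpr uint32_t GOB_SIZE_Y_SHIFT = 3; // 8 rows per GOB
    constexpr uint32_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT;

    uint32_t SwizzleOffset(uint32_t x, uint32_t y); // table lookup, not shown

    uint32_t BlockLinearOffset(uint32_t x_bytes, uint32_t y, uint32_t layer,
                               uint32_t layer_stride, uint32_t block_size,
                               uint32_t x_shift, uint32_t block_height,
                               uint32_t block_height_mask) {
        const uint32_t block_y = y >> GOB_SIZE_Y_SHIFT;
        uint32_t offset = layer * layer_stride;
        offset += (block_y >> block_height) * block_size;          // full block rows above
        offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; // GOB row within block
        offset += (x_bytes >> GOB_SIZE_X_SHIFT) << x_shift;        // blocks to the left
        offset += SwizzleOffset(x_bytes, y);                       // intra-GOB interleave
        return offset;
    }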
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp
new file mode 100644
index 000000000..bb6872e6b
--- /dev/null
+++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp
@@ -0,0 +1,125 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 430
6
7#ifdef VULKAN
8
9#extension GL_EXT_shader_16bit_storage : require
10#extension GL_EXT_shader_8bit_storage : require
11#define HAS_EXTENDED_TYPES 1
12#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
13#define END_PUSH_CONSTANTS };
14#define UNIFORM(n)
15#define BINDING_SWIZZLE_BUFFER 0
16#define BINDING_INPUT_BUFFER 1
17#define BINDING_OUTPUT_IMAGE 2
18
19#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
20
21#extension GL_NV_gpu_shader5 : enable
22#ifdef GL_NV_gpu_shader5
23#define HAS_EXTENDED_TYPES 1
24#else
25#define HAS_EXTENDED_TYPES 0
26#endif
27#define BEGIN_PUSH_CONSTANTS
28#define END_PUSH_CONSTANTS
29#define UNIFORM(n) layout (location = n) uniform
30#define BINDING_SWIZZLE_BUFFER 0
31#define BINDING_INPUT_BUFFER 1
32#define BINDING_OUTPUT_IMAGE 0
33
34#endif
35
36BEGIN_PUSH_CONSTANTS
37UNIFORM(0) uvec3 origin;
38UNIFORM(1) ivec3 destination;
39UNIFORM(2) uint bytes_per_block_log2;
40UNIFORM(3) uint slice_size;
41UNIFORM(4) uint block_size;
42UNIFORM(5) uint x_shift;
43UNIFORM(6) uint block_height;
44UNIFORM(7) uint block_height_mask;
45UNIFORM(8) uint block_depth;
46UNIFORM(9) uint block_depth_mask;
47END_PUSH_CONSTANTS
48
49layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
50 uint swizzle_table[];
51};
52
53#if HAS_EXTENDED_TYPES
54layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
55layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
56#endif
57layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
58layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
59layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };
60
61layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image;
62
63layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in;
64
65const uint GOB_SIZE_X = 64;
66const uint GOB_SIZE_Y = 8;
67const uint GOB_SIZE_Z = 1;
68const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
69
70const uint GOB_SIZE_X_SHIFT = 6;
71const uint GOB_SIZE_Y_SHIFT = 3;
72const uint GOB_SIZE_Z_SHIFT = 0;
73const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
74
75const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
76
77uint SwizzleOffset(uvec2 pos) {
78 pos = pos & SWIZZLE_MASK;
79 return swizzle_table[pos.y * 64 + pos.x];
80}
81
82uvec4 ReadTexel(uint offset) {
83 switch (bytes_per_block_log2) {
84#if HAS_EXTENDED_TYPES
85 case 0:
86 return uvec4(u8data[offset], 0, 0, 0);
87 case 1:
88 return uvec4(u16data[offset / 2], 0, 0, 0);
89#else
90 case 0:
91 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
92 case 1:
93 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
94#endif
95 case 2:
96 return uvec4(u32data[offset / 4], 0, 0, 0);
97 case 3:
98 return uvec4(u64data[offset / 8], 0, 0);
99 case 4:
100 return u128data[offset / 16];
101 }
102 return uvec4(0);
103}
104
105void main() {
106 uvec3 pos = gl_GlobalInvocationID + origin;
107 pos.x <<= bytes_per_block_log2;
108
109 // Read as soon as possible due to its latency
110 const uint swizzle = SwizzleOffset(pos.xy);
111
112 const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
113
114 uint offset = 0;
115 offset += (pos.z >> block_depth) * slice_size;
116 offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height);
117 offset += (block_y >> block_height) * block_size;
118 offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
119 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
120 offset += swizzle;
121
122 const uvec4 texel = ReadTexel(offset);
123 const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
124 imageStore(output_image, coord, texel);
125}
diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag b/src/video_core/host_shaders/convert_depth_to_float.frag
new file mode 100644
index 000000000..624c58509
--- /dev/null
+++ b/src/video_core/host_shaders/convert_depth_to_float.frag
@@ -0,0 +1,13 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6
7layout(binding = 0) uniform sampler2D depth_texture;
8layout(location = 0) out float output_color;
9
10void main() {
11 ivec2 coord = ivec2(gl_FragCoord.xy);
12 output_color = texelFetch(depth_texture, coord, 0).r;
13}
diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag
new file mode 100644
index 000000000..d86c795f4
--- /dev/null
+++ b/src/video_core/host_shaders/convert_float_to_depth.frag
@@ -0,0 +1,13 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6
7layout(binding = 0) uniform sampler2D color_texture;
8
9void main() {
10 ivec2 coord = ivec2(gl_FragCoord.xy);
11 float color = texelFetch(color_texture, coord, 0).r;
12 gl_FragDepth = color;
13}
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert
new file mode 100644
index 000000000..452ad6502
--- /dev/null
+++ b/src/video_core/host_shaders/full_screen_triangle.vert
@@ -0,0 +1,29 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6
7#ifdef VULKAN
8#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
9#define END_PUSH_CONSTANTS };
10#define UNIFORM(n)
11#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
12#define BEGIN_PUSH_CONSTANTS
13#define END_PUSH_CONSTANTS
14#define UNIFORM(n) layout (location = n) uniform
15#endif
16
17BEGIN_PUSH_CONSTANTS
18UNIFORM(0) vec2 tex_scale;
19UNIFORM(1) vec2 tex_offset;
20END_PUSH_CONSTANTS
21
22layout(location = 0) out vec2 texcoord;
23
24void main() {
25 float x = float((gl_VertexIndex & 1) << 2);
26 float y = float((gl_VertexIndex & 2) << 1);
27 gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
28 texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
29}
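
The bit twiddling expands to exactly three vertices: gl_VertexIndex 0 gives (x, y) = (0, 0), index 1 gives (4, 0), index 2 gives (0, 4), so after the -1 offset the positions are (-1, -1), (3, -1) and (-1, 3). That single oversized triangle covers the entire clip-space square, letting one non-indexed draw of three vertices blit the screen with no vertex buffer bound; texcoord is derived from the same values, with tex_scale and tex_offset selecting the sampled sub-rectangle.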
diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp
new file mode 100644
index 000000000..7b8e20fbe
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_copy_bc4.comp
@@ -0,0 +1,70 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 430 core
6#extension GL_ARB_gpu_shader_int64 : require
7
8layout (local_size_x = 4, local_size_y = 4) in;
9
10layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input;
11layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output;
12
13layout(location = 0) uniform uvec3 src_offset;
14layout(location = 1) uniform uvec3 dst_offset;
15
16// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
17uint DecompressBlock(uint64_t bits, uvec2 coord) {
18 const uint code_offset = 16 + 3 * (4 * coord.y + coord.x);
19 const uint code = uint(bits >> code_offset) & 7;
20 const uint red0 = uint(bits >> 0) & 0xff;
21 const uint red1 = uint(bits >> 8) & 0xff;
22 if (red0 > red1) {
23 switch (code) {
24 case 0:
25 return red0;
26 case 1:
27 return red1;
28 case 2:
29 return (6 * red0 + 1 * red1) / 7;
30 case 3:
31 return (5 * red0 + 2 * red1) / 7;
32 case 4:
33 return (4 * red0 + 3 * red1) / 7;
34 case 5:
35 return (3 * red0 + 4 * red1) / 7;
36 case 6:
37 return (2 * red0 + 5 * red1) / 7;
38 case 7:
39 return (1 * red0 + 6 * red1) / 7;
40 }
41 } else {
42 switch (code) {
43 case 0:
44 return red0;
45 case 1:
46 return red1;
47 case 2:
48 return (4 * red0 + 1 * red1) / 5;
49 case 3:
50 return (3 * red0 + 2 * red1) / 5;
51 case 4:
52 return (2 * red0 + 3 * red1) / 5;
53 case 5:
54 return (1 * red0 + 4 * red1) / 5;
55 case 6:
56 return 0;
57 case 7:
58 return 0xff;
59 }
60 }
61 return 0;
62}
63
64void main() {
65 uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg;
66 uint64_t bits = packUint2x32(packed_bits);
67 uint red = DecompressBlock(bits, gl_LocalInvocationID.xy);
68 uvec4 color = uvec4(red & 0xff, 0, 0, 0xff);
69 imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color);
70}
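
Each 8-byte BC4 block packs two 8-bit endpoints (red0 and red1 in the low 16 bits) followed by sixteen 3-bit selectors, which is where the per-texel bit offset 16 + 3 * (4 * y + x) comes from. When red0 > red1, all eight codes interpolate along a 7-step ramp between the endpoints; otherwise codes 0-5 walk a 5-step ramp and codes 6 and 7 pin to 0 and 255. As a worked case: red0 = 200, red1 = 40, code = 2 decodes to (6*200 + 40) / 7 = 177. Each 4x4 workgroup decodes one block, with gl_WorkGroupID selecting the block and gl_LocalInvocationID the texel within it, writing the result as R8 with alpha forced to 0xff.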
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
index 8a4cb024b..84b818227 100644
--- a/src/video_core/host_shaders/opengl_present.frag
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -1,3 +1,7 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
1#version 430 core 5#version 430 core
2 6
3layout (location = 0) in vec2 frag_tex_coord; 7layout (location = 0) in vec2 frag_tex_coord;
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert
index 2235d31a4..c3b5adbba 100644
--- a/src/video_core/host_shaders/opengl_present.vert
+++ b/src/video_core/host_shaders/opengl_present.vert
@@ -1,3 +1,7 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
1#version 430 core 5#version 430 core
2 6
3out gl_PerVertex { 7out gl_PerVertex {
diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp
new file mode 100644
index 000000000..cb48ec170
--- /dev/null
+++ b/src/video_core/host_shaders/pitch_unswizzle.comp
@@ -0,0 +1,86 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 430
6
7#ifdef VULKAN
8
9#extension GL_EXT_shader_16bit_storage : require
10#extension GL_EXT_shader_8bit_storage : require
11#define HAS_EXTENDED_TYPES 1
12#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
13#define END_PUSH_CONSTANTS };
14#define UNIFORM(n)
15#define BINDING_INPUT_BUFFER 0
16#define BINDING_OUTPUT_IMAGE 1
17
18#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
19
20#extension GL_NV_gpu_shader5 : enable
21#ifdef GL_NV_gpu_shader5
22#define HAS_EXTENDED_TYPES 1
23#else
24#define HAS_EXTENDED_TYPES 0
25#endif
26#define BEGIN_PUSH_CONSTANTS
27#define END_PUSH_CONSTANTS
28#define UNIFORM(n) layout (location = n) uniform
29#define BINDING_INPUT_BUFFER 0
30#define BINDING_OUTPUT_IMAGE 0
31
32#endif
33
34BEGIN_PUSH_CONSTANTS
35UNIFORM(0) uvec2 origin;
36UNIFORM(1) ivec2 destination;
37UNIFORM(2) uint bytes_per_block;
38UNIFORM(3) uint pitch;
39END_PUSH_CONSTANTS
40
41#if HAS_EXTENDED_TYPES
42layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; };
43layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; };
44#endif
45layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; };
46layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; };
47layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; };
48
49layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image;
50
51layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
52
53uvec4 ReadTexel(uint offset) {
54 switch (bytes_per_block) {
55#if HAS_EXTENDED_TYPES
56 case 1:
57 return uvec4(u8data[offset], 0, 0, 0);
58 case 2:
59 return uvec4(u16data[offset / 2], 0, 0, 0);
60#else
61 case 1:
62 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
63 case 2:
64 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
65#endif
66 case 4:
67 return uvec4(u32data[offset / 4], 0, 0, 0);
68 case 8:
69 return uvec4(u64data[offset / 8], 0, 0);
70 case 16:
71 return u128data[offset / 16];
72 }
73 return uvec4(0);
74}
75
76void main() {
77 uvec2 pos = gl_GlobalInvocationID.xy + origin;
78
79 uint offset = 0;
80 offset += pos.x * bytes_per_block;
81 offset += pos.y * pitch;
82
83 const uvec4 texel = ReadTexel(offset);
84 const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination;
85 imageStore(output_image, coord, texel);
86}
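ReadTexel above selects one of several aliased views of the same SSBO binding based on bytes_per_block, falling back to bitfieldExtract on 32-bit words when 8/16-bit storage types are unavailable, and main() computes a pitch-linear byte offset from the texel position. A small C++ sketch of those two calculations (names are illustrative):

    #include <cstdint>

    // Byte offset of texel (x, y) in a pitch-linear surface, as in main() above.
    constexpr uint32_t PitchLinearOffset(uint32_t x, uint32_t y,
                                         uint32_t bytes_per_block, uint32_t pitch) {
        return x * bytes_per_block + y * pitch;
    }

    // Narrow-load fallback used when 8/16-bit storage is unavailable: read the
    // containing 32-bit word and extract the value, like the bitfieldExtract
    // calls in ReadTexel. Valid for bits == 8 or 16 and byte-aligned offsets.
    constexpr uint32_t ExtractNarrow(uint32_t word, uint32_t byte_offset, uint32_t bits) {
        return (word >> ((byte_offset * 8u) % 32u)) & ((1u << bits) - 1u);
    }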
diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag
new file mode 100644
index 000000000..4a6aae410
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag
@@ -0,0 +1,14 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6
7layout(binding = 0) uniform sampler2D tex;
8
9layout(location = 0) in vec2 texcoord;
10layout(location = 0) out vec4 color;
11
12void main() {
13 color = textureLod(tex, texcoord, 0);
14}
diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
new file mode 100644
index 000000000..19bb23a5a
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
@@ -0,0 +1,16 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6#extension GL_ARB_shader_stencil_export : require
7
8layout(binding = 0) uniform sampler2D depth_tex;
9layout(binding = 1) uniform isampler2D stencil_tex;
10
11layout(location = 0) in vec2 texcoord;
12
13void main() {
14 gl_FragDepth = textureLod(depth_tex, texcoord, 0).r;
15 gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r;
16}
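The blit shader above writes gl_FragStencilRefARB, which requires GL_ARB_shader_stencil_export in GLSL; on Vulkan the corresponding device capability is VK_EXT_shader_stencil_export, so a renderer can only take this path when the extension is present. A hypothetical helper for that check, not part of this diff:

    #include <cstring>
    #include <vector>
    #include <vulkan/vulkan.h>

    // Hypothetical capability check: true if the device supports exporting the
    // stencil reference from a fragment shader, as the blit shader above needs.
    bool SupportsShaderStencilExport(VkPhysicalDevice physical_device) {
        uint32_t count = 0;
        vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &count, nullptr);
        std::vector<VkExtensionProperties> extensions(count);
        vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &count,
                                             extensions.data());
        for (const VkExtensionProperties& ext : extensions) {
            if (std::strcmp(ext.extensionName,
                            VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME) == 0) {
                return true;
            }
        }
        return false;
    }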
diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/host_shaders/vulkan_present.frag
index a06ecd24a..0979ff3e6 100644
--- a/src/video_core/renderer_vulkan/shaders/blit.frag
+++ b/src/video_core/host_shaders/vulkan_present.frag
@@ -2,15 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5/*
6 * Build instructions:
7 * $ glslangValidator -V $THIS_FILE -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core 5#version 460 core
15 6
16layout (location = 0) in vec2 frag_tex_coord; 7layout (location = 0) in vec2 frag_tex_coord;
diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/host_shaders/vulkan_present.vert
index c64d9235a..00b868958 100644
--- a/src/video_core/renderer_vulkan/shaders/blit.vert
+++ b/src/video_core/host_shaders/vulkan_present.vert
@@ -2,15 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5/*
6 * Build instructions:
7 * $ glslangValidator -V $THIS_FILE -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core 5#version 460 core
15 6
16layout (location = 0) in vec2 vert_position; 7layout (location = 0) in vec2 vert_position;
diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp
index 5a5703308..212f4e998 100644
--- a/src/video_core/renderer_vulkan/shaders/quad_array.comp
+++ b/src/video_core/host_shaders/vulkan_quad_array.comp
@@ -2,15 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5/*
6 * Build instructions:
7 * $ glslangValidator -V $THIS_FILE -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core 5#version 460 core
15 6
16layout (local_size_x = 1024) in; 7layout (local_size_x = 1024) in;
diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp
index 5a472ba9b..8655591d0 100644
--- a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp
+++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp
@@ -2,15 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5/*
6 * Build instructions:
7 * $ glslangValidator -V quad_indexed.comp -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core 5#version 460 core
15 6
16layout (local_size_x = 1024) in; 7layout (local_size_x = 1024) in;
diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp
index a320f3ae0..ad74d7af9 100644
--- a/src/video_core/renderer_vulkan/shaders/uint8.comp
+++ b/src/video_core/host_shaders/vulkan_uint8.comp
@@ -2,15 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5/*
6 * Build instructions:
7 * $ glslangValidator -V $THIS_FILE -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core 5#version 460 core
15#extension GL_EXT_shader_16bit_storage : require 6#extension GL_EXT_shader_16bit_storage : require
16#extension GL_EXT_shader_8bit_storage : require 7#extension GL_EXT_shader_8bit_storage : require
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index df00b57df..70ac7c620 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -85,7 +85,7 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
85 {0x0217920100488FF7, &HLE_0217920100488FF7}, 85 {0x0217920100488FF7, &HLE_0217920100488FF7},
86}}; 86}};
87 87
88HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 88HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
89HLEMacro::~HLEMacro() = default; 89HLEMacro::~HLEMacro() = default;
90 90
91std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { 91std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
@@ -99,8 +99,8 @@ std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) co
99 99
100HLEMacroImpl::~HLEMacroImpl() = default; 100HLEMacroImpl::~HLEMacroImpl() = default;
101 101
102HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) 102HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
103 : maxwell3d(maxwell3d), func(func) {} 103 : maxwell3d{maxwell3d_}, func{func_} {}
104 104
105void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { 105void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
106 func(maxwell3d, parameters); 106 func(maxwell3d, parameters);
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
index 37af875a0..cb3bd1600 100644
--- a/src/video_core/macro/macro_hle.h
+++ b/src/video_core/macro/macro_hle.h
@@ -20,7 +20,7 @@ using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u3
20 20
21class HLEMacro { 21class HLEMacro {
22public: 22public:
23 explicit HLEMacro(Engines::Maxwell3D& maxwell3d); 23 explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
24 ~HLEMacro(); 24 ~HLEMacro();
25 25
26 std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; 26 std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index bd01fd1f2..8da26fd59 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -11,29 +11,29 @@
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); 11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12 12
13namespace Tegra { 13namespace Tegra {
14MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) 14MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
15 : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} 15 : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
16 16
17std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { 17std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
18 return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); 18 return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
19} 19}
20 20
21MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, 21MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_,
22 const std::vector<u32>& code) 22 const std::vector<u32>& code_)
23 : maxwell3d(maxwell3d), code(code) {} 23 : maxwell3d{maxwell3d_}, code{code_} {}
24 24
25void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) { 25void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) {
26 MICROPROFILE_SCOPE(MacroInterp); 26 MICROPROFILE_SCOPE(MacroInterp);
27 Reset(); 27 Reset();
28 28
29 registers[1] = parameters[0]; 29 registers[1] = params[0];
30 num_parameters = parameters.size(); 30 num_parameters = params.size();
31 31
32 if (num_parameters > parameters_capacity) { 32 if (num_parameters > parameters_capacity) {
33 parameters_capacity = num_parameters; 33 parameters_capacity = num_parameters;
34 this->parameters = std::make_unique<u32[]>(num_parameters); 34 parameters = std::make_unique<u32[]>(num_parameters);
35 } 35 }
36 std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); 36 std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32));
37 37
38 // Execute the code until we hit an exit condition. 38 // Execute the code until we hit an exit condition.
39 bool keep_executing = true; 39 bool keep_executing = true;
@@ -133,8 +133,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
133 break; 133 break;
134 } 134 }
135 default: 135 default:
136 UNIMPLEMENTED_MSG("Unimplemented macro operation {}", 136 UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value());
137 static_cast<u32>(opcode.operation.Value()));
138 } 137 }
139 138
140 // An instruction with the Exit flag will not actually 139 // An instruction with the Exit flag will not actually
@@ -182,7 +181,7 @@ u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a,
182 return ~(src_a & src_b); 181 return ~(src_a & src_b);
183 182
184 default: 183 default:
185 UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", static_cast<u32>(operation)); 184 UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation);
186 return 0; 185 return 0;
187 } 186 }
188} 187}
@@ -230,7 +229,7 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r
230 Send((result >> 12) & 0b111111); 229 Send((result >> 12) & 0b111111);
231 break; 230 break;
232 default: 231 default:
233 UNIMPLEMENTED_MSG("Unimplemented result operation {}", static_cast<u32>(operation)); 232 UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
234 } 233 }
235} 234}
236 235
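The Execute change above also shows the interpreter's grow-only parameter buffer: the backing allocation is replaced only when a macro is invoked with more parameters than any earlier call, and renaming the argument to params removes the need for this-> disambiguation. A standalone sketch of that pattern:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <memory>
    #include <vector>

    // Grow-only scratch buffer in the style of MacroInterpreterImpl::Execute:
    // reallocate only when a call needs more space than any earlier one.
    class ParameterBuffer {
    public:
        void Assign(const std::vector<uint32_t>& params) {
            if (params.size() > capacity) {
                capacity = params.size();
                storage = std::make_unique<uint32_t[]>(capacity);
            }
            std::memcpy(storage.get(), params.data(), params.size() * sizeof(uint32_t));
        }

    private:
        std::unique_ptr<uint32_t[]> storage;
        std::size_t capacity = 0;
    };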
diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h
index 90217fc89..d50c619ce 100644
--- a/src/video_core/macro/macro_interpreter.h
+++ b/src/video_core/macro/macro_interpreter.h
@@ -17,7 +17,7 @@ class Maxwell3D;
17 17
18class MacroInterpreter final : public MacroEngine { 18class MacroInterpreter final : public MacroEngine {
19public: 19public:
20 explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); 20 explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_);
21 21
22protected: 22protected:
23 std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; 23 std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
@@ -28,8 +28,8 @@ private:
28 28
29class MacroInterpreterImpl : public CachedMacro { 29class MacroInterpreterImpl : public CachedMacro {
30public: 30public:
31 MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); 31 explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
32 void Execute(const std::vector<u32>& parameters, u32 method) override; 32 void Execute(const std::vector<u32>& params, u32 method) override;
33 33
34private: 34private:
35 /// Resets the execution engine state, zeroing registers, etc. 35 /// Resets the execution engine state, zeroing registers, etc.
@@ -38,9 +38,9 @@ private:
38 /** 38 /**
39 * Executes a single macro instruction located at the current program counter. Returns whether 39 * Executes a single macro instruction located at the current program counter. Returns whether
40 * the interpreter should keep running. 40 * the interpreter should keep running.
41 * @param offset Offset to start execution at. 41 *
42 * @param is_delay_slot Whether the current step is being executed due to a delay slot in a 42 * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
43 * previous instruction. 43 * previous instruction.
44 */ 44 */
45 bool Step(bool is_delay_slot); 45 bool Step(bool is_delay_slot);
46 46
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 954b87515..c6b2b2109 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -28,15 +28,15 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
28 BRANCH_HOLDER, 28 BRANCH_HOLDER,
29}); 29});
30 30
31MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) 31MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
32 : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} 32 : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
33 33
34std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { 34std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
35 return std::make_unique<MacroJITx64Impl>(maxwell3d, code); 35 return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
36} 36}
37 37
38MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code) 38MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
39 : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) { 39 : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
40 Compile(); 40 Compile();
41} 41}
42 42
@@ -165,8 +165,7 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
165 } 165 }
166 break; 166 break;
167 default: 167 default:
168 UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", 168 UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value());
169 static_cast<std::size_t>(opcode.alu_operation.Value()));
170 break; 169 break;
171 } 170 }
172 Compile_ProcessResult(opcode.result_operation, opcode.dst); 171 Compile_ProcessResult(opcode.result_operation, opcode.dst);
@@ -553,15 +552,15 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
553} 552}
554 553
555void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { 554void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
556 const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) { 555 const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) {
557 // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero 556 // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
558 // register. 557 // register.
559 if (reg == 0) { 558 if (reg_index == 0) {
560 return; 559 return;
561 } 560 }
562 mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); 561 mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], result);
563 }; 562 };
564 const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); }; 563 const auto SetMethodAddress = [this](const Xbyak::Reg32& reg32) { mov(METHOD_ADDRESS, reg32); };
565 564
566 switch (operation) { 565 switch (operation) {
567 case Macro::ResultOperation::IgnoreAndFetch: 566 case Macro::ResultOperation::IgnoreAndFetch:
@@ -604,7 +603,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
604 Compile_Send(RESULT); 603 Compile_Send(RESULT);
605 break; 604 break;
606 default: 605 default:
607 UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation)); 606 UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation);
608 } 607 }
609} 608}
610 609
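In Compile_ProcessResult the lambda parameters are renamed so they no longer shadow the outer reg, without changing behavior: register 0 still always reads as zero and writes to it are dropped, which is how the macro ISA encodes a NOP. The same guard in plain C++, mirroring the SetRegister lambda above:

    #include <array>
    #include <cstdint>

    std::array<uint32_t, 8> registers{};

    // Register 0 always reads as zero and stores to it are discarded, so a
    // store to register 0 doubles as a NOP.
    void SetRegister(uint32_t reg_index, uint32_t value) {
        if (reg_index == 0) {
            return;
        }
        registers[reg_index] = value;
    }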
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index a180e7428..7f50ac2f8 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -23,7 +23,7 @@ constexpr size_t MAX_CODE_SIZE = 0x10000;
23 23
24class MacroJITx64 final : public MacroEngine { 24class MacroJITx64 final : public MacroEngine {
25public: 25public:
26 explicit MacroJITx64(Engines::Maxwell3D& maxwell3d); 26 explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_);
27 27
28protected: 28protected:
29 std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; 29 std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
@@ -34,7 +34,7 @@ private:
34 34
35class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { 35class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
36public: 36public:
37 MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); 37 explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
38 ~MacroJITx64Impl(); 38 ~MacroJITx64Impl();
39 39
40 void Execute(const std::vector<u32>& parameters, u32 method) override; 40 void Execute(const std::vector<u32>& parameters, u32 method) override;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 6e70bd362..65feff588 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
57 } 57 }
58 58
59 // Flush and invalidate through the GPU interface, to be asynchronous if possible. 59 // Flush and invalidate through the GPU interface, to be asynchronous if possible.
60 system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); 60 const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
61 ASSERT(cpu_addr);
62
63 rasterizer->UnmapMemory(*cpu_addr, size);
61 64
62 UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); 65 UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
63} 66}
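MemoryManager::Unmap now resolves the GPU address to a CPU address, asserts the mapping exists, and notifies the rasterizer directly through the new UnmapMemory hook instead of flushing through system.GPU(). A self-contained sketch of that flow (the map below stands in for the real page-entry state, and Rasterizer for the VideoCore interface):

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <unordered_map>

    struct Rasterizer {
        void UnmapMemory(uint64_t cpu_addr, uint64_t size) { /* drop caches */ }
    };

    std::unordered_map<uint64_t, uint64_t> page_table; // gpu_addr -> cpu_addr
    Rasterizer rasterizer;

    std::optional<uint64_t> GpuToCpuAddress(uint64_t gpu_addr) {
        const auto it = page_table.find(gpu_addr);
        if (it == page_table.end()) {
            return std::nullopt;
        }
        return it->second;
    }

    void Unmap(uint64_t gpu_addr, uint64_t size) {
        const std::optional<uint64_t> cpu_addr = GpuToCpuAddress(gpu_addr);
        assert(cpu_addr); // the diff asserts the mapping exists
        rasterizer.UnmapMemory(*cpu_addr, size);
        page_table.erase(gpu_addr); // stands in for UpdateRange(..., Unmapped, ...)
    }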
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index c078193d9..c35e57689 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -28,7 +28,7 @@ public:
28 }; 28 };
29 29
30 constexpr PageEntry() = default; 30 constexpr PageEntry() = default;
31 constexpr PageEntry(State state) : state{state} {} 31 constexpr PageEntry(State state_) : state{state_} {}
32 constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {} 32 constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}
33 33
34 [[nodiscard]] constexpr bool IsUnmapped() const { 34 [[nodiscard]] constexpr bool IsUnmapped() const {
@@ -68,7 +68,7 @@ static_assert(sizeof(PageEntry) == 4, "PageEntry is too large");
68 68
69class MemoryManager final { 69class MemoryManager final {
70public: 70public:
71 explicit MemoryManager(Core::System& system); 71 explicit MemoryManager(Core::System& system_);
72 ~MemoryManager(); 72 ~MemoryManager();
73 73
74 /// Binds a renderer to the memory manager. 74 /// Binds a renderer to the memory manager.
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 9da9fb4ff..e69de29bb 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -1,250 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/morton.h"
10#include "video_core/surface.h"
11#include "video_core/textures/decoders.h"
12
13namespace VideoCore {
14
15using Surface::GetBytesPerPixel;
16using Surface::PixelFormat;
17
18using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
19using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
20
21template <bool morton_to_linear, PixelFormat format>
22static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
23 u32 tile_width_spacing, u8* buffer, u8* addr) {
24 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
25
26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
27 // pixel values.
28 constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
29 constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
30
31 if constexpr (morton_to_linear) {
32 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
33 stride, height, depth, block_height, block_depth,
34 tile_width_spacing);
35 } else {
36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
37 (height + tile_size_y - 1) / tile_size_y, depth,
38 bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
39 block_height, block_depth, tile_width_spacing);
40 }
41}
42
43static constexpr ConversionArray morton_to_linear_fns = {
44 MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
45 MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
46 MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
47 MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
48 MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
49 MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
50 MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
51 MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
52 MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
53 MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
54 MortonCopy<true, PixelFormat::R8_UNORM>,
55 MortonCopy<true, PixelFormat::R8_SNORM>,
56 MortonCopy<true, PixelFormat::R8_SINT>,
57 MortonCopy<true, PixelFormat::R8_UINT>,
58 MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
59 MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
60 MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
61 MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
62 MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
63 MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
64 MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
65 MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
66 MortonCopy<true, PixelFormat::BC2_UNORM>,
67 MortonCopy<true, PixelFormat::BC3_UNORM>,
68 MortonCopy<true, PixelFormat::BC4_UNORM>,
69 MortonCopy<true, PixelFormat::BC4_SNORM>,
70 MortonCopy<true, PixelFormat::BC5_UNORM>,
71 MortonCopy<true, PixelFormat::BC5_SNORM>,
72 MortonCopy<true, PixelFormat::BC7_UNORM>,
73 MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
74 MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
75 MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
76 MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
77 MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
78 MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
79 MortonCopy<true, PixelFormat::R32G32_FLOAT>,
80 MortonCopy<true, PixelFormat::R32G32_SINT>,
81 MortonCopy<true, PixelFormat::R32_FLOAT>,
82 MortonCopy<true, PixelFormat::R16_FLOAT>,
83 MortonCopy<true, PixelFormat::R16_UNORM>,
84 MortonCopy<true, PixelFormat::R16_SNORM>,
85 MortonCopy<true, PixelFormat::R16_UINT>,
86 MortonCopy<true, PixelFormat::R16_SINT>,
87 MortonCopy<true, PixelFormat::R16G16_UNORM>,
88 MortonCopy<true, PixelFormat::R16G16_FLOAT>,
89 MortonCopy<true, PixelFormat::R16G16_UINT>,
90 MortonCopy<true, PixelFormat::R16G16_SINT>,
91 MortonCopy<true, PixelFormat::R16G16_SNORM>,
92 MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
93 MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
94 MortonCopy<true, PixelFormat::R8G8_UNORM>,
95 MortonCopy<true, PixelFormat::R8G8_SNORM>,
96 MortonCopy<true, PixelFormat::R8G8_SINT>,
97 MortonCopy<true, PixelFormat::R8G8_UINT>,
98 MortonCopy<true, PixelFormat::R32G32_UINT>,
99 MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
100 MortonCopy<true, PixelFormat::R32_UINT>,
101 MortonCopy<true, PixelFormat::R32_SINT>,
102 MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
103 MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
104 MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
105 MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
106 MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
107 MortonCopy<true, PixelFormat::BC2_SRGB>,
108 MortonCopy<true, PixelFormat::BC3_SRGB>,
109 MortonCopy<true, PixelFormat::BC7_SRGB>,
110 MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
111 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
112 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
113 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
114 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
115 MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
116 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
117 MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
118 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
119 MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
120 MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
121 MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
122 MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
123 MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
124 MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
125 MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
126 MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
127 MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
128 MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
129 MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
130 MortonCopy<true, PixelFormat::D32_FLOAT>,
131 MortonCopy<true, PixelFormat::D16_UNORM>,
132 MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
133 MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
134 MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
135};
136
137static constexpr ConversionArray linear_to_morton_fns = {
138 MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
139 MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
140 MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
141 MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
142 MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
143 MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
144 MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
145 MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
146 MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
147 MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
148 MortonCopy<false, PixelFormat::R8_UNORM>,
149 MortonCopy<false, PixelFormat::R8_SNORM>,
150 MortonCopy<false, PixelFormat::R8_SINT>,
151 MortonCopy<false, PixelFormat::R8_UINT>,
152 MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
153 MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
154 MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
155 MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
156 MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
157 MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
158 MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
159 MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
160 MortonCopy<false, PixelFormat::BC2_UNORM>,
161 MortonCopy<false, PixelFormat::BC3_UNORM>,
162 MortonCopy<false, PixelFormat::BC4_UNORM>,
163 MortonCopy<false, PixelFormat::BC4_SNORM>,
164 MortonCopy<false, PixelFormat::BC5_UNORM>,
165 MortonCopy<false, PixelFormat::BC5_SNORM>,
166 MortonCopy<false, PixelFormat::BC7_UNORM>,
167 MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
168 MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
169 // TODO(Subv): Swizzling ASTC formats are not supported
170 nullptr,
171 MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
172 MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
173 MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
174 MortonCopy<false, PixelFormat::R32G32_FLOAT>,
175 MortonCopy<false, PixelFormat::R32G32_SINT>,
176 MortonCopy<false, PixelFormat::R32_FLOAT>,
177 MortonCopy<false, PixelFormat::R16_FLOAT>,
178 MortonCopy<false, PixelFormat::R16_UNORM>,
179 MortonCopy<false, PixelFormat::R16_SNORM>,
180 MortonCopy<false, PixelFormat::R16_UINT>,
181 MortonCopy<false, PixelFormat::R16_SINT>,
182 MortonCopy<false, PixelFormat::R16G16_UNORM>,
183 MortonCopy<false, PixelFormat::R16G16_FLOAT>,
184 MortonCopy<false, PixelFormat::R16G16_UINT>,
185 MortonCopy<false, PixelFormat::R16G16_SINT>,
186 MortonCopy<false, PixelFormat::R16G16_SNORM>,
187 MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
188 MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
189 MortonCopy<false, PixelFormat::R8G8_UNORM>,
190 MortonCopy<false, PixelFormat::R8G8_SNORM>,
191 MortonCopy<false, PixelFormat::R8G8_SINT>,
192 MortonCopy<false, PixelFormat::R8G8_UINT>,
193 MortonCopy<false, PixelFormat::R32G32_UINT>,
194 MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
195 MortonCopy<false, PixelFormat::R32_UINT>,
196 MortonCopy<false, PixelFormat::R32_SINT>,
197 nullptr,
198 nullptr,
199 nullptr,
200 MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
201 MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
202 MortonCopy<false, PixelFormat::BC2_SRGB>,
203 MortonCopy<false, PixelFormat::BC3_SRGB>,
204 MortonCopy<false, PixelFormat::BC7_SRGB>,
205 MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
206 nullptr,
207 nullptr,
208 nullptr,
209 nullptr,
210 nullptr,
211 nullptr,
212 nullptr,
213 nullptr,
214 nullptr,
215 nullptr,
216 nullptr,
217 nullptr,
218 nullptr,
219 nullptr,
220 nullptr,
221 nullptr,
222 nullptr,
223 nullptr,
224 MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
225 MortonCopy<false, PixelFormat::D32_FLOAT>,
226 MortonCopy<false, PixelFormat::D16_UNORM>,
227 MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
228 MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
229 MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
230};
231
232static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
233 switch (mode) {
234 case MortonSwizzleMode::MortonToLinear:
235 return morton_to_linear_fns[static_cast<std::size_t>(format)];
236 case MortonSwizzleMode::LinearToMorton:
237 return linear_to_morton_fns[static_cast<std::size_t>(format)];
238 }
239 UNREACHABLE();
240 return morton_to_linear_fns[static_cast<std::size_t>(format)];
241}
242
243void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
244 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
245 u8* buffer, u8* addr) {
246 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
247 tile_width_spacing, buffer, addr);
248}
249
250} // namespace VideoCore
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index b714a7e3f..e69de29bb 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -1,18 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace VideoCore {
11
12enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, u8* addr);
17
18} // namespace VideoCore
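The deleted morton.cpp dispatched swizzling through two constexpr tables of function pointers indexed by PixelFormat, with nullptr entries for formats whose linear-to-morton path was never implemented (the ASTC family). A trimmed C++ sketch of that dispatch shape, for reference:

    #include <array>
    #include <cstddef>
    #include <cstdint>

    enum class PixelFormat : std::size_t { A8B8G8R8_UNORM, ASTC_2D_4X4_UNORM, MaxPixelFormat };
    using CopyFn = void (*)(const uint8_t* src, uint8_t* dst);

    void CopyA8B8G8R8(const uint8_t*, uint8_t*) {}

    // Function-pointer table indexed by pixel format, as in the removed
    // morton_to_linear_fns / linear_to_morton_fns arrays; nullptr marks formats
    // with no swizzling path (the ASTC family on the linear-to-morton side).
    constexpr std::array<CopyFn, static_cast<std::size_t>(PixelFormat::MaxPixelFormat)>
        copy_fns{&CopyA8B8G8R8, nullptr};

    CopyFn GetCopyFunction(PixelFormat format) {
        return copy_fns[static_cast<std::size_t>(format)];
    }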
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index fc54ca0ef..203f2af05 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -28,8 +28,8 @@ namespace VideoCommon {
28template <class QueryCache, class HostCounter> 28template <class QueryCache, class HostCounter>
29class CounterStreamBase { 29class CounterStreamBase {
30public: 30public:
31 explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) 31 explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
32 : cache{cache}, type{type} {} 32 : cache{cache_}, type{type_} {}
33 33
34 /// Updates the state of the stream, enabling or disabling as needed. 34 /// Updates the state of the stream, enabling or disabling as needed.
35 void Update(bool enabled) { 35 void Update(bool enabled) {
@@ -334,8 +334,8 @@ private:
334template <class HostCounter> 334template <class HostCounter>
335class CachedQueryBase { 335class CachedQueryBase {
336public: 336public:
337 explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) 337 explicit CachedQueryBase(VAddr cpu_addr_, u8* host_ptr_)
338 : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} 338 : cpu_addr{cpu_addr_}, host_ptr{host_ptr_} {}
339 virtual ~CachedQueryBase() = default; 339 virtual ~CachedQueryBase() = default;
340 340
341 CachedQueryBase(CachedQueryBase&&) noexcept = default; 341 CachedQueryBase(CachedQueryBase&&) noexcept = default;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 27ef4c69a..0cb0f387d 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -76,6 +76,9 @@ public:
76 /// Sync memory between guest and host. 76 /// Sync memory between guest and host.
77 virtual void SyncGuestHost() = 0; 77 virtual void SyncGuestHost() = 0;
78 78
79 /// Unmap memory range
80 virtual void UnmapMemory(VAddr addr, u64 size) = 0;
81
79 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 82 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
80 /// and invalidated 83 /// and invalidated
81 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 84 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
@@ -83,6 +86,12 @@ public:
83 /// Notify the host renderer to wait for previous primitive and compute operations. 86 /// Notify the host renderer to wait for previous primitive and compute operations.
84 virtual void WaitForIdle() = 0; 87 virtual void WaitForIdle() = 0;
85 88
89 /// Notify the host renderer to wait for reads and writes to render targets and flush caches.
90 virtual void FragmentBarrier() = 0;
91
92 /// Notify the host renderer to make available previous render target writes.
93 virtual void TiledCacheBarrier() = 0;
94
86 /// Notify the rasterizer to send all written commands to the host GPU. 95 /// Notify the rasterizer to send all written commands to the host GPU.
87 virtual void FlushCommands() = 0; 96 virtual void FlushCommands() = 0;
88 97
@@ -91,8 +100,7 @@ public:
91 100
92 /// Attempt to use a faster method to perform a surface copy 101 /// Attempt to use a faster method to perform a surface copy
93 [[nodiscard]] virtual bool AccelerateSurfaceCopy( 102 [[nodiscard]] virtual bool AccelerateSurfaceCopy(
94 const Tegra::Engines::Fermi2D::Regs::Surface& src, 103 const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
95 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
96 const Tegra::Engines::Fermi2D::Config& copy_config) { 104 const Tegra::Engines::Fermi2D::Config& copy_config) {
97 return false; 105 return false;
98 } 106 }
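RasterizerInterface gains three pure-virtual hooks: UnmapMemory (the counterpart of the MemoryManager change above) plus FragmentBarrier and TiledCacheBarrier for render-target synchronization, so every backend must now implement them. A trimmed illustration of the override shape (the NullRasterizer subclass is hypothetical, and the real declarations use VAddr/u64 rather than uint64_t):

    #include <cstdint>

    // The three hooks added in this diff, trimmed from RasterizerInterface.
    class RasterizerInterface {
    public:
        virtual ~RasterizerInterface() = default;
        /// Unmap memory range
        virtual void UnmapMemory(uint64_t addr, uint64_t size) = 0;
        /// Wait for reads and writes to render targets and flush caches
        virtual void FragmentBarrier() = 0;
        /// Make available previous render target writes
        virtual void TiledCacheBarrier() = 0;
    };

    // Hypothetical no-op backend; real implementations map the barriers to
    // GL/Vulkan pipeline barriers.
    class NullRasterizer final : public RasterizerInterface {
    public:
        void UnmapMemory(uint64_t, uint64_t) override {}
        void FragmentBarrier() override {}
        void TiledCacheBarrier() override {}
    };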
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index d6120c23e..3e4d88c30 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -71,7 +71,7 @@ std::string_view GetInputFlags(PixelImap attribute) {
71 case PixelImap::Unused: 71 case PixelImap::Unused:
72 break; 72 break;
73 } 73 }
74 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); 74 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
75 return {}; 75 return {};
76} 76}
77 77
@@ -123,7 +123,7 @@ std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::Primitive
123 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: 123 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
124 return "TRIANGLES_ADJACENCY"; 124 return "TRIANGLES_ADJACENCY";
125 default: 125 default:
126 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); 126 UNIMPLEMENTED_MSG("topology={}", topology);
127 return "POINTS"; 127 return "POINTS";
128 } 128 }
129} 129}
@@ -137,7 +137,7 @@ std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
137 case Tegra::Shader::OutputTopology::TriangleStrip: 137 case Tegra::Shader::OutputTopology::TriangleStrip:
138 return "TRIANGLE_STRIP"; 138 return "TRIANGLE_STRIP";
139 default: 139 default:
140 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); 140 UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
141 return "points"; 141 return "points";
142 } 142 }
143} 143}
@@ -187,8 +187,8 @@ std::string TextureType(const MetaTexture& meta) {
187 187
188class ARBDecompiler final { 188class ARBDecompiler final {
189public: 189public:
190 explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, 190 explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
191 ShaderType stage, std::string_view identifier); 191 ShaderType stage_, std::string_view identifier);
192 192
193 std::string Code() const { 193 std::string Code() const {
194 return shader_source; 194 return shader_source;
@@ -802,9 +802,9 @@ private:
802 }; 802 };
803}; 803};
804 804
805ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, 805ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
806 ShaderType stage, std::string_view identifier) 806 ShaderType stage_, std::string_view identifier)
807 : device{device}, ir{ir}, registry{registry}, stage{stage} { 807 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
808 DefineGlobalMemory(); 808 DefineGlobalMemory();
809 809
810 AddLine("TEMP RC;"); 810 AddLine("TEMP RC;");
@@ -1134,44 +1134,44 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
1134 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { 1134 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1135 VisitAST(current); 1135 VisitAST(current);
1136 } 1136 }
1137 } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) { 1137 } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
1138 const std::string condition = VisitExpression(ast->condition); 1138 const std::string condition = VisitExpression(if_then->condition);
1139 ResetTemporaries(); 1139 ResetTemporaries();
1140 1140
1141 AddLine("MOVC.U RC.x, {};", condition); 1141 AddLine("MOVC.U RC.x, {};", condition);
1142 AddLine("IF NE.x;"); 1142 AddLine("IF NE.x;");
1143 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { 1143 for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
1144 VisitAST(current); 1144 VisitAST(current);
1145 } 1145 }
1146 AddLine("ENDIF;"); 1146 AddLine("ENDIF;");
1147 } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) { 1147 } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
1148 AddLine("ELSE;"); 1148 AddLine("ELSE;");
1149 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { 1149 for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
1150 VisitAST(current); 1150 VisitAST(current);
1151 } 1151 }
1152 } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { 1152 } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
1153 VisitBlock(ast->nodes); 1153 VisitBlock(decoded->nodes);
1154 } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) { 1154 } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
1155 AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition)); 1155 AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
1156 ResetTemporaries(); 1156 ResetTemporaries();
1157 } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { 1157 } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
1158 const std::string condition = VisitExpression(ast->condition); 1158 const std::string condition = VisitExpression(do_while->condition);
1159 ResetTemporaries(); 1159 ResetTemporaries();
1160 AddLine("REP;"); 1160 AddLine("REP;");
1161 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { 1161 for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
1162 VisitAST(current); 1162 VisitAST(current);
1163 } 1163 }
1164 AddLine("MOVC.U RC.x, {};", condition); 1164 AddLine("MOVC.U RC.x, {};", condition);
1165 AddLine("BRK (NE.x);"); 1165 AddLine("BRK (NE.x);");
1166 AddLine("ENDREP;"); 1166 AddLine("ENDREP;");
1167 } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) { 1167 } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
1168 const bool is_true = ExprIsTrue(ast->condition); 1168 const bool is_true = ExprIsTrue(ast_return->condition);
1169 if (!is_true) { 1169 if (!is_true) {
1170 AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); 1170 AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
1171 AddLine("IF NE.x;"); 1171 AddLine("IF NE.x;");
1172 ResetTemporaries(); 1172 ResetTemporaries();
1173 } 1173 }
1174 if (ast->kills) { 1174 if (ast_return->kills) {
1175 AddLine("KIL TR;"); 1175 AddLine("KIL TR;");
1176 } else { 1176 } else {
1177 Exit(); 1177 Exit();
@@ -1179,11 +1179,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
1179 if (!is_true) { 1179 if (!is_true) {
1180 AddLine("ENDIF;"); 1180 AddLine("ENDIF;");
1181 } 1181 }
1182 } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) { 1182 } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
1183 if (ExprIsTrue(ast->condition)) { 1183 if (ExprIsTrue(ast_break->condition)) {
1184 AddLine("BRK;"); 1184 AddLine("BRK;");
1185 } else { 1185 } else {
1186 AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); 1186 AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
1187 AddLine("BRK (NE.x);"); 1187 AddLine("BRK (NE.x);");
1188 ResetTemporaries(); 1188 ResetTemporaries();
1189 } 1189 }
@@ -1351,7 +1351,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
1351 GetGenericAttributeIndex(index), swizzle); 1351 GetGenericAttributeIndex(index), swizzle);
1352 } 1352 }
1353 } 1353 }
1354 UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index)); 1354 UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
1355 break; 1355 break;
1356 } 1356 }
1357 return "{0, 0, 0, 0}.x"; 1357 return "{0, 0, 0, 0}.x";
@@ -1485,9 +1485,7 @@ void ARBDecompiler::Exit() {
1485 } 1485 }
1486 1486
1487 const auto safe_get_register = [this](u32 reg) -> std::string { 1487 const auto safe_get_register = [this](u32 reg) -> std::string {
1488 // TODO(Rodrigo): Replace with contains once C++20 releases 1488 if (ir.GetRegisters().contains(reg)) {
1489 const auto& used_registers = ir.GetRegisters();
1490 if (used_registers.find(reg) != used_registers.end()) {
1491 return fmt::format("R{}.x", reg); 1489 return fmt::format("R{}.x", reg);
1492 } 1490 }
1493 return "{0, 0, 0, 0}.x"; 1491 return "{0, 0, 0, 0}.x";
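safe_get_register now uses std::set's contains(), available since C++20 on the associative containers, in place of the find() != end() idiom. For example:

    #include <set>

    // C++20: associative containers provide contains(), replacing the
    // find() != end() comparison removed above.
    bool IsRegisterUsed(const std::set<unsigned>& used_registers, unsigned reg) {
        return used_registers.contains(reg);
    }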
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b1c4cd62f..5772cad87 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -22,11 +22,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
22 22
23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
24 24
25Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) 25Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
26 : VideoCommon::BufferBlock{cpu_addr, size} { 26 : BufferBlock{cpu_addr_, size_} {
27 gl_buffer.Create(); 27 gl_buffer.Create();
28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
29 if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { 29 if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); 30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); 31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
32 } 32 }
@@ -34,14 +34,14 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
34 34
35Buffer::~Buffer() = default; 35Buffer::~Buffer() = default;
36 36
37void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { 37void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
38 glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), 38 glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
39 data); 39 static_cast<GLsizeiptr>(data_size), data);
40} 40}
41 41
42void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { 42void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
43 MICROPROFILE_SCOPE(OpenGL_Buffer_Download); 43 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
44 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); 44 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
45 const GLintptr gl_offset = static_cast<GLintptr>(offset); 45 const GLintptr gl_offset = static_cast<GLintptr>(offset);
46 if (read_buffer.handle == 0) { 46 if (read_buffer.handle == 0) {
47 read_buffer.Create(); 47 read_buffer.Create();
@@ -54,17 +54,16 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
54} 54}
55 55
56void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 56void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
57 std::size_t size) { 57 std::size_t copy_size) {
58 glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), 58 glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
59 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); 59 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
60} 60}
61 61
62OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, 62OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
63 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 63 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
64 const Device& device_, std::size_t stream_size) 64 const Device& device_, OGLStreamBuffer& stream_buffer_,
65 : GenericBufferCache{rasterizer, gpu_memory, cpu_memory, 65 StateTracker& state_tracker)
66 std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, 66 : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
67 device{device_} {
68 if (!device.HasFastBufferSubData()) { 67 if (!device.HasFastBufferSubData()) {
69 return; 68 return;
70 } 69 }
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f75b32e31..17ee90316 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -22,18 +22,19 @@ namespace OpenGL {
22class Device; 22class Device;
23class OGLStreamBuffer; 23class OGLStreamBuffer;
24class RasterizerOpenGL; 24class RasterizerOpenGL;
25class StateTracker;
25 26
26class Buffer : public VideoCommon::BufferBlock { 27class Buffer : public VideoCommon::BufferBlock {
27public: 28public:
28 explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); 29 explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
29 ~Buffer(); 30 ~Buffer();
30 31
31 void Upload(std::size_t offset, std::size_t size, const u8* data); 32 void Upload(std::size_t offset, std::size_t data_size, const u8* data);
32 33
33 void Download(std::size_t offset, std::size_t size, u8* data); 34 void Download(std::size_t offset, std::size_t data_size, u8* data);
34 35
35 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 36 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
36 std::size_t size); 37 std::size_t copy_size);
37 38
38 GLuint Handle() const noexcept { 39 GLuint Handle() const noexcept {
39 return gl_buffer.handle; 40 return gl_buffer.handle;
@@ -54,7 +55,8 @@ class OGLBufferCache final : public GenericBufferCache {
54public: 55public:
55 explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, 56 explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
56 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 57 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
57 const Device& device, std::size_t stream_size); 58 const Device& device, OGLStreamBuffer& stream_buffer,
59 StateTracker& state_tracker);
58 ~OGLBufferCache(); 60 ~OGLBufferCache();
59 61
60 BufferInfo GetEmptyBuffer(std::size_t) override; 62 BufferInfo GetEmptyBuffer(std::size_t) override;
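OGLBufferCache no longer builds its own OGLStreamBuffer from a size; it takes a reference to a stream buffer owned by the caller (plus the new StateTracker parameter), moving buffer ownership up into the renderer. A reduced sketch of the ownership change, with illustrative type names:

    #include <cstddef>
    #include <memory>

    struct StreamBuffer {
        explicit StreamBuffer(std::size_t size_) : size{size_} {}
        std::size_t size;
    };

    // Before: the cache owned the stream buffer it allocated from a size.
    class OwningCache {
    public:
        explicit OwningCache(std::size_t stream_size)
            : stream_buffer{std::make_unique<StreamBuffer>(stream_size)} {}

    private:
        std::unique_ptr<StreamBuffer> stream_buffer;
    };

    // After: the caller owns the buffer and the cache borrows it, so one
    // buffer's lifetime is managed alongside the rest of the renderer.
    class BorrowingCache {
    public:
        explicit BorrowingCache(StreamBuffer& stream_buffer_) : stream_buffer{stream_buffer_} {}

    private:
        StreamBuffer& stream_buffer;
    };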
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a94e4f72e..b24179d59 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,9 +5,11 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <cstdlib>
8#include <cstring> 9#include <cstring>
9#include <limits> 10#include <limits>
10#include <optional> 11#include <optional>
12#include <span>
11#include <vector> 13#include <vector>
12 14
13#include <glad/glad.h> 15#include <glad/glad.h>
@@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;
27 29
28constexpr u32 NumStages = 5; 30constexpr u32 NumStages = 5;
29 31
30constexpr std::array LimitUBOs = { 32constexpr std::array LIMIT_UBOS = {
31 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, 33 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
32 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, 34 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
33 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; 35 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
34 36};
35constexpr std::array LimitSSBOs = { 37constexpr std::array LIMIT_SSBOS = {
36 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, 38 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
37 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, 39 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
38 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; 40 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
39 41};
40constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, 42constexpr std::array LIMIT_SAMPLERS = {
41 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, 43 GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
42 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, 44 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
43 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, 45 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
44 GL_MAX_TEXTURE_IMAGE_UNITS, 46 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
45 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; 47 GL_MAX_TEXTURE_IMAGE_UNITS,
46 48 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
47constexpr std::array LimitImages = { 49};
50constexpr std::array LIMIT_IMAGES = {
48 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, 51 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
49 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, 52 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
50 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; 53 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
54};
51 55
52template <typename T> 56template <typename T>
53T GetInteger(GLenum pname) { 57T GetInteger(GLenum pname) {
@@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {
76 return extensions; 80 return extensions;
77} 81}
78 82
79bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) { 83bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
80 return std::find(images.begin(), images.end(), extension) != images.end(); 84 return std::ranges::find(extensions, extension) != extensions.end();
81} 85}
82 86
83u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { 87u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
@@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
91 95
92std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { 96std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
93 std::array<u32, Tegra::Engines::MaxShaderTypes> max; 97 std::array<u32, Tegra::Engines::MaxShaderTypes> max;
94 std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), 98 std::ranges::transform(LIMIT_UBOS, max.begin(),
95 [](GLenum pname) { return GetInteger<u32>(pname); }); 99 [](GLenum pname) { return GetInteger<u32>(pname); });
96 return max; 100 return max;
97} 101}
98 102
@@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
115 for (std::size_t i = 0; i < NumStages; ++i) { 119 for (std::size_t i = 0; i < NumStages; ++i) {
116 const std::size_t stage = stage_swizzle[i]; 120 const std::size_t stage = stage_swizzle[i];
117 bindings[stage] = { 121 bindings[stage] = {
118 Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), 122 Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
119 Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), 123 Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
120 Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; 124 Extract(base_samplers, num_samplers, total_samplers / NumStages,
125 LIMIT_SAMPLERS[stage])};
121 } 126 }
122 127
123 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); 128 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
@@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
130 135
131 // Reserve at least 4 image bindings on the fragment stage. 136 // Reserve at least 4 image bindings on the fragment stage.
132 bindings[4].image = 137 bindings[4].image =
133 Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); 138 Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
134 139
135 // This is guaranteed to be at least 1. 140 // This is guaranteed to be at least 1.
136 const u32 total_extracted_images = num_images / (NumStages - 1); 141 const u32 total_extracted_images = num_images / (NumStages - 1);
@@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
142 continue; 147 continue;
143 } 148 }
144 bindings[stage].image = 149 bindings[stage].image =
145 Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); 150 Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
146 } 151 }
147 152
148 // Compute doesn't care about any of this. 153 // Compute doesn't care about any of this.
@@ -188,6 +193,11 @@ bool IsASTCSupported() {
188 return true; 193 return true;
189} 194}
190 195
196[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
197 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
198 return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
199}
200
191} // Anonymous namespace 201} // Anonymous namespace
192 202
193Device::Device() 203Device::Device()
@@ -206,9 +216,8 @@ Device::Device()
206 "Beta driver 443.24 is known to have issues. There might be performance issues."); 216 "Beta driver 443.24 is known to have issues. There might be performance issues.");
207 disable_fast_buffer_sub_data = true; 217 disable_fast_buffer_sub_data = true;
208 } 218 }
209 219 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
210 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 220 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
211 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
212 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 221 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
213 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 222 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
214 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); 223 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
@@ -224,6 +233,7 @@ Device::Device()
224 has_precise_bug = TestPreciseBug(); 233 has_precise_bug = TestPreciseBug();
225 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; 234 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
226 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; 235 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
236 has_debugging_tool_attached = IsDebugToolAttached(extensions);
227 237
228 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive 238 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
229 // uniform buffers as "push constants" 239 // uniform buffers as "push constants"
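A side note on the IsDebugToolAttached helper added above: Nsight announces itself through injected environment variables, while RenderDoc advertises the GL_EXT_debug_tool extension on the context it wraps. A minimal standalone sketch of the same check, assuming a C++20 compiler for std::span; names here are illustrative rather than part of yuzu's API:

    #include <cstdlib>
    #include <span>
    #include <string_view>

    // True when a frame debugger is likely attached to this GL context.
    [[nodiscard]] bool DebugToolAttached(std::span<const std::string_view> extensions) {
        // Nsight injects itself via environment variables before launch.
        if (std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED")) {
            return true;
        }
        // RenderDoc exposes GL_EXT_debug_tool on the wrapped context.
        for (const std::string_view ext : extensions) {
            if (ext == "GL_EXT_debug_tool") {
                return true;
            }
        }
        return false;
    }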
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8a4b6b9fc..13e66846c 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -36,11 +36,11 @@ public:
36 return GetBaseBindings(static_cast<std::size_t>(shader_type)); 36 return GetBaseBindings(static_cast<std::size_t>(shader_type));
37 } 37 }
38 38
39 std::size_t GetUniformBufferAlignment() const { 39 size_t GetUniformBufferAlignment() const {
40 return uniform_buffer_alignment; 40 return uniform_buffer_alignment;
41 } 41 }
42 42
43 std::size_t GetShaderStorageBufferAlignment() const { 43 size_t GetShaderStorageBufferAlignment() const {
44 return shader_storage_alignment; 44 return shader_storage_alignment;
45 } 45 }
46 46
@@ -104,6 +104,10 @@ public:
104 return has_nv_viewport_array2; 104 return has_nv_viewport_array2;
105 } 105 }
106 106
107 bool HasDebuggingToolAttached() const {
108 return has_debugging_tool_attached;
109 }
110
107 bool UseAssemblyShaders() const { 111 bool UseAssemblyShaders() const {
108 return use_assembly_shaders; 112 return use_assembly_shaders;
109 } 113 }
@@ -118,8 +122,8 @@ private:
118 122
119 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; 123 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
120 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; 124 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
121 std::size_t uniform_buffer_alignment{}; 125 size_t uniform_buffer_alignment{};
122 std::size_t shader_storage_alignment{}; 126 size_t shader_storage_alignment{};
123 u32 max_vertex_attributes{}; 127 u32 max_vertex_attributes{};
124 u32 max_varyings{}; 128 u32 max_varyings{};
125 u32 max_compute_shared_memory_size{}; 129 u32 max_compute_shared_memory_size{};
@@ -135,6 +139,7 @@ private:
135 bool has_precise_bug{}; 139 bool has_precise_bug{};
136 bool has_fast_buffer_sub_data{}; 140 bool has_fast_buffer_sub_data{};
137 bool has_nv_viewport_array2{}; 141 bool has_nv_viewport_array2{};
142 bool has_debugging_tool_attached{};
138 bool use_assembly_shaders{}; 143 bool use_assembly_shaders{};
139 bool use_asynchronous_shaders{}; 144 bool use_asynchronous_shaders{};
140}; 145};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index b532fdcc2..3e9c922f5 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -11,10 +11,10 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {} 14GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
15 15
16GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed) 16GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
17 : FenceBase(address, payload, is_stubbed) {} 17 : FenceBase{address_, payload_, is_stubbed_} {}
18 18
19GLInnerFence::~GLInnerFence() = default; 19GLInnerFence::~GLInnerFence() = default;
20 20
@@ -45,10 +45,10 @@ void GLInnerFence::Wait() {
45 glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED); 45 glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
46} 46}
47 47
48FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, 48FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
49 TextureCacheOpenGL& texture_cache, 49 Tegra::GPU& gpu_, TextureCache& texture_cache_,
50 OGLBufferCache& buffer_cache, QueryCache& query_cache) 50 OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
51 : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {} 51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
52 52
53Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { 53Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
54 return std::make_shared<GLInnerFence>(value, is_stubbed); 54 return std::make_shared<GLInnerFence>(value, is_stubbed);
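For readers unfamiliar with the sync objects GLInnerFence wraps: glFenceSync inserts a marker into the command stream, glGetSynciv polls it, and glClientWaitSync blocks on it (all core since GL 3.2). A reduced RAII sketch of that lifecycle, not the FenceBase hierarchy the fence manager actually uses:

    #include <glad/glad.h>

    // Minimal RAII wrapper over one OpenGL sync object.
    class GLFence {
    public:
        ~GLFence() {
            if (sync) {
                glDeleteSync(sync);
            }
        }

        // Inserts the fence at the current point in the command stream.
        void Queue() {
            sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
        }

        // Non-blocking: true once the GPU has passed the fence.
        bool IsSignaled() const {
            GLint status = GL_UNSIGNALED;
            glGetSynciv(sync, GL_SYNC_STATUS, 1, nullptr, &status);
            return status == GL_SIGNALED;
        }

        // Blocking: waits on the CPU until the fence signals.
        void Wait() const {
            glClientWaitSync(sync, 0, GL_TIMEOUT_IGNORED);
        }

    private:
        GLsync sync{}; // GLsync is a pointer type; {} initializes it to null
    };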
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index da1dcdace..30dbee613 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -17,8 +17,8 @@ namespace OpenGL {
17 17
18class GLInnerFence : public VideoCommon::FenceBase { 18class GLInnerFence : public VideoCommon::FenceBase {
19public: 19public:
20 GLInnerFence(u32 payload, bool is_stubbed); 20 explicit GLInnerFence(u32 payload_, bool is_stubbed_);
21 GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed); 21 explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
22 ~GLInnerFence(); 22 ~GLInnerFence();
23 23
24 void Queue(); 24 void Queue();
@@ -33,13 +33,13 @@ private:
33 33
34using Fence = std::shared_ptr<GLInnerFence>; 34using Fence = std::shared_ptr<GLInnerFence>;
35using GenericFenceManager = 35using GenericFenceManager =
36 VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>; 36 VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
37 37
38class FenceManagerOpenGL final : public GenericFenceManager { 38class FenceManagerOpenGL final : public GenericFenceManager {
39public: 39public:
40 explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, 40 explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
41 TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, 41 TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
42 QueryCache& query_cache); 42 QueryCache& query_cache_);
43 43
44protected: 44protected:
45 Fence CreateFence(u32 value, bool is_stubbed) override; 45 Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
deleted file mode 100644
index b8a512cb6..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6#include <unordered_map>
7#include <utility>
8
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
14
15namespace OpenGL {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18using VideoCore::Surface::SurfaceType;
19
20FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
21
22FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
23
24GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
25 const auto [entry, is_cache_miss] = cache.try_emplace(key);
26 auto& framebuffer{entry->second};
27 if (is_cache_miss) {
28 framebuffer = CreateFramebuffer(key);
29 }
30 return framebuffer.handle;
31}
32
33OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
34 OGLFramebuffer framebuffer;
35 framebuffer.Create();
36
37 // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
38 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
39
40 if (key.zeta) {
41 const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
42 const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
43 key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
44 }
45
46 std::size_t num_buffers = 0;
47 std::array<GLenum, Maxwell::NumRenderTargets> targets;
48
49 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
50 if (!key.colors[index]) {
51 targets[index] = GL_NONE;
52 continue;
53 }
54 const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
55 key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
56
57 const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
58 targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
59 num_buffers = index + 1;
60 }
61
62 if (num_buffers > 0) {
63 glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
64 } else {
65 glDrawBuffer(GL_NONE);
66 }
67
68 return framebuffer;
69}
70
71std::size_t FramebufferCacheKey::Hash() const noexcept {
72 std::size_t hash = std::hash<View>{}(zeta);
73 for (const auto& color : colors) {
74 hash ^= std::hash<View>{}(color);
75 }
76 hash ^= static_cast<std::size_t>(color_attachments) << 16;
77 return hash;
78}
79
80bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
81 return std::tie(colors, zeta, color_attachments) ==
82 std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
83}
84
85} // namespace OpenGL
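The deleted file above is, at its core, the try_emplace caching idiom: one hash lookup that either finds the entry or default-constructs it, with the expensive object built only on a miss. A generic sketch of just that idiom, with hypothetical Key/Resource/Factory parameters standing in for FramebufferCacheKey, OGLFramebuffer, and CreateFramebuffer:

    #include <unordered_map>
    #include <utility>

    template <typename Key, typename Resource, typename Factory>
    class ResourceCache {
    public:
        explicit ResourceCache(Factory factory_) : factory{std::move(factory_)} {}

        // Returns the cached resource for 'key', creating it on first use.
        Resource& Get(const Key& key) {
            const auto [entry, is_cache_miss] = cache.try_emplace(key);
            if (is_cache_miss) {
                entry->second = factory(key); // pay construction cost only once
            }
            return entry->second;
        }

    private:
        std::unordered_map<Key, Resource> cache;
        Factory factory;
    };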
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
deleted file mode 100644
index 8f698fee0..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <unordered_map>
10
11#include <glad/glad.h>
12
13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/renderer_opengl/gl_resource_manager.h"
16#include "video_core/renderer_opengl/gl_texture_cache.h"
17
18namespace OpenGL {
19
20constexpr std::size_t BitsPerAttachment = 4;
21
22struct FramebufferCacheKey {
23 View zeta;
24 std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
25 u32 color_attachments = 0;
26
27 std::size_t Hash() const noexcept;
28
29 bool operator==(const FramebufferCacheKey& rhs) const noexcept;
30
31 bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
32 return !operator==(rhs);
33 }
34
35 void SetAttachment(std::size_t index, u32 attachment) {
36 color_attachments |= attachment << (BitsPerAttachment * index);
37 }
38};
39
40} // namespace OpenGL
41
42namespace std {
43
44template <>
45struct hash<OpenGL::FramebufferCacheKey> {
46 std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
47 return k.Hash();
48 }
49};
50
51} // namespace std
52
53namespace OpenGL {
54
55class FramebufferCacheOpenGL {
56public:
57 FramebufferCacheOpenGL();
58 ~FramebufferCacheOpenGL();
59
60 GLuint GetFramebuffer(const FramebufferCacheKey& key);
61
62private:
63 OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
64
65 std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
66};
67
68} // namespace OpenGL
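One observation on the deleted key: Hash() folds the color views together with plain XOR, so any two keys whose color attachments are permuted hash identically. The conventional alternative, had the cache survived, is a Boost-style order-sensitive mixer; a sketch for comparison (the magic constant is Boost's, and this code never existed in this tree):

    #include <cstddef>
    #include <functional>

    // Order-sensitive combine: swapping two values changes the seed,
    // unlike the XOR fold in the deleted Hash() above.
    template <typename T>
    void HashCombine(std::size_t& seed, const T& value) {
        seed ^= std::hash<T>{}(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }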
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 1a3d9720e..acebbf5f4 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -30,11 +30,9 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
30 30
31} // Anonymous namespace 31} // Anonymous namespace
32 32
33QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, 33QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
34 Tegra::MemoryManager& gpu_memory) 34 Tegra::MemoryManager& gpu_memory_)
35 : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter>( 35 : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
36 rasterizer, maxwell3d, gpu_memory),
37 gl_rasterizer{rasterizer} {}
38 36
39QueryCache::~QueryCache() = default; 37QueryCache::~QueryCache() = default;
40 38
@@ -59,10 +57,11 @@ bool QueryCache::AnyCommandQueued() const noexcept {
59 return gl_rasterizer.AnyCommandQueued(); 57 return gl_rasterizer.AnyCommandQueued();
60} 58}
61 59
62HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, 60HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
63 VideoCore::QueryType type) 61 VideoCore::QueryType type_)
64 : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache}, 62 : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{
65 type{type}, query{cache.AllocateQuery(type)} { 63 cache.AllocateQuery(
64 type)} {
66 glBeginQuery(GetTarget(type), query.handle); 65 glBeginQuery(GetTarget(type), query.handle);
67} 66}
68 67
@@ -86,13 +85,14 @@ u64 HostCounter::BlockingQuery() const {
86 return static_cast<u64>(value); 85 return static_cast<u64>(value);
87} 86}
88 87
89CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) 88CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
90 : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {} 89 u8* host_ptr_)
90 : CachedQueryBase{cpu_addr_, host_ptr_}, cache{&cache_}, type{type_} {}
91 91
92CachedQuery::~CachedQuery() = default; 92CachedQuery::~CachedQuery() = default;
93 93
94CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept 94CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
95 : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} 95 : CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
96 96
97CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { 97CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
98 cache = rhs.cache; 98 cache = rhs.cache;
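For context on the HostCounter plumbing above, these are the raw GL calls it wraps: a query object is created against a target, draws happen between Begin/End, and reading GL_QUERY_RESULT blocks until the GPU catches up. A self-contained sketch; the real class additionally chains dependent counters and defers this blocking read:

    #include <cstdint>
    #include <glad/glad.h>

    // Runs 'draw' inside a samples-passed query and blocks for the result.
    template <typename Draw>
    std::uint64_t CountSamplesPassed(Draw&& draw) {
        GLuint query = 0;
        glCreateQueries(GL_SAMPLES_PASSED, 1, &query); // DSA-style, GL 4.5
        glBeginQuery(GL_SAMPLES_PASSED, query);
        draw();
        glEndQuery(GL_SAMPLES_PASSED);

        GLuint64 value = 0;
        glGetQueryObjectui64v(query, GL_QUERY_RESULT, &value); // blocks until ready
        glDeleteQueries(1, &query);
        return value;
    }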
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 82cac51ee..7bbe5cfe9 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -29,8 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
29class QueryCache final 29class QueryCache final
30 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { 30 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
31public: 31public:
32 explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, 32 explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
33 Tegra::MemoryManager& gpu_memory); 33 Tegra::MemoryManager& gpu_memory_);
34 ~QueryCache(); 34 ~QueryCache();
35 35
36 OGLQuery AllocateQuery(VideoCore::QueryType type); 36 OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -46,8 +46,8 @@ private:
46 46
47class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { 47class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
48public: 48public:
49 explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, 49 explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
50 VideoCore::QueryType type); 50 VideoCore::QueryType type_);
51 ~HostCounter(); 51 ~HostCounter();
52 52
53 void EndQuery(); 53 void EndQuery();
@@ -62,8 +62,8 @@ private:
62 62
63class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> { 63class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
64public: 64public:
65 explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, 65 explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
66 u8* host_ptr); 66 u8* host_ptr_);
67 ~CachedQuery() override; 67 ~CachedQuery() override;
68 68
69 CachedQuery(CachedQuery&& rhs) noexcept; 69 CachedQuery(CachedQuery&& rhs) noexcept;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index cfddbde5d..8aa63d329 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,12 +25,15 @@
25#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
26#include "video_core/engines/shader_type.h" 26#include "video_core/engines/shader_type.h"
27#include "video_core/memory_manager.h" 27#include "video_core/memory_manager.h"
28#include "video_core/renderer_opengl/gl_device.h"
28#include "video_core/renderer_opengl/gl_query_cache.h" 29#include "video_core/renderer_opengl/gl_query_cache.h"
29#include "video_core/renderer_opengl/gl_rasterizer.h" 30#include "video_core/renderer_opengl/gl_rasterizer.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 31#include "video_core/renderer_opengl/gl_shader_cache.h"
32#include "video_core/renderer_opengl/gl_texture_cache.h"
31#include "video_core/renderer_opengl/maxwell_to_gl.h" 33#include "video_core/renderer_opengl/maxwell_to_gl.h"
32#include "video_core/renderer_opengl/renderer_opengl.h" 34#include "video_core/renderer_opengl/renderer_opengl.h"
33#include "video_core/shader_cache.h" 35#include "video_core/shader_cache.h"
36#include "video_core/texture_cache/texture_cache.h"
34 37
35namespace OpenGL { 38namespace OpenGL {
36 39
@@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
55 58
56namespace { 59namespace {
57 60
58constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; 61constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
59constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = 62constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
60 NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; 63 NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
61constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = 64constexpr size_t TOTAL_CONST_BUFFER_BYTES =
62 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; 65 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
63 66
64constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; 67constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
65constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; 68constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
69
70constexpr size_t MAX_TEXTURES = 192;
71constexpr size_t MAX_IMAGES = 48;
72
73struct TextureHandle {
74 constexpr TextureHandle(u32 data, bool via_header_index) {
75 const Tegra::Texture::TextureHandle handle{data};
76 image = handle.tic_id;
77 sampler = via_header_index ? image : handle.tsc_id.Value();
78 }
79
80 u32 image;
81 u32 sampler;
82};
66 83
67template <typename Engine, typename Entry> 84template <typename Engine, typename Entry>
68Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 85TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
69 ShaderType shader_type, std::size_t index = 0) { 86 ShaderType shader_type, size_t index = 0) {
70 if constexpr (std::is_same_v<Entry, SamplerEntry>) { 87 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
71 if (entry.is_separated) { 88 if (entry.is_separated) {
72 const u32 buffer_1 = entry.buffer; 89 const u32 buffer_1 = entry.buffer;
@@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
75 const u32 offset_2 = entry.secondary_offset; 92 const u32 offset_2 = entry.secondary_offset;
76 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); 93 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
77 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); 94 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
78 return engine.GetTextureInfo(handle_1 | handle_2); 95 return TextureHandle(handle_1 | handle_2, via_header_index);
79 } 96 }
80 } 97 }
81 if (entry.is_bindless) { 98 if (entry.is_bindless) {
82 const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); 99 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
83 return engine.GetTextureInfo(handle); 100 return TextureHandle(raw, via_header_index);
84 }
85
86 const auto& gpu_profile = engine.AccessGuestDriverProfile();
87 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
88 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
89 return engine.GetStageTexture(shader_type, offset);
90 } else {
91 return engine.GetTexture(offset);
92 } 101 }
102 const u32 buffer = engine.GetBoundBuffer();
103 const u64 offset = (entry.offset + index) * sizeof(u32);
104 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
93} 105}
94 106
95std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, 107std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
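On the TextureHandle struct introduced above: Maxwell packs the texture-header (TIC) and sampler (TSC) table indices into one 32-bit word, and in "via header index" mode the TIC index doubles as the sampler index. A plain-shift sketch of the same unpacking, assuming the 20-bit TIC / 12-bit TSC split that yuzu's BitField declarations describe:

    #include <cstdint>

    struct UnpackedHandle {
        std::uint32_t image;   // TIC table index
        std::uint32_t sampler; // TSC table index
    };

    // Assumed layout: bits [0,20) = TIC index, bits [20,32) = TSC index.
    constexpr UnpackedHandle Unpack(std::uint32_t raw, bool via_header_index) {
        const std::uint32_t tic = raw & 0xfffffu;
        const std::uint32_t tsc = raw >> 20;
        return {tic, via_header_index ? tic : tsc};
    }

    static_assert(Unpack(0x00500003, false).sampler == 0x5);
    static_assert(Unpack(0x00500003, true).sampler == 0x3);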
@@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
97 if (!entry.IsIndirect()) { 109 if (!entry.IsIndirect()) {
98 return entry.GetSize(); 110 return entry.GetSize();
99 } 111 }
100
101 if (buffer.size > Maxwell::MaxConstBufferSize) { 112 if (buffer.size > Maxwell::MaxConstBufferSize) {
102 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, 113 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
103 Maxwell::MaxConstBufferSize); 114 Maxwell::MaxConstBufferSize);
@@ -131,7 +142,7 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
131 case 43: 142 case 43:
132 return {GL_BACK_SECONDARY_COLOR_NV, 0}; 143 return {GL_BACK_SECONDARY_COLOR_NV, 0};
133 } 144 }
134 UNIMPLEMENTED_MSG("index={}", static_cast<int>(index)); 145 UNIMPLEMENTED_MSG("index={}", index);
135 return {GL_POSITION, 0}; 146 return {GL_POSITION, 0};
136} 147}
137 148
@@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss
147 reinterpret_cast<const GLuint*>(ssbos)); 158 reinterpret_cast<const GLuint*>(ssbos));
148} 159}
149 160
161ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
162 if (entry.is_buffer) {
163 return ImageViewType::Buffer;
164 }
165 switch (entry.type) {
166 case Tegra::Shader::TextureType::Texture1D:
167 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
168 case Tegra::Shader::TextureType::Texture2D:
169 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
170 case Tegra::Shader::TextureType::Texture3D:
171 return ImageViewType::e3D;
172 case Tegra::Shader::TextureType::TextureCube:
173 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
174 }
175 UNREACHABLE();
176 return ImageViewType::e2D;
177}
178
179ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
180 switch (entry.type) {
181 case Tegra::Shader::ImageType::Texture1D:
182 return ImageViewType::e1D;
183 case Tegra::Shader::ImageType::Texture1DArray:
184 return ImageViewType::e1DArray;
185 case Tegra::Shader::ImageType::Texture2D:
186 return ImageViewType::e2D;
187 case Tegra::Shader::ImageType::Texture2DArray:
188 return ImageViewType::e2DArray;
189 case Tegra::Shader::ImageType::Texture3D:
190 return ImageViewType::e3D;
191 case Tegra::Shader::ImageType::TextureBuffer:
192 return ImageViewType::Buffer;
193 }
194 UNREACHABLE();
195 return ImageViewType::e2D;
196}
197
150} // Anonymous namespace 198} // Anonymous namespace
151 199
152RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, 200RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
153 Core::Memory::Memory& cpu_memory, const Device& device_, 201 Core::Memory::Memory& cpu_memory_, const Device& device_,
154 ScreenInfo& screen_info_, ProgramManager& program_manager_, 202 ScreenInfo& screen_info_, ProgramManager& program_manager_,
155 StateTracker& state_tracker_) 203 StateTracker& state_tracker_)
156 : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), 204 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
157 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), 205 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
158 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), 206 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
159 texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), 207 stream_buffer(device, state_tracker),
160 shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device), 208 texture_cache_runtime(device, program_manager, state_tracker),
209 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
210 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
161 query_cache(*this, maxwell3d, gpu_memory), 211 query_cache(*this, maxwell3d, gpu_memory),
162 buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE), 212 buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
163 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 213 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
164 async_shaders(emu_window) { 214 async_shaders(emu_window_) {
165 CheckExtensions();
166
167 unified_uniform_buffer.Create(); 215 unified_uniform_buffer.Create();
168 glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); 216 glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
169 217
@@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra:
174 nullptr, 0); 222 nullptr, 0);
175 } 223 }
176 } 224 }
177
178 if (device.UseAsynchronousShaders()) { 225 if (device.UseAsynchronousShaders()) {
179 async_shaders.AllocateWorkers(); 226 async_shaders.AllocateWorkers();
180 } 227 }
@@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
186 } 233 }
187} 234}
188 235
189void RasterizerOpenGL::CheckExtensions() {
190 if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
191 LOG_WARNING(
192 Render_OpenGL,
193 "Anisotropic filter is not supported! This can cause graphical issues in some games.");
194 }
195}
196
197void RasterizerOpenGL::SetupVertexFormat() { 236void RasterizerOpenGL::SetupVertexFormat() {
198 auto& flags = maxwell3d.dirty.flags; 237 auto& flags = maxwell3d.dirty.flags;
199 if (!flags[Dirty::VertexFormats]) { 238 if (!flags[Dirty::VertexFormats]) {
@@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
316 return info.offset; 355 return info.offset;
317} 356}
318 357
319void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 358void RasterizerOpenGL::SetupShaders() {
320 MICROPROFILE_SCOPE(OpenGL_Shader); 359 MICROPROFILE_SCOPE(OpenGL_Shader);
321 u32 clip_distances = 0; 360 u32 clip_distances = 0;
322 361
362 std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
363 image_view_indices.clear();
364 sampler_handles.clear();
365
366 texture_cache.SynchronizeGraphicsDescriptors();
367
323 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 368 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
324 const auto& shader_config = maxwell3d.regs.shader_config[index]; 369 const auto& shader_config = maxwell3d.regs.shader_config[index];
325 const auto program{static_cast<Maxwell::ShaderProgram>(index)}; 370 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
338 } 383 }
339 continue; 384 continue;
340 } 385 }
341
342 // Currently this stages are not supported in the OpenGL backend. 386 // Currently this stages are not supported in the OpenGL backend.
343 // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL 387 // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
344 if (program == Maxwell::ShaderProgram::TesselationControl || 388 if (program == Maxwell::ShaderProgram::TesselationControl ||
@@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
347 } 391 }
348 392
349 Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); 393 Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
350
351 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; 394 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
352 switch (program) { 395 switch (program) {
353 case Maxwell::ShaderProgram::VertexA: 396 case Maxwell::ShaderProgram::VertexA:
@@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
363 default: 406 default:
364 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 407 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
365 shader_config.enable.Value(), shader_config.offset); 408 shader_config.enable.Value(), shader_config.offset);
409 break;
366 } 410 }
367 411
368 // Stage indices are 0 - 5 412 // Stage indices are 0 - 5
369 const std::size_t stage = index == 0 ? 0 : index - 1; 413 const size_t stage = index == 0 ? 0 : index - 1;
414 shaders[stage] = shader;
415
370 SetupDrawConstBuffers(stage, shader); 416 SetupDrawConstBuffers(stage, shader);
371 SetupDrawGlobalMemory(stage, shader); 417 SetupDrawGlobalMemory(stage, shader);
372 SetupDrawTextures(stage, shader); 418 SetupDrawTextures(shader, stage);
373 SetupDrawImages(stage, shader); 419 SetupDrawImages(shader, stage);
374 420
375 // Workaround for Intel drivers. 421 // Workaround for Intel drivers.
376 // When a clip distance is enabled but not set in the shader it crops parts of the screen 422 // When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
384 ++index; 430 ++index;
385 } 431 }
386 } 432 }
387
388 SyncClipEnabled(clip_distances); 433 SyncClipEnabled(clip_distances);
389 maxwell3d.dirty.flags[Dirty::Shaders] = false; 434 maxwell3d.dirty.flags[Dirty::Shaders] = false;
435
436 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
437 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
438
439 size_t image_view_index = 0;
440 size_t texture_index = 0;
441 size_t image_index = 0;
442 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
443 const Shader* const shader = shaders[stage];
444 if (shader) {
445 const auto base = device.GetBaseBindings(stage);
446 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
447 texture_index, image_index);
448 }
449 }
390} 450}
391 451
392std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 452std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
417 shader_cache.LoadDiskCache(title_id, stop_loading, callback); 477 shader_cache.LoadDiskCache(title_id, stop_loading, callback);
418} 478}
419 479
420void RasterizerOpenGL::ConfigureFramebuffers() {
421 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
422 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
423 return;
424 }
425 maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
426
427 texture_cache.GuardRenderTargets(true);
428
429 View depth_surface = texture_cache.GetDepthBufferSurface(true);
430
431 const auto& regs = maxwell3d.regs;
432 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
433
434 // Bind the framebuffer surfaces
435 FramebufferCacheKey key;
436 const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
437 for (std::size_t index = 0; index < colors_count; ++index) {
438 View color_surface{texture_cache.GetColorBufferSurface(index, true)};
439 if (!color_surface) {
440 continue;
441 }
442 // Assume that a surface will be written to if it is used as a framebuffer, even
443 // if the shader doesn't actually write to it.
444 texture_cache.MarkColorBufferInUse(index);
445
446 key.SetAttachment(index, regs.rt_control.GetMap(index));
447 key.colors[index] = std::move(color_surface);
448 }
449
450 if (depth_surface) {
451 // Assume that a surface will be written to if it is used as a framebuffer, even if
452 // the shader doesn't actually write to it.
453 texture_cache.MarkDepthBufferInUse();
454 key.zeta = std::move(depth_surface);
455 }
456
457 texture_cache.GuardRenderTargets(false);
458
459 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
460}
461
462void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
463 const auto& regs = maxwell3d.regs;
464
465 texture_cache.GuardRenderTargets(true);
466 View color_surface;
467
468 if (using_color) {
469 // Determine if we have to preserve the contents.
470 // First we have to make sure all clear masks are enabled.
471 bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
472 !regs.clear_buffers.B || !regs.clear_buffers.A;
473 const std::size_t index = regs.clear_buffers.RT;
474 if (regs.clear_flags.scissor) {
475 // Then we have to confirm scissor testing clears the whole image.
476 const auto& scissor = regs.scissor_test[0];
477 preserve_contents |= scissor.min_x > 0;
478 preserve_contents |= scissor.min_y > 0;
479 preserve_contents |= scissor.max_x < regs.rt[index].width;
480 preserve_contents |= scissor.max_y < regs.rt[index].height;
481 }
482
483 color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
484 texture_cache.MarkColorBufferInUse(index);
485 }
486
487 View depth_surface;
488 if (using_depth_stencil) {
489 bool preserve_contents = false;
490 if (regs.clear_flags.scissor) {
491 // For depth stencil clears we only have to confirm scissor test covers the whole image.
492 const auto& scissor = regs.scissor_test[0];
493 preserve_contents |= scissor.min_x > 0;
494 preserve_contents |= scissor.min_y > 0;
495 preserve_contents |= scissor.max_x < regs.zeta_width;
496 preserve_contents |= scissor.max_y < regs.zeta_height;
497 }
498
499 depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
500 texture_cache.MarkDepthBufferInUse();
501 }
502 texture_cache.GuardRenderTargets(false);
503
504 FramebufferCacheKey key;
505 key.colors[0] = std::move(color_surface);
506 key.zeta = std::move(depth_surface);
507
508 state_tracker.NotifyFramebuffer();
509 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
510}
511
512void RasterizerOpenGL::Clear() { 480void RasterizerOpenGL::Clear() {
513 if (!maxwell3d.ShouldExecute()) { 481 if (!maxwell3d.ShouldExecute()) {
514 return; 482 return;
@@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() {
523 regs.clear_buffers.A) { 491 regs.clear_buffers.A) {
524 use_color = true; 492 use_color = true;
525 493
526 state_tracker.NotifyColorMask0(); 494 const GLuint index = regs.clear_buffers.RT;
527 glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, 495 state_tracker.NotifyColorMask(index);
496 glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
528 regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); 497 regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
529 498
530 // TODO(Rodrigo): Determine if clamping is used on clears 499 // TODO(Rodrigo): Determine if clamping is used on clears
@@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() {
557 state_tracker.NotifyScissor0(); 526 state_tracker.NotifyScissor0();
558 glDisablei(GL_SCISSOR_TEST, 0); 527 glDisablei(GL_SCISSOR_TEST, 0);
559 } 528 }
560
561 UNIMPLEMENTED_IF(regs.clear_flags.viewport); 529 UNIMPLEMENTED_IF(regs.clear_flags.viewport);
562 530
563 ConfigureClearFramebuffer(use_color, use_depth || use_stencil); 531 {
532 auto lock = texture_cache.AcquireLock();
533 texture_cache.UpdateRenderTargets(true);
534 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
535 }
564 536
565 if (use_color) { 537 if (use_color) {
566 glClearBufferfv(GL_COLOR, 0, regs.clear_color); 538 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
567 } 539 }
568
569 if (use_depth && use_stencil) { 540 if (use_depth && use_stencil) {
570 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); 541 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
571 } else if (use_depth) { 542 } else if (use_depth) {
@@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
622 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 593 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
623 594
624 // Prepare the vertex array. 595 // Prepare the vertex array.
625 const bool invalidated = buffer_cache.Map(buffer_size); 596 buffer_cache.Map(buffer_size);
626
627 if (invalidated) {
628 // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
629 auto& dirty = maxwell3d.dirty.flags;
630 dirty[Dirty::VertexBuffers] = true;
631 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
632 dirty[index] = true;
633 }
634 }
635 597
636 // Prepare vertex array format. 598 // Prepare vertex array format.
637 SetupVertexFormat(); 599 SetupVertexFormat();
@@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
655 } 617 }
656 618
657 // Setup shaders and their used resources. 619 // Setup shaders and their used resources.
658 texture_cache.GuardSamplers(true); 620 auto lock = texture_cache.AcquireLock();
659 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); 621 SetupShaders();
660 SetupShaders(primitive_mode);
661 texture_cache.GuardSamplers(false);
662
663 ConfigureFramebuffers();
664 622
665 // Signal the buffer cache that we are not going to upload more things. 623 // Signal the buffer cache that we are not going to upload more things.
666 buffer_cache.Unmap(); 624 buffer_cache.Unmap();
667 625 texture_cache.UpdateRenderTargets(false);
626 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
668 program_manager.BindGraphicsPipeline(); 627 program_manager.BindGraphicsPipeline();
669 628
670 if (texture_cache.TextureBarrier()) { 629 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
671 glTextureBarrier();
672 }
673
674 BeginTransformFeedback(primitive_mode); 630 BeginTransformFeedback(primitive_mode);
675 631
676 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); 632 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
@@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
722 buffer_cache.Acquire(); 678 buffer_cache.Acquire();
723 current_cbuf = 0; 679 current_cbuf = 0;
724 680
725 auto kernel = shader_cache.GetComputeKernel(code_addr); 681 Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
726 program_manager.BindCompute(kernel->GetHandle());
727 682
728 SetupComputeTextures(kernel); 683 auto lock = texture_cache.AcquireLock();
729 SetupComputeImages(kernel); 684 BindComputeTextures(kernel);
730 685
731 const std::size_t buffer_size = 686 const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
732 Tegra::Engines::KeplerCompute::NumConstBuffers * 687 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
733 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
734 buffer_cache.Map(buffer_size); 688 buffer_cache.Map(buffer_size);
735 689
736 SetupComputeConstBuffers(kernel); 690 SetupComputeConstBuffers(kernel);
@@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
739 buffer_cache.Unmap(); 693 buffer_cache.Unmap();
740 694
741 const auto& launch_desc = kepler_compute.launch_description; 695 const auto& launch_desc = kepler_compute.launch_description;
742 program_manager.BindCompute(kernel->GetHandle());
743 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 696 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
744 ++num_queued_commands; 697 ++num_queued_commands;
745} 698}
@@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
760 if (addr == 0 || size == 0) { 713 if (addr == 0 || size == 0) {
761 return; 714 return;
762 } 715 }
763 texture_cache.FlushRegion(addr, size); 716 {
717 auto lock = texture_cache.AcquireLock();
718 texture_cache.DownloadMemory(addr, size);
719 }
764 buffer_cache.FlushRegion(addr, size); 720 buffer_cache.FlushRegion(addr, size);
765 query_cache.FlushRegion(addr, size); 721 query_cache.FlushRegion(addr, size);
766} 722}
@@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
769 if (!Settings::IsGPULevelHigh()) { 725 if (!Settings::IsGPULevelHigh()) {
770 return buffer_cache.MustFlushRegion(addr, size); 726 return buffer_cache.MustFlushRegion(addr, size);
771 } 727 }
772 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); 728 return texture_cache.IsRegionGpuModified(addr, size) ||
729 buffer_cache.MustFlushRegion(addr, size);
773} 730}
774 731
775void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 732void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
777 if (addr == 0 || size == 0) { 734 if (addr == 0 || size == 0) {
778 return; 735 return;
779 } 736 }
780 texture_cache.InvalidateRegion(addr, size); 737 {
738 auto lock = texture_cache.AcquireLock();
739 texture_cache.WriteMemory(addr, size);
740 }
781 shader_cache.InvalidateRegion(addr, size); 741 shader_cache.InvalidateRegion(addr, size);
782 buffer_cache.InvalidateRegion(addr, size); 742 buffer_cache.InvalidateRegion(addr, size);
783 query_cache.InvalidateRegion(addr, size); 743 query_cache.InvalidateRegion(addr, size);
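A pattern worth naming in the hunks above: every texture-cache call is now wrapped in a block that first takes `auto lock = texture_cache.AcquireLock();`, which implies the cache hands its callers a scoped lock to hold across compound operations. A hypothetical sketch of that accessor, assuming AcquireLock simply returns a std::unique_lock over an internal mutex (the cache's real locking is not shown in this diff):

    #include <cstddef>
    #include <cstdint>
    #include <mutex>

    class GuardedCache {
    public:
        // Caller holds the returned lock across a compound operation,
        // e.g. WriteMemory followed by UpdateRenderTargets.
        [[nodiscard]] std::unique_lock<std::mutex> AcquireLock() {
            return std::unique_lock{mutex};
        }

        void WriteMemory(std::uintptr_t addr, std::size_t size) {
            // Precondition: the lock from AcquireLock is held.
        }

    private:
        std::mutex mutex;
    };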
@@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
788 if (addr == 0 || size == 0) { 748 if (addr == 0 || size == 0) {
789 return; 749 return;
790 } 750 }
791 texture_cache.OnCPUWrite(addr, size); 751 {
752 auto lock = texture_cache.AcquireLock();
753 texture_cache.WriteMemory(addr, size);
754 }
792 shader_cache.OnCPUWrite(addr, size); 755 shader_cache.OnCPUWrite(addr, size);
793 buffer_cache.OnCPUWrite(addr, size); 756 buffer_cache.OnCPUWrite(addr, size);
794} 757}
795 758
796void RasterizerOpenGL::SyncGuestHost() { 759void RasterizerOpenGL::SyncGuestHost() {
797 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 760 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
798 texture_cache.SyncGuestHost();
799 buffer_cache.SyncGuestHost(); 761 buffer_cache.SyncGuestHost();
800 shader_cache.SyncGuestHost(); 762 shader_cache.SyncGuestHost();
801} 763}
802 764
765void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
766 {
767 auto lock = texture_cache.AcquireLock();
768 texture_cache.UnmapMemory(addr, size);
769 }
770 buffer_cache.OnCPUWrite(addr, size);
771 shader_cache.OnCPUWrite(addr, size);
772}
773
803void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { 774void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
804 if (!gpu.IsAsync()) { 775 if (!gpu.IsAsync()) {
805 gpu_memory.Write<u32>(addr, value); 776 gpu_memory.Write<u32>(addr, value);
@@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {
841 GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); 812 GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
842} 813}
843 814
815void RasterizerOpenGL::FragmentBarrier() {
816 glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
817}
818
819void RasterizerOpenGL::TiledCacheBarrier() {
820 glTextureBarrier();
821}
822
844void RasterizerOpenGL::FlushCommands() { 823void RasterizerOpenGL::FlushCommands() {
845 // Only flush when we have commands queued to OpenGL. 824 // Only flush when we have commands queued to OpenGL.
846 if (num_queued_commands == 0) { 825 if (num_queued_commands == 0) {
@@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() {
854 // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. 833 // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
855 num_queued_commands = 0; 834 num_queued_commands = 0;
856 835
836 fence_manager.TickFrame();
857 buffer_cache.TickFrame(); 837 buffer_cache.TickFrame();
838 {
839 auto lock = texture_cache.AcquireLock();
840 texture_cache.TickFrame();
841 }
858} 842}
859 843
860bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 844bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
861 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 845 const Tegra::Engines::Fermi2D::Surface& dst,
862 const Tegra::Engines::Fermi2D::Config& copy_config) { 846 const Tegra::Engines::Fermi2D::Config& copy_config) {
863 MICROPROFILE_SCOPE(OpenGL_Blits); 847 MICROPROFILE_SCOPE(OpenGL_Blits);
864 texture_cache.DoFermiCopy(src, dst, copy_config); 848 auto lock = texture_cache.AcquireLock();
849 texture_cache.BlitImage(dst, src, copy_config);
865 return true; 850 return true;
866} 851}
867 852
868bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, 853bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
869 VAddr framebuffer_addr, u32 pixel_stride) { 854 VAddr framebuffer_addr, u32 pixel_stride) {
870 if (!framebuffer_addr) { 855 if (framebuffer_addr == 0) {
871 return {}; 856 return false;
872 } 857 }
873
874 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 858 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
875 859
876 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; 860 auto lock = texture_cache.AcquireLock();
877 if (!surface) { 861 ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
878 return {}; 862 if (!image_view) {
863 return false;
879 } 864 }
880
881 // Verify that the cached surface is the same size and format as the requested framebuffer 865 // Verify that the cached surface is the same size and format as the requested framebuffer
882 const auto& params{surface->GetSurfaceParams()}; 866 // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
883 const auto& pixel_format{ 867 // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
884 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
885 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
886 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
887 868
888 if (params.pixel_format != pixel_format) { 869 screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
889 LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); 870 screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
890 } 871 return true;
872}
891 873
892 screen_info.display_texture = surface->GetTexture(); 874void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
893 screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; 875 image_view_indices.clear();
876 sampler_handles.clear();
894 877
895 return true; 878 texture_cache.SynchronizeComputeDescriptors();
879
880 SetupComputeTextures(kernel);
881 SetupComputeImages(kernel);
882
883 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
884 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
885
886 program_manager.BindCompute(kernel->GetHandle());
887 size_t image_view_index = 0;
888 size_t texture_index = 0;
889 size_t image_index = 0;
890 BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
891}
892
893void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
894 GLuint base_image, size_t& image_view_index,
895 size_t& texture_index, size_t& image_index) {
896 const GLuint* const samplers = sampler_handles.data() + texture_index;
897 const GLuint* const textures = texture_handles.data() + texture_index;
898 const GLuint* const images = image_handles.data() + image_index;
899
900 const size_t num_samplers = entries.samplers.size();
901 for (const auto& sampler : entries.samplers) {
902 for (size_t i = 0; i < sampler.size; ++i) {
903 const ImageViewId image_view_id = image_view_ids[image_view_index++];
904 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
905 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
906 texture_handles[texture_index++] = handle;
907 }
908 }
909 const size_t num_images = entries.images.size();
910 for (size_t unit = 0; unit < num_images; ++unit) {
911 // TODO: Mark as modified
912 const ImageViewId image_view_id = image_view_ids[image_view_index++];
913 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
914 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
915 image_handles[image_index] = handle;
916 ++image_index;
917 }
918 if (num_samplers > 0) {
919 glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
920 glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
921 }
922 if (num_images > 0) {
923 glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
924 }
896} 925}
897 926
898void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { 927void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
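The BindTextures helper above is the ARB_multi_bind form (core since GL 4.4): handles are gathered into contiguous arrays and each category is bound with one call per range, instead of one glBindTextureUnit/glBindSampler call per unit as in the deleted SetupTexture. A reduced sketch of the batched binding:

    #include <glad/glad.h>

    // Binds whole ranges of units with three calls total,
    // regardless of how many units each range contains.
    void BindRanges(GLuint first_texture, GLuint first_image, GLsizei num_textures,
                    GLsizei num_images, const GLuint* textures,
                    const GLuint* samplers, const GLuint* images) {
        if (num_textures > 0) {
            glBindTextures(first_texture, num_textures, textures);
            glBindSamplers(first_texture, num_textures, samplers);
        }
        if (num_images > 0) {
            glBindImageTextures(first_image, num_images, images);
        }
    }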
@@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
999 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, 1028 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
1000 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, 1029 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
1001 }; 1030 };
1002
1003 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; 1031 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
1004 const auto& entries{shader->GetEntries().global_memory_entries}; 1032 const auto& entries{shader->GetEntries().global_memory_entries};
1005 1033
@@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
1056 } 1084 }
1057} 1085}
1058 1086
1059void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { 1087void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
1060 MICROPROFILE_SCOPE(OpenGL_Texture); 1088 const bool via_header_index =
1061 u32 binding = device.GetBaseBindings(stage_index).sampler; 1089 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1062 for (const auto& entry : shader->GetEntries().samplers) { 1090 for (const auto& entry : shader->GetEntries().samplers) {
1063 const auto shader_type = static_cast<ShaderType>(stage_index); 1091 const auto shader_type = static_cast<ShaderType>(stage_index);
1064 for (std::size_t i = 0; i < entry.size; ++i) { 1092 for (size_t index = 0; index < entry.size; ++index) {
1065 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); 1093 const auto handle =
1066 SetupTexture(binding++, texture, entry); 1094 GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
1095 const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
1096 sampler_handles.push_back(sampler->Handle());
1097 image_view_indices.push_back(handle.image);
1067 } 1098 }
1068 } 1099 }
1069} 1100}
1070 1101
1071void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { 1102void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
1072 MICROPROFILE_SCOPE(OpenGL_Texture); 1103 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1073 u32 binding = 0;
1074 for (const auto& entry : kernel->GetEntries().samplers) { 1104 for (const auto& entry : kernel->GetEntries().samplers) {
1075 for (std::size_t i = 0; i < entry.size; ++i) { 1105 for (size_t i = 0; i < entry.size; ++i) {
1076 const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); 1106 const auto handle =
1077 SetupTexture(binding++, texture, entry); 1107 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
1108 const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
1109 sampler_handles.push_back(sampler->Handle());
1110 image_view_indices.push_back(handle.image);
1078 } 1111 }
1079 } 1112 }
1080} 1113}
1081 1114
1082void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 1115void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
1083 const SamplerEntry& entry) { 1116 const bool via_header_index =
1084 const auto view = texture_cache.GetTextureSurface(texture.tic, entry); 1117 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1085 if (!view) {
1086 // Can occur when texture addr is null or its memory is unmapped/invalid
1087 glBindSampler(binding, 0);
1088 glBindTextureUnit(binding, 0);
1089 return;
1090 }
1091 const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
1092 texture.tic.z_source, texture.tic.w_source);
1093 glBindTextureUnit(binding, handle);
1094 if (!view->GetSurfaceParams().IsBuffer()) {
1095 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
1096 }
1097}
1098
1099void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
1100 u32 binding = device.GetBaseBindings(stage_index).image;
1101 for (const auto& entry : shader->GetEntries().images) { 1118 for (const auto& entry : shader->GetEntries().images) {
1102 const auto shader_type = static_cast<ShaderType>(stage_index); 1119 const auto shader_type = static_cast<ShaderType>(stage_index);
1103 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; 1120 const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
1104 SetupImage(binding++, tic, entry); 1121 image_view_indices.push_back(handle.image);
1105 } 1122 }
1106} 1123}
1107 1124
1108void RasterizerOpenGL::SetupComputeImages(Shader* shader) { 1125void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
1109 u32 binding = 0; 1126 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1110 for (const auto& entry : shader->GetEntries().images) { 1127 for (const auto& entry : shader->GetEntries().images) {
1111 const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; 1128 const auto handle =
1112 SetupImage(binding++, tic, entry); 1129 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
1130 image_view_indices.push_back(handle.image);
1113 } 1131 }
1114} 1132}
1115 1133
1116void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
1117 const ImageEntry& entry) {
1118 const auto view = texture_cache.GetImageSurface(tic, entry);
1119 if (!view) {
1120 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
1121 return;
1122 }
1123 if (entry.is_written) {
1124 view->MarkAsModified(texture_cache.Tick());
1125 }
1126 const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1127 glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
1128}
1129
1130void RasterizerOpenGL::SyncViewport() { 1134void RasterizerOpenGL::SyncViewport() {
1131 auto& flags = maxwell3d.dirty.flags; 1135 auto& flags = maxwell3d.dirty.flags;
1132 const auto& regs = maxwell3d.regs; 1136 const auto& regs = maxwell3d.regs;
@@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {
1526 flags[Dirty::PointSize] = false; 1530 flags[Dirty::PointSize] = false;
1527 1531
1528 oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); 1532 oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
1533 oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
1529 1534
1530 if (maxwell3d.regs.vp_point_size.enable) {
1531 // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
1532 glEnable(GL_PROGRAM_POINT_SIZE);
1533 return;
1534 }
1535
1536 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
1537 // in OpenGL).
1538 glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); 1535 glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
1539 glDisable(GL_PROGRAM_POINT_SIZE);
1540} 1536}
1541 1537
1542void RasterizerOpenGL::SyncLineState() { 1538void RasterizerOpenGL::SyncLineState() {
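
Note: the SyncPointState hunk above folds the old GL_PROGRAM_POINT_SIZE branch into a single oglEnable call. As a minimal sketch, assuming the helper keeps the conventional shape such toggles have elsewhere in this file (the exact definition is not shown in this diff):

    #include <glad/glad.h>

    // Presumed shape of the toggle helper: one call replaces the usual
    // if/else pair around glEnable and glDisable.
    void oglEnable(GLenum cap, bool state) {
        if (state) {
            glEnable(cap);
        } else {
            glDisable(cap);
        }
    }
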
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 1d0f585fa..82e03e677 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,12 +7,13 @@
7#include <array> 7#include <array>
8#include <atomic> 8#include <atomic>
9#include <cstddef> 9#include <cstddef>
10#include <map>
11#include <memory> 10#include <memory>
12#include <optional> 11#include <optional>
13#include <tuple> 12#include <tuple>
14#include <utility> 13#include <utility>
15 14
15#include <boost/container/static_vector.hpp>
16
16#include <glad/glad.h> 17#include <glad/glad.h>
17 18
18#include "common/common_types.h" 19#include "common/common_types.h"
@@ -23,16 +24,14 @@
23#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
24#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
25#include "video_core/renderer_opengl/gl_fence_manager.h" 26#include "video_core/renderer_opengl/gl_fence_manager.h"
26#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_query_cache.h" 27#include "video_core/renderer_opengl/gl_query_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 28#include "video_core/renderer_opengl/gl_resource_manager.h"
29#include "video_core/renderer_opengl/gl_sampler_cache.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 29#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h" 30#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 31#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state_tracker.h" 32#include "video_core/renderer_opengl/gl_state_tracker.h"
33#include "video_core/renderer_opengl/gl_stream_buffer.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 34#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/renderer_opengl/utils.h"
36#include "video_core/shader/async_shaders.h" 35#include "video_core/shader/async_shaders.h"
37#include "video_core/textures/texture.h" 36#include "video_core/textures/texture.h"
38 37
@@ -51,7 +50,7 @@ class MemoryManager;
51namespace OpenGL { 50namespace OpenGL {
52 51
53struct ScreenInfo; 52struct ScreenInfo;
54struct DrawParameters; 53struct ShaderEntries;
55 54
56struct BindlessSSBO { 55struct BindlessSSBO {
57 GLuint64EXT address; 56 GLuint64EXT address;
@@ -62,10 +61,10 @@ static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
62 61
63class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { 62class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
64public: 63public:
65 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, 64 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
66 Core::Memory::Memory& cpu_memory, const Device& device, 65 Core::Memory::Memory& cpu_memory_, const Device& device_,
67 ScreenInfo& screen_info, ProgramManager& program_manager, 66 ScreenInfo& screen_info_, ProgramManager& program_manager_,
68 StateTracker& state_tracker); 67 StateTracker& state_tracker_);
69 ~RasterizerOpenGL() override; 68 ~RasterizerOpenGL() override;
70 69
71 void Draw(bool is_indexed, bool is_instanced) override; 70 void Draw(bool is_indexed, bool is_instanced) override;
@@ -79,15 +78,18 @@ public:
79 void InvalidateRegion(VAddr addr, u64 size) override; 78 void InvalidateRegion(VAddr addr, u64 size) override;
80 void OnCPUWrite(VAddr addr, u64 size) override; 79 void OnCPUWrite(VAddr addr, u64 size) override;
81 void SyncGuestHost() override; 80 void SyncGuestHost() override;
81 void UnmapMemory(VAddr addr, u64 size) override;
82 void SignalSemaphore(GPUVAddr addr, u32 value) override; 82 void SignalSemaphore(GPUVAddr addr, u32 value) override;
83 void SignalSyncPoint(u32 value) override; 83 void SignalSyncPoint(u32 value) override;
84 void ReleaseFences() override; 84 void ReleaseFences() override;
85 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 85 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
86 void WaitForIdle() override; 86 void WaitForIdle() override;
87 void FragmentBarrier() override;
88 void TiledCacheBarrier() override;
87 void FlushCommands() override; 89 void FlushCommands() override;
88 void TickFrame() override; 90 void TickFrame() override;
89 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 91 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
90 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 92 const Tegra::Engines::Fermi2D::Surface& dst,
91 const Tegra::Engines::Fermi2D::Config& copy_config) override; 93 const Tegra::Engines::Fermi2D::Config& copy_config) override;
92 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 94 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
93 u32 pixel_stride) override; 95 u32 pixel_stride) override;
@@ -108,11 +110,14 @@ public:
108 } 110 }
109 111
110private: 112private:
111 /// Configures the color and depth framebuffer states. 113 static constexpr size_t MAX_TEXTURES = 192;
112 void ConfigureFramebuffers(); 114 static constexpr size_t MAX_IMAGES = 48;
115 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
116
117 void BindComputeTextures(Shader* kernel);
113 118
114 /// Configures the color and depth framebuffer for clearing. 119 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
115 void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); 120 size_t& image_view_index, size_t& texture_index, size_t& image_index);
116 121
117 /// Configures the current constbuffers to use for the draw command. 122 /// Configures the current constbuffers to use for the draw command.
118 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); 123 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
@@ -136,23 +141,16 @@ private:
136 size_t size, BindlessSSBO* ssbo); 141 size_t size, BindlessSSBO* ssbo);
137 142
138 /// Configures the current textures to use for the draw command. 143 /// Configures the current textures to use for the draw command.
139 void SetupDrawTextures(std::size_t stage_index, Shader* shader); 144 void SetupDrawTextures(const Shader* shader, size_t stage_index);
140 145
141 /// Configures the textures used in a compute shader. 146 /// Configures the textures used in a compute shader.
142 void SetupComputeTextures(Shader* kernel); 147 void SetupComputeTextures(const Shader* kernel);
143
144 /// Configures a texture.
145 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
146 const SamplerEntry& entry);
147 148
148 /// Configures images in a graphics shader. 149 /// Configures images in a graphics shader.
149 void SetupDrawImages(std::size_t stage_index, Shader* shader); 150 void SetupDrawImages(const Shader* shader, size_t stage_index);
150 151
151 /// Configures images in a compute shader. 152 /// Configures images in a compute shader.
152 void SetupComputeImages(Shader* shader); 153 void SetupComputeImages(const Shader* shader);
153
154 /// Configures an image.
155 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
156 154
157 /// Syncs the viewport and depth range to match the guest state 155 /// Syncs the viewport and depth range to match the guest state
158 void SyncViewport(); 156 void SyncViewport();
@@ -227,9 +225,6 @@ private:
227 /// End a transform feedback 225 /// End a transform feedback
228 void EndTransformFeedback(); 226 void EndTransformFeedback();
229 227
230 /// Check for extension that are not strictly required but are needed for correct emulation
231 void CheckExtensions();
232
233 std::size_t CalculateVertexArraysSize() const; 228 std::size_t CalculateVertexArraysSize() const;
234 229
235 std::size_t CalculateIndexBufferSize() const; 230 std::size_t CalculateIndexBufferSize() const;
@@ -242,7 +237,7 @@ private:
242 237
243 GLintptr SetupIndexBuffer(); 238 GLintptr SetupIndexBuffer();
244 239
245 void SetupShaders(GLenum primitive_mode); 240 void SetupShaders();
246 241
247 Tegra::GPU& gpu; 242 Tegra::GPU& gpu;
248 Tegra::Engines::Maxwell3D& maxwell3d; 243 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -254,19 +249,21 @@ private:
254 ProgramManager& program_manager; 249 ProgramManager& program_manager;
255 StateTracker& state_tracker; 250 StateTracker& state_tracker;
256 251
257 TextureCacheOpenGL texture_cache; 252 OGLStreamBuffer stream_buffer;
253 TextureCacheRuntime texture_cache_runtime;
254 TextureCache texture_cache;
258 ShaderCacheOpenGL shader_cache; 255 ShaderCacheOpenGL shader_cache;
259 SamplerCacheOpenGL sampler_cache;
260 FramebufferCacheOpenGL framebuffer_cache;
261 QueryCache query_cache; 256 QueryCache query_cache;
262 OGLBufferCache buffer_cache; 257 OGLBufferCache buffer_cache;
263 FenceManagerOpenGL fence_manager; 258 FenceManagerOpenGL fence_manager;
264 259
265 VideoCommon::Shader::AsyncShaders async_shaders; 260 VideoCommon::Shader::AsyncShaders async_shaders;
266 261
267 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 262 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
268 263 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
269 GLint vertex_binding = 0; 264 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
265 std::array<GLuint, MAX_TEXTURES> texture_handles;
266 std::array<GLuint, MAX_IMAGES> image_handles;
270 267
271 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> 268 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
272 transform_feedback_buffers; 269 transform_feedback_buffers;
@@ -280,7 +277,7 @@ private:
280 std::size_t current_cbuf = 0; 277 std::size_t current_cbuf = 0;
281 OGLBuffer unified_uniform_buffer; 278 OGLBuffer unified_uniform_buffer;
282 279
 283 /// Number of commands queued to the OpenGL driver. Reseted on flush. 280 /// Number of commands queued to the OpenGL driver. Reset on flush.
284 std::size_t num_queued_commands = 0; 281 std::size_t num_queued_commands = 0;
285 282
286 u32 last_clip_distance_mask = 0; 283 u32 last_clip_distance_mask = 0;
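
Note: the new static_vector and array members exist to feed multi-bind: handles are gathered per draw and then bound in bulk, as in the glBindTextures/glBindSamplers/glBindImageTextures calls near the top of this diff. A self-contained sketch of the pattern (names and sizes here are illustrative, not the rasterizer's):

    #include <array>
    #include <glad/glad.h>

    // Gather contiguous handles first, then bind the whole range in one call
    // instead of one glBindTextureUnit/glBindSampler per unit.
    void BindBatch(GLuint first_unit, const std::array<GLuint, 4>& textures,
                   const std::array<GLuint, 4>& samplers) {
        const GLsizei count = static_cast<GLsizei>(textures.size());
        glBindTextures(first_unit, count, textures.data());
        glBindSamplers(first_unit, count, samplers.data());
    }
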
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0ebcec427..0e34a0f20 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -71,7 +71,7 @@ void OGLSampler::Create() {
71 return; 71 return;
72 72
73 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 73 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
74 glGenSamplers(1, &handle); 74 glCreateSamplers(1, &handle);
75} 75}
76 76
77void OGLSampler::Release() { 77void OGLSampler::Release() {
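
Note: switching glGenSamplers to glCreateSamplers matches the direct-state-access style used throughout this renderer: the name refers to a live object immediately instead of one created lazily at first bind. An illustrative sketch, not taken from the tree:

    #include <glad/glad.h>

    GLuint MakeNearestSampler() {
        GLuint sampler = 0;
        // With glCreateSamplers the object exists right away, so parameters
        // can be set without binding the sampler first.
        glCreateSamplers(1, &sampler);
        glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        return sampler;
    }
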
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
deleted file mode 100644
index 5c174879a..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/logging/log.h"
6#include "video_core/renderer_opengl/gl_resource_manager.h"
7#include "video_core/renderer_opengl/gl_sampler_cache.h"
8#include "video_core/renderer_opengl/maxwell_to_gl.h"
9
10namespace OpenGL {
11
12SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
13
14SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
15
16OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
17 OGLSampler sampler;
18 sampler.Create();
19
20 const GLuint sampler_id{sampler.handle};
21 glSamplerParameteri(
22 sampler_id, GL_TEXTURE_MAG_FILTER,
23 MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
24 glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
25 MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
26 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
27 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
28 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
29 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
30 tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
31 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
32 MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
33 glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
34 glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
35 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
36 glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
37 if (GLAD_GL_ARB_texture_filter_anisotropic) {
38 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
39 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
40 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
41 } else {
42 LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
43 }
44
45 return sampler;
46}
47
48GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
49 return sampler.handle;
50}
51
52} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
deleted file mode 100644
index 34ee37f00..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.h
+++ /dev/null
@@ -1,25 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <glad/glad.h>
8
9#include "video_core/renderer_opengl/gl_resource_manager.h"
10#include "video_core/sampler_cache.h"
11
12namespace OpenGL {
13
14class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
15public:
16 explicit SamplerCacheOpenGL();
17 ~SamplerCacheOpenGL();
18
19protected:
20 OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
21
22 GLuint ToSamplerType(const OGLSampler& sampler) const override;
23};
24
25} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index bd56bed0c..d4841fdb7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -27,7 +27,6 @@
27#include "video_core/renderer_opengl/gl_shader_decompiler.h" 27#include "video_core/renderer_opengl/gl_shader_decompiler.h"
28#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 28#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
29#include "video_core/renderer_opengl/gl_state_tracker.h" 29#include "video_core/renderer_opengl/gl_state_tracker.h"
30#include "video_core/renderer_opengl/utils.h"
31#include "video_core/shader/memory_util.h" 30#include "video_core/shader/memory_util.h"
32#include "video_core/shader/registry.h" 31#include "video_core/shader/registry.h"
33#include "video_core/shader/shader_ir.h" 32#include "video_core/shader/shader_ir.h"
@@ -198,10 +197,10 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
198 return program; 197 return program;
199} 198}
200 199
201Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, 200Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
202 ProgramSharedPtr program_, bool is_built) 201 ProgramSharedPtr program_, bool is_built_)
203 : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, 202 : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
204 is_built(is_built) { 203 is_built{is_built_} {
205 handle = program->assembly_program.handle; 204 handle = program->assembly_program.handle;
206 if (handle == 0) { 205 if (handle == 0) {
207 handle = program->source_program.handle; 206 handle = program->source_program.handle;
@@ -318,14 +317,13 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
318 precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); 317 precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
319} 318}
320 319
321ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, 320ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
322 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 321 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
323 Tegra::Engines::Maxwell3D& maxwell3d_, 322 Tegra::Engines::Maxwell3D& maxwell3d_,
324 Tegra::Engines::KeplerCompute& kepler_compute_, 323 Tegra::Engines::KeplerCompute& kepler_compute_,
325 Tegra::MemoryManager& gpu_memory_, const Device& device_) 324 Tegra::MemoryManager& gpu_memory_, const Device& device_)
326 : VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_}, 325 : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
327 gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, 326 maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
328 kepler_compute{kepler_compute_}, device{device_} {}
329 327
330ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; 328ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
331 329
@@ -460,7 +458,7 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop
460ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( 458ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
461 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, 459 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
462 const std::unordered_set<GLenum>& supported_formats) { 460 const std::unordered_set<GLenum>& supported_formats) {
463 if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { 461 if (!supported_formats.contains(precompiled_entry.binary_format)) {
464 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); 462 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
465 return {}; 463 return {};
466 } 464 }
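
Note: the supported_formats lookup now uses C++20's contains(), which reads better than comparing find() against end(). Minimal illustration:

    #include <unordered_set>

    bool IsFormatSupported(const std::unordered_set<unsigned>& supported, unsigned format) {
        // Equivalent to: supported.find(format) != supported.end()
        return supported.contains(format);
    }
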
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 1708af06a..2aed0697e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -108,7 +108,7 @@ public:
108 108
109private: 109private:
110 explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, 110 explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
111 ProgramSharedPtr program, bool is_built = true); 111 ProgramSharedPtr program, bool is_built_ = true);
112 112
113 std::shared_ptr<VideoCommon::Shader::Registry> registry; 113 std::shared_ptr<VideoCommon::Shader::Registry> registry;
114 ShaderEntries entries; 114 ShaderEntries entries;
@@ -119,10 +119,11 @@ private:
119 119
120class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { 120class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
121public: 121public:
122 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window, 122 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
123 Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, 123 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
124 Tegra::Engines::KeplerCompute& kepler_compute, 124 Tegra::Engines::Maxwell3D& maxwell3d_,
125 Tegra::MemoryManager& gpu_memory, const Device& device); 125 Tegra::Engines::KeplerCompute& kepler_compute_,
126 Tegra::MemoryManager& gpu_memory_, const Device& device_);
126 ~ShaderCacheOpenGL() override; 127 ~ShaderCacheOpenGL() override;
127 128
128 /// Loads disk cache for the current game 129 /// Loads disk cache for the current game
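
Note: the constructor signatures here (and throughout this diff) adopt the project's trailing-underscore convention so parameters cannot shadow the members they initialize. A minimal standalone example of the pattern:

    struct Example {
        // "value_" initializes "value"; without the underscore the parameter
        // would shadow the member inside the constructor body.
        explicit Example(int value_) : value{value_} {}
        int value;
    };
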
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 95ca96c8e..2e1fa252d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::PixelImap; 38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using Tegra::Shader::TextureType; 40using Tegra::Shader::TextureType;
41using VideoCommon::Shader::BuildTransformFeedback;
42using VideoCommon::Shader::Registry;
43 41
44using namespace std::string_literals;
45using namespace VideoCommon::Shader; 42using namespace VideoCommon::Shader;
43using namespace std::string_literals;
46 44
47using Maxwell = Tegra::Engines::Maxwell3D::Regs; 45using Maxwell = Tegra::Engines::Maxwell3D::Regs;
48using Operation = const OperationNode&; 46using Operation = const OperationNode&;
@@ -131,7 +129,7 @@ private:
131 129
132class Expression final { 130class Expression final {
133public: 131public:
134 Expression(std::string code, Type type) : code{std::move(code)}, type{type} { 132 Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
135 ASSERT(type != Type::Void); 133 ASSERT(type != Type::Void);
136 } 134 }
137 Expression() : type{Type::Void} {} 135 Expression() : type{Type::Void} {}
@@ -148,8 +146,8 @@ public:
148 ASSERT(type == Type::Void); 146 ASSERT(type == Type::Void);
149 } 147 }
150 148
151 std::string As(Type type) const { 149 std::string As(Type type_) const {
152 switch (type) { 150 switch (type_) {
153 case Type::Bool: 151 case Type::Bool:
154 return AsBool(); 152 return AsBool();
155 case Type::Bool2: 153 case Type::Bool2:
@@ -316,7 +314,7 @@ std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology t
316 case Maxwell::PrimitiveTopology::TriangleStripAdjacency: 314 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
317 return {"triangles_adjacency", 6}; 315 return {"triangles_adjacency", 6};
318 default: 316 default:
319 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); 317 UNIMPLEMENTED_MSG("topology={}", topology);
320 return {"points", 1}; 318 return {"points", 1};
321 } 319 }
322} 320}
@@ -342,7 +340,7 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
342 case Tegra::Shader::OutputTopology::TriangleStrip: 340 case Tegra::Shader::OutputTopology::TriangleStrip:
343 return "triangle_strip"; 341 return "triangle_strip";
344 default: 342 default:
345 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); 343 UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
346 return "points"; 344 return "points";
347 } 345 }
348} 346}
@@ -418,11 +416,12 @@ struct GenericVaryingDescription {
418 416
419class GLSLDecompiler final { 417class GLSLDecompiler final {
420public: 418public:
421 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, 419 explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
422 ShaderType stage, std::string_view identifier, std::string_view suffix) 420 ShaderType stage_, std::string_view identifier_,
423 : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier}, 421 std::string_view suffix_)
424 suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{ 422 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
425 UseUnifiedUniforms(device, ir, stage)} { 423 suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
424 UseUnifiedUniforms(device_, ir_, stage_)} {
426 if (stage != ShaderType::Compute) { 425 if (stage != ShaderType::Compute) {
427 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); 426 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
428 } 427 }
@@ -744,7 +743,7 @@ private:
744 case PixelImap::Unused: 743 case PixelImap::Unused:
745 break; 744 break;
746 } 745 }
747 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); 746 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
748 return {}; 747 return {};
749 } 748 }
750 749
@@ -777,16 +776,16 @@ private:
777 name = "gs_" + name + "[]"; 776 name = "gs_" + name + "[]";
778 } 777 }
779 778
780 std::string suffix; 779 std::string suffix_;
781 if (stage == ShaderType::Fragment) { 780 if (stage == ShaderType::Fragment) {
782 const auto input_mode{header.ps.GetPixelImap(location)}; 781 const auto input_mode{header.ps.GetPixelImap(location)};
783 if (input_mode == PixelImap::Unused) { 782 if (input_mode == PixelImap::Unused) {
784 return; 783 return;
785 } 784 }
786 suffix = GetInputFlags(input_mode); 785 suffix_ = GetInputFlags(input_mode);
787 } 786 }
788 787
789 code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); 788 code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
790 } 789 }
791 790
792 void DeclareOutputAttributes() { 791 void DeclareOutputAttributes() {
@@ -877,7 +876,7 @@ private:
877 } 876 }
878 877
879 u32 binding = device.GetBaseBindings(stage).uniform_buffer; 878 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
880 for (const auto [index, info] : ir.GetConstantBuffers()) { 879 for (const auto& [index, info] : ir.GetConstantBuffers()) {
881 const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4; 880 const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
882 const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; 881 const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
883 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, 882 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
@@ -1251,7 +1250,7 @@ private:
1251 } 1250 }
1252 break; 1251 break;
1253 } 1252 }
1254 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); 1253 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
1255 return {"0", Type::Int}; 1254 return {"0", Type::Int};
1256 } 1255 }
1257 1256
@@ -1331,7 +1330,7 @@ private:
1331 GetSwizzle(element)), 1330 GetSwizzle(element)),
1332 Type::Float}}; 1331 Type::Float}};
1333 } 1332 }
1334 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); 1333 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
1335 return std::nullopt; 1334 return std::nullopt;
1336 } 1335 }
1337 } 1336 }
@@ -2100,13 +2099,13 @@ private:
2100 const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int; 2099 const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
2101 const bool separate_dc = meta.sampler.is_shadow; 2100 const bool separate_dc = meta.sampler.is_shadow;
2102 2101
2103 std::vector<TextureIR> ir; 2102 std::vector<TextureIR> ir_;
2104 if (meta.sampler.is_shadow) { 2103 if (meta.sampler.is_shadow) {
2105 ir = {TextureOffset{}}; 2104 ir_ = {TextureOffset{}};
2106 } else { 2105 } else {
2107 ir = {TextureOffset{}, TextureArgument{type, meta.component}}; 2106 ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
2108 } 2107 }
2109 return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element), 2108 return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
2110 Type::Float}; 2109 Type::Float};
2111 } 2110 }
2112 2111
@@ -2752,11 +2751,11 @@ private:
2752 } 2751 }
2753 } 2752 }
2754 2753
2755 std::string GetSampler(const Sampler& sampler) const { 2754 std::string GetSampler(const SamplerEntry& sampler) const {
2756 return AppendSuffix(sampler.index, "sampler"); 2755 return AppendSuffix(sampler.index, "sampler");
2757 } 2756 }
2758 2757
2759 std::string GetImage(const Image& image) const { 2758 std::string GetImage(const ImageEntry& image) const {
2760 return AppendSuffix(image.index, "image"); 2759 return AppendSuffix(image.index, "image");
2761 } 2760 }
2762 2761
@@ -2801,7 +2800,7 @@ std::string GetFlowVariable(u32 index) {
2801 2800
2802class ExprDecompiler { 2801class ExprDecompiler {
2803public: 2802public:
2804 explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} 2803 explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2805 2804
2806 void operator()(const ExprAnd& expr) { 2805 void operator()(const ExprAnd& expr) {
2807 inner += '('; 2806 inner += '(';
@@ -2856,7 +2855,7 @@ private:
2856 2855
2857class ASTDecompiler { 2856class ASTDecompiler {
2858public: 2857public:
2859 explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} 2858 explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2860 2859
2861 void operator()(const ASTProgram& ast) { 2860 void operator()(const ASTProgram& ast) {
2862 ASTNode current = ast.nodes.GetFirst(); 2861 ASTNode current = ast.nodes.GetFirst();
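
Note: several hunks above drop static_cast from UNIMPLEMENTED_MSG arguments and pass enums directly, which only compiles if fmt knows how to format those enum types; the formatter specializations are presumably provided elsewhere in this series. A hedged sketch of what such a specialization looks like, for a hypothetical enum:

    #include <fmt/format.h>

    enum class Topology { Points, Lines };

    // Hypothetical formatter; without something like this, passing a scoped
    // enum to a {} placeholder is a compile error.
    template <>
    struct fmt::formatter<Topology> {
        constexpr auto parse(fmt::format_parse_context& ctx) {
            return ctx.begin();
        }
        template <typename FormatContext>
        auto format(Topology topology, FormatContext& ctx) {
            const char* const name = topology == Topology::Points ? "Points" : "Lines";
            return fmt::format_to(ctx.out(), "{}", name);
        }
    };
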
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 451c9689a..be68994bb 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -20,13 +20,13 @@ namespace OpenGL {
20class Device; 20class Device;
21 21
22using Maxwell = Tegra::Engines::Maxwell3D::Regs; 22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23using SamplerEntry = VideoCommon::Shader::Sampler; 23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using ImageEntry = VideoCommon::Shader::Image; 24using ImageEntry = VideoCommon::Shader::ImageEntry;
25 25
26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { 26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
27public: 27public:
28 explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index) 28 explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
29 : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {} 29 : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
30 30
31 u32 GetIndex() const { 31 u32 GetIndex() const {
32 return index; 32 return index;
@@ -37,10 +37,10 @@ private:
37}; 37};
38 38
39struct GlobalMemoryEntry { 39struct GlobalMemoryEntry {
40 constexpr explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, 40 constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
41 bool is_written) 41 bool is_written_)
42 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{ 42 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
43 is_written} {} 43 is_written_} {}
44 44
45 u32 cbuf_index = 0; 45 u32 cbuf_index = 0;
46 u32 cbuf_offset = 0; 46 u32 cbuf_offset = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 70dd0c3c6..955b2abc4 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -343,7 +343,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
343 } 343 }
344 344
345 const u64 id = entry.unique_identifier; 345 const u64 id = entry.unique_identifier;
346 if (stored_transferable.find(id) != stored_transferable.end()) { 346 if (stored_transferable.contains(id)) {
347 // The shader already exists 347 // The shader already exists
348 return; 348 return;
349 } 349 }
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 691c6c79b..553e6e8d6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {
83 } 83 }
84} 84}
85 85
86void ProgramManager::BindHostCompute(GLuint program) {
87 if (use_assembly_programs) {
88 glDisable(GL_COMPUTE_PROGRAM_NV);
89 }
90 glUseProgram(program);
91 is_graphics_bound = false;
92}
93
94void ProgramManager::RestoreGuestCompute() {
95 if (use_assembly_programs) {
96 glEnable(GL_COMPUTE_PROGRAM_NV);
97 glUseProgram(0);
98 }
99}
100
86void ProgramManager::UseVertexShader(GLuint program) { 101void ProgramManager::UseVertexShader(GLuint program) {
87 if (use_assembly_programs) { 102 if (use_assembly_programs) {
88 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); 103 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 950e0dfcb..ad42cce74 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,6 +45,12 @@ public:
45 /// Rewinds BindHostPipeline state changes. 45 /// Rewinds BindHostPipeline state changes.
46 void RestoreGuestPipeline(); 46 void RestoreGuestPipeline();
47 47
48 /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
49 void BindHostCompute(GLuint program);
50
51 /// Rewinds BindHostCompute state changes.
52 void RestoreGuestCompute();
53
48 void UseVertexShader(GLuint program); 54 void UseVertexShader(GLuint program);
49 void UseGeometryShader(GLuint program); 55 void UseGeometryShader(GLuint program);
50 void UseFragmentShader(GLuint program); 56 void UseFragmentShader(GLuint program);
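
Note: BindHostCompute/RestoreGuestCompute mirror the existing BindHostPipeline/RestoreGuestPipeline pair, but for compute dispatches that bypass guest state. A hypothetical call site (the real caller is the rasterizer's compute path, not shown here):

    #include <glad/glad.h>
    #include "video_core/renderer_opengl/gl_shader_manager.h"

    void DispatchHostShader(OpenGL::ProgramManager& program_manager, GLuint program) {
        program_manager.BindHostCompute(program);  // plain glUseProgram, NV assembly off
        glDispatchCompute(1, 1, 1);
        program_manager.RestoreGuestCompute();     // undo the state changes afterwards
    }
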
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 6bcf831f2..60e6fa39f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -13,7 +13,7 @@
13#include "video_core/renderer_opengl/gl_state_tracker.h" 13#include "video_core/renderer_opengl/gl_state_tracker.h"
14 14
15#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) 15#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
16#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) 16#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
17 17
18namespace OpenGL { 18namespace OpenGL {
19 19
@@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
249 } 249 }
250} 250}
251 251
252void StateTracker::InvalidateStreamBuffer() {
253 flags[Dirty::VertexBuffers] = true;
254 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
255 flags[index] = true;
256 }
257}
258
252} // namespace OpenGL 259} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 9d127548f..574615d3c 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -92,6 +92,8 @@ class StateTracker {
92public: 92public:
93 explicit StateTracker(Tegra::GPU& gpu); 93 explicit StateTracker(Tegra::GPU& gpu);
94 94
95 void InvalidateStreamBuffer();
96
95 void BindIndexBuffer(GLuint new_index_buffer) { 97 void BindIndexBuffer(GLuint new_index_buffer) {
96 if (index_buffer == new_index_buffer) { 98 if (index_buffer == new_index_buffer) {
97 return; 99 return;
@@ -100,6 +102,14 @@ public:
100 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); 102 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
101 } 103 }
102 104
105 void BindFramebuffer(GLuint new_framebuffer) {
106 if (framebuffer == new_framebuffer) {
107 return;
108 }
109 framebuffer = new_framebuffer;
110 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
111 }
112
103 void NotifyScreenDrawVertexArray() { 113 void NotifyScreenDrawVertexArray() {
104 flags[OpenGL::Dirty::VertexFormats] = true; 114 flags[OpenGL::Dirty::VertexFormats] = true;
105 flags[OpenGL::Dirty::VertexFormat0 + 0] = true; 115 flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
@@ -129,9 +139,9 @@ public:
129 flags[OpenGL::Dirty::Scissor0] = true; 139 flags[OpenGL::Dirty::Scissor0] = true;
130 } 140 }
131 141
132 void NotifyColorMask0() { 142 void NotifyColorMask(size_t index) {
133 flags[OpenGL::Dirty::ColorMasks] = true; 143 flags[OpenGL::Dirty::ColorMasks] = true;
134 flags[OpenGL::Dirty::ColorMask0] = true; 144 flags[OpenGL::Dirty::ColorMask0 + index] = true;
135 } 145 }
136 146
137 void NotifyBlend0() { 147 void NotifyBlend0() {
@@ -190,6 +200,7 @@ public:
190private: 200private:
191 Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; 201 Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
192 202
203 GLuint framebuffer = 0;
193 GLuint index_buffer = 0; 204 GLuint index_buffer = 0;
194}; 205};
195 206
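
Note: NotifyColorMask0 becomes NotifyColorMask(size_t), so one entry point covers every render target instead of only the first. A hypothetical helper showing the generalized call (the render-target count of 8 is an assumption matching Maxwell's NUM_RT):

    #include <cstddef>
    #include "video_core/renderer_opengl/gl_state_tracker.h"

    void DirtyAllColorMasks(OpenGL::StateTracker& state_tracker) {
        for (std::size_t index = 0; index < 8; ++index) {
            state_tracker.NotifyColorMask(index);
        }
    }
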
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 887995cf4..e0819cdf2 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -9,6 +9,7 @@
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "video_core/renderer_opengl/gl_device.h" 11#include "video_core/renderer_opengl/gl_device.h"
12#include "video_core/renderer_opengl/gl_state_tracker.h"
12#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
13 14
14MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", 15MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
16 17
17namespace OpenGL { 18namespace OpenGL {
18 19
19OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) 20OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
20 : buffer_size(size) { 21 : state_tracker{state_tracker_} {
21 gl_buffer.Create(); 22 gl_buffer.Create();
22 23
23 GLsizeiptr allocate_size = size;
24 if (vertex_data_usage) {
25 // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
26 // read position is near the end and is an out-of-bound access to the vertex buffer. This is
27 // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
28 // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
29 // crash.
30 allocate_size *= 2;
31 }
32
33 static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; 24 static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
34 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); 25 glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
35 mapped_ptr = static_cast<u8*>( 26 mapped_ptr = static_cast<u8*>(
36 glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); 27 glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
37 28
38 if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { 29 if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
39 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); 30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
@@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() {
46 gl_buffer.Release(); 37 gl_buffer.Release();
47} 38}
48 39
49std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { 40std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
50 ASSERT(size <= buffer_size); 41 ASSERT(size <= BUFFER_SIZE);
51 ASSERT(alignment <= buffer_size); 42 ASSERT(alignment <= BUFFER_SIZE);
52 mapped_size = size; 43 mapped_size = size;
53 44
54 if (alignment > 0) { 45 if (alignment > 0) {
55 buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); 46 buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
56 } 47 }
57 48
58 bool invalidate = false; 49 if (buffer_pos + size > BUFFER_SIZE) {
59 if (buffer_pos + size > buffer_size) {
60 MICROPROFILE_SCOPE(OpenGL_StreamBuffer); 50 MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
61 glInvalidateBufferData(gl_buffer.handle); 51 glInvalidateBufferData(gl_buffer.handle);
52 state_tracker.InvalidateStreamBuffer();
62 53
63 buffer_pos = 0; 54 buffer_pos = 0;
64 invalidate = true;
65 } 55 }
66 56
67 return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); 57 return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
68} 58}
69 59
70void OGLStreamBuffer::Unmap(GLsizeiptr size) { 60void OGLStreamBuffer::Unmap(GLsizeiptr size) {
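
Note: with the invalidation flag gone, Map returns just the write pointer and buffer offset; wrap-around is reported through StateTracker::InvalidateStreamBuffer instead of to the caller. A hypothetical caller under the new signature:

    #include <cstddef>
    #include <cstring>
    #include "video_core/renderer_opengl/gl_stream_buffer.h"

    GLintptr Upload(OpenGL::OGLStreamBuffer& stream_buffer, const void* data, std::size_t size) {
        const auto [ptr, offset] = stream_buffer.Map(static_cast<GLsizeiptr>(size), 4);
        std::memcpy(ptr, data, size);
        stream_buffer.Unmap(static_cast<GLsizeiptr>(size));
        return offset;  // used later as the binding offset into the stream buffer
    }
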
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 307a67113..dd9cf67eb 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -4,29 +4,31 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <tuple> 7#include <utility>
8
8#include <glad/glad.h> 9#include <glad/glad.h>
10
9#include "common/common_types.h" 11#include "common/common_types.h"
10#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
11 13
12namespace OpenGL { 14namespace OpenGL {
13 15
14class Device; 16class Device;
17class StateTracker;
15 18
16class OGLStreamBuffer : private NonCopyable { 19class OGLStreamBuffer : private NonCopyable {
17public: 20public:
18 explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); 21 explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
19 ~OGLStreamBuffer(); 22 ~OGLStreamBuffer();
20 23
21 /* 24 /*
22 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes 25 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
23 * and the optional alignment requirement. 26 * and the optional alignment requirement.
24 * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. 27 * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
25 * The return values are the pointer to the new chunk, the offset within the buffer, 28 * The return values are the pointer to the new chunk, and the offset within the buffer.
26 * and the invalidation flag for previous chunks.
27 * The actual used size must be specified on unmapping the chunk. 29 * The actual used size must be specified on unmapping the chunk.
28 */ 30 */
29 std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); 31 std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
30 32
31 void Unmap(GLsizeiptr size); 33 void Unmap(GLsizeiptr size);
32 34
@@ -39,15 +41,18 @@ public:
39 } 41 }
40 42
41 GLsizeiptr Size() const noexcept { 43 GLsizeiptr Size() const noexcept {
42 return buffer_size; 44 return BUFFER_SIZE;
43 } 45 }
44 46
45private: 47private:
48 static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
49
50 StateTracker& state_tracker;
51
46 OGLBuffer gl_buffer; 52 OGLBuffer gl_buffer;
47 53
48 GLuint64EXT gpu_address = 0; 54 GLuint64EXT gpu_address = 0;
49 GLintptr buffer_pos = 0; 55 GLintptr buffer_pos = 0;
50 GLsizeiptr buffer_size = 0;
51 GLsizeiptr mapped_size = 0; 56 GLsizeiptr mapped_size = 0;
52 u8* mapped_ptr = nullptr; 57 u8* mapped_ptr = nullptr;
53}; 58};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index a863ef218..4c690418c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -2,173 +2,238 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include <algorithm>
6#include "common/bit_util.h" 6#include <array>
7#include "common/common_types.h" 7#include <bit>
8#include "common/microprofile.h" 8#include <string>
9#include "common/scope_exit.h" 9
10#include "core/core.h" 10#include <glad/glad.h>
11#include "video_core/morton.h" 11
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_shader_manager.h"
13#include "video_core/renderer_opengl/gl_state_tracker.h" 14#include "video_core/renderer_opengl/gl_state_tracker.h"
14#include "video_core/renderer_opengl/gl_texture_cache.h" 15#include "video_core/renderer_opengl/gl_texture_cache.h"
15#include "video_core/renderer_opengl/utils.h" 16#include "video_core/renderer_opengl/maxwell_to_gl.h"
16#include "video_core/texture_cache/surface_base.h" 17#include "video_core/renderer_opengl/util_shaders.h"
18#include "video_core/surface.h"
19#include "video_core/texture_cache/format_lookup_table.h"
20#include "video_core/texture_cache/samples_helper.h"
17#include "video_core/texture_cache/texture_cache.h" 21#include "video_core/texture_cache/texture_cache.h"
18#include "video_core/textures/convert.h" 22#include "video_core/textures/decoders.h"
19#include "video_core/textures/texture.h"
20 23
21namespace OpenGL { 24namespace OpenGL {
22 25
23using Tegra::Texture::SwizzleSource; 26namespace {
24using VideoCore::MortonSwizzleMode;
25 27
28using Tegra::Texture::SwizzleSource;
29using Tegra::Texture::TextureMipmapFilter;
30using Tegra::Texture::TextureType;
31using Tegra::Texture::TICEntry;
32using Tegra::Texture::TSCEntry;
33using VideoCommon::CalculateLevelStrideAlignment;
34using VideoCommon::ImageCopy;
35using VideoCommon::ImageFlagBits;
36using VideoCommon::ImageType;
37using VideoCommon::NUM_RT;
38using VideoCommon::SamplesLog2;
39using VideoCommon::SwizzleParameters;
40using VideoCore::Surface::BytesPerBlock;
41using VideoCore::Surface::IsPixelFormatASTC;
42using VideoCore::Surface::IsPixelFormatSRGB;
43using VideoCore::Surface::MaxPixelFormat;
26using VideoCore::Surface::PixelFormat; 44using VideoCore::Surface::PixelFormat;
27using VideoCore::Surface::SurfaceTarget;
28using VideoCore::Surface::SurfaceType; 45using VideoCore::Surface::SurfaceType;
29 46
30MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); 47struct CopyOrigin {
31MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); 48 GLint level;
32MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", 49 GLint x;
33 MP_RGB(128, 192, 128)); 50 GLint y;
51 GLint z;
52};
34 53
35namespace { 54struct CopyRegion {
55 GLsizei width;
56 GLsizei height;
57 GLsizei depth;
58};
36 59
37struct FormatTuple { 60struct FormatTuple {
38 GLenum internal_format; 61 GLenum internal_format;
39 GLenum format = GL_NONE; 62 GLenum format = GL_NONE;
40 GLenum type = GL_NONE; 63 GLenum type = GL_NONE;
64 GLenum store_format = internal_format;
41}; 65};
42 66
43constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ 67constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
44 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM 68 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
45 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM 69 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
46 {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT 70 {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
47 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT 71 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
48 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM 72 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
49 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM 73 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
50 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM 74 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
51 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM 75 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
52 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT 76 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
53 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM 77 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
54 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM 78 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
55 {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM 79 {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
56 {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT 80 {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
57 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT 81 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
58 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT 82 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
59 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM 83 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
60 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM 84 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
61 {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT 85 {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
62 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT 86 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
63 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT 87 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
64 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT 88 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
65 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM 89 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
66 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM 90 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
67 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM 91 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
68 {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM 92 {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
69 {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM 93 {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
70 {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM 94 {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
71 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM 95 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
72 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM 96 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
73 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT 97 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
74 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT 98 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
75 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM 99 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
76 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM 100 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
77 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT 101 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
78 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT 102 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
79 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT 103 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
80 {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT 104 {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
81 {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT 105 {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
82 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT 106 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
83 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM 107 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
84 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM 108 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
85 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT 109 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
86 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT 110 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
87 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM 111 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
88 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT 112 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
89 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT 113 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
90 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT 114 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
91 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM 115 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
92 {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT 116 {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
93 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB 117 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB
94 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM 118 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
95 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM 119 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
96 {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT 120 {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
97 {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT 121 {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
98 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT 122 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
99 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT 123 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
100 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT 124 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
101 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT 125 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
102 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM 126 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
103 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM 127 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
104 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM 128 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
105 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM 129 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_UNORM
106 // Compressed sRGB formats 130 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
107 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB 131 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
108 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB 132 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
109 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB 133 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
110 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB 134 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
111 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM 135 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
112 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB 136 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
113 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB 137 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
114 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB 138 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
115 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB 139 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
116 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM 140 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
117 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB 141 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
118 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM 142 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
119 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB 143 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
120 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM 144 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
121 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB 145 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
122 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM 146 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
123 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB 147 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
124 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM 148 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
125 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB 149 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
126 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM 150 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
127 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB 151 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
128 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM 152 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
129 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB 153 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
130 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT 154 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
131 155 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
132 // Depth formats 156 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
133 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT 157 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
134 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
135
136 // DepthStencil formats
137 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
138 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
139 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, 158 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
140 GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT 159 GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
141}}; 160}};
142 161
162constexpr std::array ACCELERATED_FORMATS{
163 GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
164 GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
165 GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, GL_R8UI, GL_RGBA32I,
166 GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I,
167 GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16,
168 GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM,
169 GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
170};
171
143const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { 172const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
144 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); 173 ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
145 return tex_format_tuples[static_cast<std::size_t>(pixel_format)]; 174 return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
146} 175}
147 176
148GLenum GetTextureTarget(const SurfaceTarget& target) { 177GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
149 switch (target) { 178 switch (info.type) {
150 case SurfaceTarget::TextureBuffer: 179 case ImageType::e1D:
180 return GL_TEXTURE_1D_ARRAY;
181 case ImageType::e2D:
182 if (info.num_samples > 1) {
183 return GL_TEXTURE_2D_MULTISAMPLE_ARRAY;
184 }
185 return GL_TEXTURE_2D_ARRAY;
186 case ImageType::e3D:
187 return GL_TEXTURE_3D;
188 case ImageType::Linear:
189 return GL_TEXTURE_2D_ARRAY;
190 case ImageType::Buffer:
151 return GL_TEXTURE_BUFFER; 191 return GL_TEXTURE_BUFFER;
152 case SurfaceTarget::Texture1D: 192 }
193 UNREACHABLE_MSG("Invalid image type={}", info.type);
194 return GL_NONE;
195}
196
197GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
198 const bool is_multisampled = num_samples > 1;
199 switch (type) {
200 case ImageViewType::e1D:
153 return GL_TEXTURE_1D; 201 return GL_TEXTURE_1D;
154 case SurfaceTarget::Texture2D: 202 case ImageViewType::e2D:
155 return GL_TEXTURE_2D; 203 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
156 case SurfaceTarget::Texture3D: 204 case ImageViewType::Cube:
205 return GL_TEXTURE_CUBE_MAP;
206 case ImageViewType::e3D:
157 return GL_TEXTURE_3D; 207 return GL_TEXTURE_3D;
158 case SurfaceTarget::Texture1DArray: 208 case ImageViewType::e1DArray:
159 return GL_TEXTURE_1D_ARRAY; 209 return GL_TEXTURE_1D_ARRAY;
160 case SurfaceTarget::Texture2DArray: 210 case ImageViewType::e2DArray:
161 return GL_TEXTURE_2D_ARRAY; 211 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
162 case SurfaceTarget::TextureCubemap: 212 case ImageViewType::CubeArray:
163 return GL_TEXTURE_CUBE_MAP;
164 case SurfaceTarget::TextureCubeArray:
165 return GL_TEXTURE_CUBE_MAP_ARRAY; 213 return GL_TEXTURE_CUBE_MAP_ARRAY;
214 case ImageViewType::Rect:
215 return GL_TEXTURE_RECTANGLE;
216 case ImageViewType::Buffer:
217 return GL_TEXTURE_BUFFER;
166 } 218 }
167 UNREACHABLE(); 219 UNREACHABLE_MSG("Invalid image view type={}", type);
168 return {}; 220 return GL_NONE;
169} 221}
170 222
171GLint GetSwizzleSource(SwizzleSource source) { 223GLenum TextureMode(PixelFormat format, bool is_first) {
224 switch (format) {
225 case PixelFormat::D24_UNORM_S8_UINT:
226 case PixelFormat::D32_FLOAT_S8_UINT:
227 return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
228 case PixelFormat::S8_UINT_D24_UNORM:
229 return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
230 default:
231 UNREACHABLE();
232 return GL_DEPTH_COMPONENT;
233 }
234}
235
236GLint Swizzle(SwizzleSource source) {
172 switch (source) { 237 switch (source) {
173 case SwizzleSource::Zero: 238 case SwizzleSource::Zero:
174 return GL_ZERO; 239 return GL_ZERO;
@@ -184,531 +249,813 @@ GLint GetSwizzleSource(SwizzleSource source) {
184 case SwizzleSource::OneFloat: 249 case SwizzleSource::OneFloat:
185 return GL_ONE; 250 return GL_ONE;
186 } 251 }
187 UNREACHABLE(); 252 UNREACHABLE_MSG("Invalid swizzle source={}", source);
188 return GL_NONE; 253 return GL_NONE;
189} 254}
190 255
191GLenum GetComponent(PixelFormat format, bool is_first) { 256GLenum AttachmentType(PixelFormat format) {
192 switch (format) { 257 switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {
193 case PixelFormat::D24_UNORM_S8_UINT: 258 case SurfaceType::Depth:
194 case PixelFormat::D32_FLOAT_S8_UINT: 259 return GL_DEPTH_ATTACHMENT;
195 return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; 260 case SurfaceType::DepthStencil:
196 case PixelFormat::S8_UINT_D24_UNORM: 261 return GL_DEPTH_STENCIL_ATTACHMENT;
197 return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
198 default: 262 default:
199 UNREACHABLE(); 263 UNIMPLEMENTED_MSG("Unimplemented type={}", type);
200 return GL_DEPTH_COMPONENT; 264 return GL_NONE;
201 } 265 }
202} 266}
203 267
204void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { 268[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) {
205 if (params.IsBuffer()) { 269 if (!device.HasASTC() && IsPixelFormatASTC(format)) {
206 return; 270 return true;
207 } 271 }
208 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 272 switch (format) {
209 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 273 case PixelFormat::BC4_UNORM:
210 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 274 case PixelFormat::BC5_UNORM:
211 glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 275 return type == ImageType::e3D;
212 glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); 276 default:
213 if (params.num_levels == 1) { 277 break;
214 glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
215 } 278 }
279 return false;
216} 280}
217 281
218OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, 282[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
219 OGLBuffer& texture_buffer) { 283 switch (value) {
220 OGLTexture texture; 284 case SwizzleSource::G:
221 texture.Create(target); 285 return SwizzleSource::R;
286 default:
287 return value;
288 }
289}
222 290
223 switch (params.target) { 291void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
224 case SurfaceTarget::Texture1D: 292 switch (format) {
225 glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); 293 case PixelFormat::D24_UNORM_S8_UINT:
226 break; 294 case PixelFormat::D32_FLOAT_S8_UINT:
227 case SurfaceTarget::TextureBuffer: 295 case PixelFormat::S8_UINT_D24_UNORM:
228 texture_buffer.Create(); 296 UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G);
229 glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), 297 glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
230 nullptr, GL_DYNAMIC_STORAGE_BIT); 298 TextureMode(format, swizzle[0] == SwizzleSource::R));
231 glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); 299 std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
232 break; 300 break;
233 case SurfaceTarget::Texture2D: 301 default:
234 case SurfaceTarget::TextureCubemap:
235 glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
236 params.height);
237 break; 302 break;
238 case SurfaceTarget::Texture3D: 303 }
239 case SurfaceTarget::Texture2DArray: 304 std::array<GLint, 4> gl_swizzle;
240 case SurfaceTarget::TextureCubeArray: 305 std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle);
241 glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, 306 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
242 params.height, params.depth); 307}
308
309[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
310 const VideoCommon::ImageInfo& info) {
311 // Disable accelerated uploads for now as they don't implement swizzled uploads
312 return false;
313 switch (info.type) {
314 case ImageType::e2D:
315 case ImageType::e3D:
316 case ImageType::Linear:
243 break; 317 break;
244 default: 318 default:
245 UNREACHABLE(); 319 return false;
320 }
321 const GLenum internal_format = GetFormatTuple(info.format).internal_format;
322 const auto& format_info = runtime.FormatInfo(info.type, internal_format);
323 if (format_info.is_compressed) {
324 return false;
325 }
326 if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
327 return false;
246 } 328 }
329 if (format_info.compatibility_by_size) {
330 return true;
331 }
332 const GLenum store_format = StoreFormat(BytesPerBlock(info.format));
333 const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class;
334 return format_info.compatibility_class == store_class;
335}
247 336
248 ApplyTextureDefaults(params, texture.handle); 337[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
338 VideoCommon::SubresourceLayers subresource, GLenum target) {
339 switch (target) {
340 case GL_TEXTURE_2D_ARRAY:
341 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
342 return CopyOrigin{
343 .level = static_cast<GLint>(subresource.base_level),
344 .x = static_cast<GLint>(offset.x),
345 .y = static_cast<GLint>(offset.y),
346 .z = static_cast<GLint>(subresource.base_layer),
347 };
348 case GL_TEXTURE_3D:
349 return CopyOrigin{
350 .level = static_cast<GLint>(subresource.base_level),
351 .x = static_cast<GLint>(offset.x),
352 .y = static_cast<GLint>(offset.y),
353 .z = static_cast<GLint>(offset.z),
354 };
355 default:
356 UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
357 return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0};
358 }
359}
249 360
250 return texture; 361[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent,
362 VideoCommon::SubresourceLayers dst_subresource,
363 GLenum target) {
364 switch (target) {
365 case GL_TEXTURE_2D_ARRAY:
366 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
367 return CopyRegion{
368 .width = static_cast<GLsizei>(extent.width),
369 .height = static_cast<GLsizei>(extent.height),
370 .depth = static_cast<GLsizei>(dst_subresource.num_layers),
371 };
372 case GL_TEXTURE_3D:
373 return CopyRegion{
374 .width = static_cast<GLsizei>(extent.width),
375 .height = static_cast<GLsizei>(extent.height),
376 .depth = static_cast<GLsizei>(extent.depth),
377 };
378 default:
379 UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
380 return CopyRegion{.width = 0, .height = 0, .depth = 0};
381 }
251} 382}
252 383
253constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, 384void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
254 SwizzleSource w_source) { 385 if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
255 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | 386 const GLuint texture = image_view->DefaultHandle();
256 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); 387 glNamedFramebufferTexture(fbo, attachment, texture, 0);
388 return;
389 }
390 const GLuint texture = image_view->Handle(ImageViewType::e3D);
391 if (image_view->range.extent.layers > 1) {
392 // TODO: OpenGL doesn't support rendering to a fixed number of slices
393 glNamedFramebufferTexture(fbo, attachment, texture, 0);
394 } else {
395 const u32 slice = image_view->range.base.layer;
396 glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice);
397 }
257} 398}
258 399
259} // Anonymous namespace 400} // Anonymous namespace
260 401
261CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, 402ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
262 bool is_astc_supported) 403 : span(map, size), sync{sync_}, handle{handle_} {}
263 : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) {
264 if (is_converted) {
265 internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
266 format = GL_RGBA;
267 type = GL_UNSIGNED_BYTE;
268 } else {
269 const auto& tuple{GetFormatTuple(params.pixel_format)};
270 internal_format = tuple.internal_format;
271 format = tuple.format;
272 type = tuple.type;
273 is_compressed = params.IsCompressed();
274 }
275 target = GetTextureTarget(params.target);
276 texture = CreateTexture(params, target, internal_format, texture_buffer);
277 DecorateSurfaceName();
278 404
279 u32 num_layers = 1; 405ImageBufferMap::~ImageBufferMap() {
280 if (params.is_layered || params.target == SurfaceTarget::Texture3D) { 406 if (sync) {
281 num_layers = params.depth; 407 sync->Create();
282 } 408 }
283
284 main_view =
285 CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
286} 409}
287 410
288CachedSurface::~CachedSurface() = default; 411TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
412 StateTracker& state_tracker_)
413 : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
414 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
415 for (size_t i = 0; i < TARGETS.size(); ++i) {
416 const GLenum target = TARGETS[i];
417 for (const FormatTuple& tuple : FORMAT_TABLE) {
418 const GLenum format = tuple.internal_format;
419 GLint compat_class;
420 GLint compat_type;
421 GLint is_compressed;
422 glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class);
423 glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1,
424 &compat_type);
425 glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed);
426 const FormatProperties properties{
427 .compatibility_class = static_cast<GLenum>(compat_class),
428 .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE,
429 .is_compressed = is_compressed == GL_TRUE,
430 };
431 format_properties[i].emplace(format, properties);
432 }
433 }
434 null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
435 null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
436 null_image_3d.Create(GL_TEXTURE_3D);
437 null_image_rect.Create(GL_TEXTURE_RECTANGLE);
438 glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
439 glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
440 glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
441 glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
442
443 std::array<GLuint, 4> new_handles;
444 glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
445 null_image_view_1d.handle = new_handles[0];
446 null_image_view_2d.handle = new_handles[1];
447 null_image_view_2d_array.handle = new_handles[2];
448 null_image_view_cube.handle = new_handles[3];
449 glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1,
450 0, 1);
451 glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0,
452 1, 0, 1);
453 glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY,
454 null_image_cube_array.handle, GL_R8, 0, 1, 0, 1);
455 glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
456 GL_R8, 0, 1, 0, 6);
457 const std::array texture_handles{
458 null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
459 null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
460 null_image_view_2d_array.handle, null_image_view_cube.handle,
461 };
462 for (const GLuint handle : texture_handles) {
463 static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
464 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
465 }
466 const auto set_view = [this](ImageViewType type, GLuint handle) {
467 if (device.HasDebuggingToolAttached()) {
468 const std::string name = fmt::format("NullImage {}", type);
469 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
470 }
471 null_image_views[static_cast<size_t>(type)] = handle;
472 };
473 set_view(ImageViewType::e1D, null_image_view_1d.handle);
474 set_view(ImageViewType::e2D, null_image_view_2d.handle);
475 set_view(ImageViewType::Cube, null_image_view_cube.handle);
476 set_view(ImageViewType::e3D, null_image_3d.handle);
477 set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
478 set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
479 set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
480 set_view(ImageViewType::Rect, null_image_rect.handle);
481}
289 482
290void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { 483TextureCacheRuntime::~TextureCacheRuntime() = default;
291 MICROPROFILE_SCOPE(OpenGL_Texture_Download);
292 484
293 if (params.IsBuffer()) { 485void TextureCacheRuntime::Finish() {
294 glGetNamedBufferSubData(texture_buffer.handle, 0, 486 glFinish();
295 static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)), 487}
296 staging_buffer.data());
297 return;
298 }
299 488
300 SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); 489ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
490 return upload_buffers.RequestMap(size, true);
491}
301 492
302 for (u32 level = 0; level < params.emulated_levels; ++level) { 493ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
303 glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); 494 return download_buffers.RequestMap(size, false);
304 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); 495}
305 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
306 496
307 u8* const mip_data = staging_buffer.data() + mip_offset; 497void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
308 const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); 498 std::span<const ImageCopy> copies) {
309 if (is_compressed) { 499 const GLuint dst_name = dst_image.Handle();
310 glGetCompressedTextureImage(texture.handle, level, size, mip_data); 500 const GLuint src_name = src_image.Handle();
311 } else { 501 const GLenum dst_target = ImageTarget(dst_image.info);
312 glGetTextureImage(texture.handle, level, format, type, size, mip_data); 502 const GLenum src_target = ImageTarget(src_image.info);
313 } 503 for (const ImageCopy& copy : copies) {
504 const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target);
505 const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target);
506 const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target);
507 glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y,
508 src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x,
509 dst_origin.y, dst_origin.z, region.width, region.height, region.depth);
314 } 510 }
315} 511}
316 512
317void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { 513bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
318 MICROPROFILE_SCOPE(OpenGL_Texture_Upload); 514 if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
319 SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); 515 return false;
320 for (u32 level = 0; level < params.emulated_levels; ++level) {
321 UploadTextureMipmap(level, staging_buffer);
322 } 516 }
517 return true;
323} 518}
324 519
325void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { 520void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
326 glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); 521 std::span<const ImageCopy> copies) {
327 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); 522 if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
328 523 ASSERT(src.info.type == ImageType::e3D);
329 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); 524 util_shaders.CopyBC4(dst, src, copies);
330 const u8* buffer{staging_buffer.data() + mip_offset};
331 if (is_compressed) {
332 const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
333 switch (params.target) {
334 case SurfaceTarget::Texture2D:
335 glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
336 static_cast<GLsizei>(params.GetMipWidth(level)),
337 static_cast<GLsizei>(params.GetMipHeight(level)),
338 internal_format, image_size, buffer);
339 break;
340 case SurfaceTarget::Texture3D:
341 case SurfaceTarget::Texture2DArray:
342 case SurfaceTarget::TextureCubeArray:
343 glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
344 static_cast<GLsizei>(params.GetMipWidth(level)),
345 static_cast<GLsizei>(params.GetMipHeight(level)),
346 static_cast<GLsizei>(params.GetMipDepth(level)),
347 internal_format, image_size, buffer);
348 break;
349 case SurfaceTarget::TextureCubemap: {
350 const std::size_t layer_size{params.GetHostLayerSize(level)};
351 for (std::size_t face = 0; face < params.depth; ++face) {
352 glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
353 static_cast<GLsizei>(params.GetMipWidth(level)),
354 static_cast<GLsizei>(params.GetMipHeight(level)), 1,
355 internal_format, static_cast<GLsizei>(layer_size),
356 buffer);
357 buffer += layer_size;
358 }
359 break;
360 }
361 default:
362 UNREACHABLE();
363 }
364 } else { 525 } else {
365 switch (params.target) { 526 UNREACHABLE();
366 case SurfaceTarget::Texture1D:
367 glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
368 buffer);
369 break;
370 case SurfaceTarget::TextureBuffer:
371 ASSERT(level == 0);
372 glNamedBufferSubData(texture_buffer.handle, 0,
373 params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
374 break;
375 case SurfaceTarget::Texture1DArray:
376 case SurfaceTarget::Texture2D:
377 glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
378 params.GetMipHeight(level), format, type, buffer);
379 break;
380 case SurfaceTarget::Texture3D:
381 case SurfaceTarget::Texture2DArray:
382 case SurfaceTarget::TextureCubeArray:
383 glTextureSubImage3D(
384 texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
385 static_cast<GLsizei>(params.GetMipHeight(level)),
386 static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
387 break;
388 case SurfaceTarget::TextureCubemap:
389 for (std::size_t face = 0; face < params.depth; ++face) {
390 glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
391 params.GetMipWidth(level), params.GetMipHeight(level), 1,
392 format, type, buffer);
393 buffer += params.GetHostLayerSize(level);
394 }
395 break;
396 default:
397 UNREACHABLE();
398 }
399 } 527 }
400} 528}
401 529
402void CachedSurface::DecorateSurfaceName() { 530void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
403 LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); 531 const std::array<Offset2D, 2>& dst_region,
404} 532 const std::array<Offset2D, 2>& src_region,
533 Tegra::Engines::Fermi2D::Filter filter,
534 Tegra::Engines::Fermi2D::Operation operation) {
535 state_tracker.NotifyScissor0();
536 state_tracker.NotifyRasterizeEnable();
537 state_tracker.NotifyFramebufferSRGB();
405 538
406void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { 539 ASSERT(dst->BufferBits() == src->BufferBits());
407 LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); 540
541 glEnable(GL_FRAMEBUFFER_SRGB);
542 glDisable(GL_RASTERIZER_DISCARD);
543 glDisablei(GL_SCISSOR_TEST, 0);
544
545 const GLbitfield buffer_bits = dst->BufferBits();
546 const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0;
547 const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
548 glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y,
549 src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y,
550 dst_region[1].x, dst_region[1].y, buffer_bits,
551 is_linear ? GL_LINEAR : GL_NEAREST);
408} 552}
409 553
410View CachedSurface::CreateView(const ViewParams& view_key) { 554void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
411 return CreateViewInner(view_key, false); 555 size_t buffer_offset,
556 std::span<const SwizzleParameters> swizzles) {
557 switch (image.info.type) {
558 case ImageType::e2D:
559 return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
560 case ImageType::e3D:
561 return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
562 case ImageType::Linear:
563 return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
564 default:
565 UNREACHABLE();
566 break;
567 }
412} 568}
413 569
414View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { 570void TextureCacheRuntime::InsertUploadMemoryBarrier() {
415 auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy); 571 glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
416 views[view_key] = view;
417 if (!is_proxy)
418 view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
419 return view;
420} 572}
421 573
422CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, 574FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const {
423 bool is_proxy) 575 switch (type) {
424 : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format}, 576 case ImageType::e1D:
425 target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { 577 return format_properties[0].at(internal_format);
426 if (!is_proxy) { 578 case ImageType::e2D:
427 main_view = CreateTextureView(); 579 case ImageType::Linear:
580 return format_properties[1].at(internal_format);
581 case ImageType::e3D:
582 return format_properties[2].at(internal_format);
583 default:
584 UNREACHABLE();
585 return FormatProperties{};
428 } 586 }
429} 587}
430 588
431CachedSurfaceView::~CachedSurfaceView() = default; 589TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
590 : storage_flags{storage_flags_}, map_flags{map_flags_} {}
432 591
433void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { 592TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
434 ASSERT(params.num_levels == 1);
435 593
436 if (params.target == SurfaceTarget::Texture3D) { 594ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
437 if (params.num_layers > 1) { 595 bool insert_fence) {
438 ASSERT(params.base_layer == 0); 596 const size_t index = RequestBuffer(requested_size);
439 glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); 597 OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
440 } else { 598 return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
441 glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, 599}
442 params.base_level, params.base_layer); 600
443 } 601size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
444 return; 602 if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
603 return *index;
445 } 604 }
446 605
447 if (params.num_layers > 1) { 606 OGLBuffer& buffer = buffers.emplace_back();
448 UNIMPLEMENTED_IF(params.base_layer != 0); 607 buffer.Create();
449 glFramebufferTexture(fb_target, attachment, GetTexture(), 0); 608 glNamedBufferStorage(buffer.handle, requested_size, nullptr,
450 return; 609 storage_flags | GL_MAP_PERSISTENT_BIT);
610 maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
611 map_flags | GL_MAP_PERSISTENT_BIT)));
612
613 syncs.emplace_back();
614 sizes.push_back(requested_size);
615
616 ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
617 maps.size() == sizes.size());
618
619 return buffers.size() - 1;
620}
621
622std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
623 size_t smallest_buffer = std::numeric_limits<size_t>::max();
624 std::optional<size_t> found;
625 const size_t num_buffers = sizes.size();
626 for (size_t index = 0; index < num_buffers; ++index) {
627 const size_t buffer_size = sizes[index];
628 if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
629 continue;
630 }
631 if (syncs[index].handle != 0) {
632 GLint status;
633 glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status);
634 if (status != GL_SIGNALED) {
635 continue;
636 }
637 syncs[index].Release();
638 }
639 smallest_buffer = buffer_size;
640 found = index;
451 } 641 }
642 return found;
643}
452 644
453 const GLenum view_target = surface.GetTarget(); 645Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
454 const GLuint texture = surface.GetTexture(); 646 VAddr cpu_addr_)
455 switch (surface.GetSurfaceParams().target) { 647 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
456 case SurfaceTarget::Texture1D: 648 if (CanBeAccelerated(runtime, info)) {
457 glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); 649 flags |= ImageFlagBits::AcceleratedUpload;
650 }
651 if (IsConverted(runtime.device, info.format, info.type)) {
652 flags |= ImageFlagBits::Converted;
653 gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
654 gl_store_format = GL_RGBA8;
655 gl_format = GL_RGBA;
656 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
657 } else {
658 const auto& tuple = GetFormatTuple(info.format);
659 gl_internal_format = tuple.internal_format;
660 gl_store_format = tuple.store_format;
661 gl_format = tuple.format;
662 gl_type = tuple.type;
663 }
664 const GLenum target = ImageTarget(info);
665 const GLsizei width = info.size.width;
666 const GLsizei height = info.size.height;
667 const GLsizei depth = info.size.depth;
668 const int max_host_mip_levels = std::bit_width(info.size.width);
669 const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
670 const GLsizei num_layers = info.resources.layers;
671 const GLsizei num_samples = info.num_samples;
672
673 GLuint handle = 0;
674 if (target != GL_TEXTURE_BUFFER) {
675 texture.Create(target);
676 handle = texture.handle;
677 }
678 switch (target) {
679 case GL_TEXTURE_1D_ARRAY:
680 glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers);
458 break; 681 break;
459 case SurfaceTarget::Texture2D: 682 case GL_TEXTURE_2D_ARRAY:
460 glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); 683 glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers);
461 break; 684 break;
462 case SurfaceTarget::Texture1DArray: 685 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
463 case SurfaceTarget::Texture2DArray: 686 // TODO: Where should 'fixedsamplelocations' come from?
464 case SurfaceTarget::TextureCubemap: 687 const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
465 case SurfaceTarget::TextureCubeArray: 688 glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x,
466 glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, 689 height >> samples_y, num_layers, GL_FALSE);
467 params.base_layer); 690 break;
691 }
692 case GL_TEXTURE_RECTANGLE:
693 glTextureStorage2D(handle, num_levels, gl_store_format, width, height);
694 break;
695 case GL_TEXTURE_3D:
696 glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth);
697 break;
698 case GL_TEXTURE_BUFFER:
699 buffer.Create();
700 glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
468 break; 701 break;
469 default: 702 default:
470 UNIMPLEMENTED(); 703 UNREACHABLE_MSG("Invalid target=0x{:x}", target);
704 break;
705 }
706 if (runtime.device.HasDebuggingToolAttached()) {
707 const std::string name = VideoCommon::Name(*this);
708 glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
709 static_cast<GLsizei>(name.size()), name.data());
471 } 710 }
472} 711}
473 712
474GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, 713void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
475 SwizzleSource z_source, SwizzleSource w_source) { 714 std::span<const VideoCommon::BufferImageCopy> copies) {
476 if (GetSurfaceParams().IsBuffer()) { 715 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
477 return GetTexture(); 716 glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
478 }
479 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
480 if (current_swizzle == new_swizzle) {
481 return current_view;
482 }
483 current_swizzle = new_swizzle;
484 717
485 const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); 718 glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
486 OGLTextureView& view = entry->second;
487 if (!is_cache_miss) {
488 current_view = view.handle;
489 return view.handle;
490 }
491 view = CreateTextureView();
492 current_view = view.handle;
493 719
494 std::array swizzle{x_source, y_source, z_source, w_source}; 720 u32 current_row_length = std::numeric_limits<u32>::max();
721 u32 current_image_height = std::numeric_limits<u32>::max();
495 722
496 switch (const PixelFormat format = GetSurfaceParams().pixel_format) { 723 for (const VideoCommon::BufferImageCopy& copy : copies) {
497 case PixelFormat::D24_UNORM_S8_UINT: 724 if (current_row_length != copy.buffer_row_length) {
498 case PixelFormat::D32_FLOAT_S8_UINT: 725 current_row_length = copy.buffer_row_length;
499 case PixelFormat::S8_UINT_D24_UNORM: 726 glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length);
500 UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); 727 }
501 glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, 728 if (current_image_height != copy.buffer_image_height) {
502 GetComponent(format, x_source == SwizzleSource::R)); 729 current_image_height = copy.buffer_image_height;
503 730 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
504 // Make sure we sample the first component 731 }
505 std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { 732 CopyBufferToImage(copy, buffer_offset);
506 return value == SwizzleSource::G ? SwizzleSource::R : value;
507 });
508 [[fallthrough]];
509 default: {
510 const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
511 GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
512 glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
513 break;
514 }
515 } 733 }
516 return view.handle;
517} 734}
518 735
519OGLTextureView CachedSurfaceView::CreateTextureView() const { 736void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
520 OGLTextureView texture_view; 737 std::span<const VideoCommon::BufferCopy> copies) {
521 texture_view.Create(); 738 for (const VideoCommon::BufferCopy& copy : copies) {
522 739 glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
523 if (target == GL_TEXTURE_3D) { 740 copy.dst_offset, copy.size);
524 glTextureView(texture_view.handle, target, surface.texture.handle, format,
525 params.base_level, params.num_levels, 0, 1);
526 } else {
527 glTextureView(texture_view.handle, target, surface.texture.handle, format,
528 params.base_level, params.num_levels, params.base_layer, params.num_layers);
529 } 741 }
530 ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
531
532 return texture_view;
533} 742}
534 743
535TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer, 744void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
536 Tegra::Engines::Maxwell3D& maxwell3d, 745 std::span<const VideoCommon::BufferImageCopy> copies) {
537 Tegra::MemoryManager& gpu_memory, const Device& device, 746 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
538 StateTracker& state_tracker_)
539 : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{
540 state_tracker_} {
541 src_framebuffer.Create();
542 dst_framebuffer.Create();
543}
544 747
545TextureCacheOpenGL::~TextureCacheOpenGL() = default; 748 glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
749 glPixelStorei(GL_PACK_ALIGNMENT, 1);
546 750
547Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { 751 u32 current_row_length = std::numeric_limits<u32>::max();
548 return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported); 752 u32 current_image_height = std::numeric_limits<u32>::max();
549}
550 753
551void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, 754 for (const VideoCommon::BufferImageCopy& copy : copies) {
552 const VideoCommon::CopyParams& copy_params) { 755 if (current_row_length != copy.buffer_row_length) {
553 const auto& src_params = src_surface->GetSurfaceParams(); 756 current_row_length = copy.buffer_row_length;
554 const auto& dst_params = dst_surface->GetSurfaceParams(); 757 glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
555 if (src_params.type != dst_params.type) { 758 }
556 // A fallback is needed 759 if (current_image_height != copy.buffer_image_height) {
557 return; 760 current_image_height = copy.buffer_image_height;
761 glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
762 }
763 CopyImageToBuffer(copy, buffer_offset);
558 } 764 }
559 const auto src_handle = src_surface->GetTexture();
560 const auto src_target = src_surface->GetTarget();
561 const auto dst_handle = dst_surface->GetTexture();
562 const auto dst_target = dst_surface->GetTarget();
563 glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
564 copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
565 copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
566 copy_params.dest_z, copy_params.width, copy_params.height,
567 copy_params.depth);
568} 765}
569 766
570void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, 767void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
571 const Tegra::Engines::Fermi2D::Config& copy_config) { 768 // Compressed formats don't have a pixel format or type
572 const auto& src_params{src_view->GetSurfaceParams()}; 769 const bool is_compressed = gl_format == GL_NONE;
573 const auto& dst_params{dst_view->GetSurfaceParams()}; 770 const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset);
574 UNIMPLEMENTED_IF(src_params.depth != 1);
575 UNIMPLEMENTED_IF(dst_params.depth != 1);
576
577 state_tracker.NotifyScissor0();
578 state_tracker.NotifyFramebuffer();
579 state_tracker.NotifyRasterizeEnable();
580 state_tracker.NotifyFramebufferSRGB();
581 771
582 if (dst_params.srgb_conversion) { 772 switch (info.type) {
583 glEnable(GL_FRAMEBUFFER_SRGB); 773 case ImageType::e1D:
584 } else { 774 if (is_compressed) {
585 glDisable(GL_FRAMEBUFFER_SRGB); 775 glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
776 copy.image_offset.x, copy.image_subresource.base_layer,
777 copy.image_extent.width,
778 copy.image_subresource.num_layers, gl_internal_format,
779 static_cast<GLsizei>(copy.buffer_size), offset);
780 } else {
781 glTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
782 copy.image_offset.x, copy.image_subresource.base_layer,
783 copy.image_extent.width, copy.image_subresource.num_layers,
784 gl_format, gl_type, offset);
785 }
786 break;
787 case ImageType::e2D:
788 case ImageType::Linear:
789 if (is_compressed) {
790 glCompressedTextureSubImage3D(
791 texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
792 copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width,
793 copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format,
794 static_cast<GLsizei>(copy.buffer_size), offset);
795 } else {
796 glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
797 copy.image_offset.x, copy.image_offset.y,
798 copy.image_subresource.base_layer, copy.image_extent.width,
799 copy.image_extent.height, copy.image_subresource.num_layers,
800 gl_format, gl_type, offset);
801 }
802 break;
803 case ImageType::e3D:
804 if (is_compressed) {
805 glCompressedTextureSubImage3D(
806 texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
807 copy.image_offset.y, copy.image_offset.z, copy.image_extent.width,
808 copy.image_extent.height, copy.image_extent.depth, gl_internal_format,
809 static_cast<GLsizei>(copy.buffer_size), offset);
810 } else {
811 glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
812 copy.image_offset.x, copy.image_offset.y, copy.image_offset.z,
813 copy.image_extent.width, copy.image_extent.height,
814 copy.image_extent.depth, gl_format, gl_type, offset);
815 }
816 break;
817 default:
818 UNREACHABLE();
586 } 819 }
587 glDisable(GL_RASTERIZER_DISCARD); 820}
588 glDisablei(GL_SCISSOR_TEST, 0);
589
590 glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
591 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
592
593 GLenum buffers = 0;
594 if (src_params.type == SurfaceType::ColorTexture) {
595 src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
596 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
597 0);
598
599 dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
600 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
601 0);
602
603 buffers = GL_COLOR_BUFFER_BIT;
604 } else if (src_params.type == SurfaceType::Depth) {
605 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
606 src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
607 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
608 821
609 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 822void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
610 dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); 823 const GLint x_offset = copy.image_offset.x;
611 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); 824 const GLsizei width = copy.image_extent.width;
612 825
613 buffers = GL_DEPTH_BUFFER_BIT; 826 const GLint level = copy.image_subresource.base_level;
614 } else if (src_params.type == SurfaceType::DepthStencil) { 827 const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size);
615 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 828 void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset);
616 src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);
617 829
618 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); 830 GLint y_offset = 0;
619 dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); 831 GLint z_offset = 0;
832 GLsizei height = 1;
833 GLsizei depth = 1;
620 834
621 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; 835 switch (info.type) {
836 case ImageType::e1D:
837 y_offset = copy.image_subresource.base_layer;
838 height = copy.image_subresource.num_layers;
839 break;
840 case ImageType::e2D:
841 case ImageType::Linear:
842 y_offset = copy.image_offset.y;
843 z_offset = copy.image_subresource.base_layer;
844 height = copy.image_extent.height;
845 depth = copy.image_subresource.num_layers;
846 break;
847 case ImageType::e3D:
848 y_offset = copy.image_offset.y;
849 z_offset = copy.image_offset.z;
850 height = copy.image_extent.height;
851 depth = copy.image_extent.depth;
852 break;
853 default:
854 UNREACHABLE();
855 }
856 // Compressed formats don't have a pixel format or type
857 const bool is_compressed = gl_format == GL_NONE;
858 if (is_compressed) {
859 glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width,
860 height, depth, buffer_size, offset);
861 } else {
862 glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height,
863 depth, gl_format, gl_type, buffer_size, offset);
622 } 864 }
623
624 const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
625 const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
626 const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
627
628 glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top),
629 static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom),
630 static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top),
631 static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom),
632 buffers,
633 is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
634} 865}
635 866
636void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { 867ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
637 MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); 868 ImageId image_id_, Image& image)
638 const auto& src_params = src_surface->GetSurfaceParams(); 869 : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
639 const auto& dst_params = dst_surface->GetSurfaceParams(); 870 const Device& device = runtime.device;
640 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); 871 if (True(image.flags & ImageFlagBits::Converted)) {
872 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
873 } else {
874 internal_format = GetFormatTuple(format).internal_format;
875 }
876 VideoCommon::SubresourceRange flatten_range = info.range;
877 std::array<GLuint, 2> handles;
878 stored_views.reserve(2);
641 879
642 const auto source_format = GetFormatTuple(src_params.pixel_format); 880 switch (info.type) {
643 const auto dest_format = GetFormatTuple(dst_params.pixel_format); 881 case ImageViewType::e1DArray:
882 flatten_range.extent.layers = 1;
883 [[fallthrough]];
884 case ImageViewType::e1D:
885 glGenTextures(2, handles.data());
886 SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
887 SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
888 break;
889 case ImageViewType::e2DArray:
890 flatten_range.extent.layers = 1;
891 [[fallthrough]];
892 case ImageViewType::e2D:
893 if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
894 // 2D and 2D array views on a 3D textures are used exclusively for render targets
895 ASSERT(info.range.extent.levels == 1);
896 const VideoCommon::SubresourceRange slice_range{
897 .base = {.level = info.range.base.level, .layer = 0},
898 .extent = {.levels = 1, .layers = 1},
899 };
900 glGenTextures(1, handles.data());
901 SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
902 break;
903 }
904 glGenTextures(2, handles.data());
905 SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
906 SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
907 break;
908 case ImageViewType::e3D:
909 glGenTextures(1, handles.data());
910 SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
911 break;
912 case ImageViewType::CubeArray:
913 flatten_range.extent.layers = 6;
914 [[fallthrough]];
915 case ImageViewType::Cube:
916 glGenTextures(2, handles.data());
917 SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
918 SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
919 break;
920 case ImageViewType::Rect:
921 glGenTextures(1, handles.data());
922 SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
923 break;
924 case ImageViewType::Buffer:
925 glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
926 SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
927 break;
928 }
929 default_handle = Handle(info.type);
930}
644
645    const std::size_t source_size = src_surface->GetHostSizeInBytes();
646    const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
647
648    const std::size_t buffer_size = std::max(source_size, dest_size);
649
650    GLuint copy_pbo_handle = FetchPBO(buffer_size);
651
652    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
653
654    if (src_surface->IsCompressed()) {
655        glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
656                                    nullptr);
657    } else {
658        glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
659                          static_cast<GLsizei>(source_size), nullptr);
660    }
661    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
662
663    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
664
665    const GLsizei width = static_cast<GLsizei>(dst_params.width);
666    const GLsizei height = static_cast<GLsizei>(dst_params.height);
667    const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
668    if (dst_surface->IsCompressed()) {
669        LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
670        UNREACHABLE();
671    } else {
672        switch (dst_params.target) {
673        case SurfaceTarget::Texture1D:
674            glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
675                                dest_format.type, nullptr);
676            break;
677        case SurfaceTarget::Texture2D:
678            glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
679                                dest_format.format, dest_format.type, nullptr);
680            break;
681        case SurfaceTarget::Texture3D:
682        case SurfaceTarget::Texture2DArray:
683        case SurfaceTarget::TextureCubeArray:
684            glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
685                                dest_format.format, dest_format.type, nullptr);
686            break;
687        case SurfaceTarget::TextureCubemap:
688            glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
689                                dest_format.format, dest_format.type, nullptr);
690            break;
691        default:
692            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
693                         static_cast<u32>(dst_params.target));
694            UNREACHABLE();
695        }
696    }
697    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
698
699    glTextureBarrier();
700}
701
702GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
703    ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; });
704    const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
705    OGLBuffer& cp = copy_pbo_cache[l2];
706    if (cp.handle == 0) {
707        const std::size_t ceil_size = 1ULL << l2;
708        cp.Create();
709        cp.MakeStreamCopy(ceil_size);
710    }
711    return cp.handle;
712}
713
714} // namespace OpenGL
931
932ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
933    : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
934
935void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
936                          GLuint handle, const VideoCommon::ImageViewInfo& info,
937                          VideoCommon::SubresourceRange view_range) {
938    if (info.type == ImageViewType::Buffer) {
939        // TODO: Take offset from buffer cache
940        glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
941                             image.guest_size_bytes);
942    } else {
943        const GLuint parent = image.texture.handle;
944        const GLenum target = ImageTarget(view_type, image.info.num_samples);
945        glTextureView(handle, target, parent, internal_format, view_range.base.level,
946                      view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
947        if (!info.IsRenderTarget()) {
948            ApplySwizzle(handle, format, info.Swizzle());
949        }
950    }
951    if (device.HasDebuggingToolAttached()) {
952        const std::string name = VideoCommon::Name(*this, view_type);
953        glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
954    }
955    stored_views.emplace_back().handle = handle;
956    views[static_cast<size_t>(view_type)] = handle;
957}
958
959Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
960    const GLenum compare_mode = config.depth_compare_enabled ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE;
961    const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func);
962    const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None);
963    const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter);
964    const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter);
965    const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE;
966
967    UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
968    UNIMPLEMENTED_IF(config.float_coord_normalization != 0);
969
970    sampler.Create();
971    const GLuint handle = sampler.handle;
972    glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
973    glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
974    glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
975    glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
976    glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
977    glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
978    glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
979    glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
980    glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
981    glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
982    glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
983
984    if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
985        glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy());
986    } else {
987        LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
988    }
989    if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
990        glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
991    } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
992        LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
993    }
994    if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
995        glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
996    } else if (seamless == GL_FALSE) {
997        // We default to false because it's more common
998        LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
999    }
1000}
1001
1002Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
1003                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
1004    // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
1005    // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
1006    // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
1007    // mismatching size, this is why core framebuffers are preferred.
1008    GLuint handle;
1009    glGenFramebuffers(1, &handle);
1010    glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
1011
1012    GLsizei num_buffers = 0;
1013    std::array<GLenum, NUM_RT> gl_draw_buffers;
1014    gl_draw_buffers.fill(GL_NONE);
1015
1016    for (size_t index = 0; index < color_buffers.size(); ++index) {
1017        const ImageView* const image_view = color_buffers[index];
1018        if (!image_view) {
1019            continue;
1020        }
1021        buffer_bits |= GL_COLOR_BUFFER_BIT;
1022        gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index];
1023        num_buffers = static_cast<GLsizei>(index + 1);
1024
1025        const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index);
1026        AttachTexture(handle, attachment, image_view);
1027    }
1028
1029    if (const ImageView* const image_view = depth_buffer; image_view) {
1030        if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) {
1031            buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
1032        } else {
1033            buffer_bits |= GL_DEPTH_BUFFER_BIT;
1034        }
1035        const GLenum attachment = AttachmentType(image_view->format);
1036        AttachTexture(handle, attachment, image_view);
1037    }
1038
1039    if (num_buffers > 1) {
1040        glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data());
1041    } else if (num_buffers > 0) {
1042        glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]);
1043    } else {
1044        glNamedFramebufferDrawBuffer(handle, GL_NONE);
1045    }
1046
1047    glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width);
1048    glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height);
1049    // TODO
1050    // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...);
1051    // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...);
1052    // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...);
1053
1054    if (runtime.device.HasDebuggingToolAttached()) {
1055        const std::string name = VideoCommon::Name(key);
1056        glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data());
1057    }
1058    framebuffer.handle = handle;
1059}
1060
1061} // namespace OpenGL
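The comment in the new Framebuffer constructor is subtle: on Nvidia, using DSA entry points on a framebuffer name that was never bound can yield an EXT_framebuffer_object-style object with stricter attachment rules. A condensed sketch of the bind-once pattern, with a completeness check added purely for illustration (the assert is not in the diff):

    GLuint MakeCoreFramebuffer(GLuint color_texture) {
        GLuint handle;
        glGenFramebuffers(1, &handle);
        // Binding once forces creation of a core framebuffer object, which
        // permits attachments with mismatching sizes, unlike the EXT flavor
        glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
        glNamedFramebufferTexture(handle, GL_COLOR_ATTACHMENT0, color_texture, 0);
        glNamedFramebufferDrawBuffer(handle, GL_COLOR_ATTACHMENT0);
        ASSERT(glCheckNamedFramebufferStatus(handle, GL_DRAW_FRAMEBUFFER) ==
               GL_FRAMEBUFFER_COMPLETE);
        return handle;
    }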
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 7787134fc..04193e31e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -4,156 +4,247 @@
4
5#pragma once
6
7#include <array>
8#include <functional>
9#include <memory>
10#include <unordered_map>
11#include <utility>
12#include <vector>
13
14#include <glad/glad.h>
15
16#include "common/common_types.h"
17#include "video_core/engines/shader_bytecode.h"
18#include "video_core/renderer_opengl/gl_device.h"
19#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/texture_cache/texture_cache.h"
21
22namespace OpenGL {
23
24using VideoCommon::SurfaceParams;
25using VideoCommon::ViewParams;
26
27class CachedSurfaceView;
28class CachedSurface;
29class TextureCacheOpenGL;
30class StateTracker;
31
32using Surface = std::shared_ptr<CachedSurface>;
33using View = std::shared_ptr<CachedSurfaceView>;
34using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
35
36class CachedSurface final : public VideoCommon::SurfaceBase<View> {
37    friend CachedSurfaceView;
38
39public:
40    explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported);
41    ~CachedSurface();
42
43    void UploadTexture(const std::vector<u8>& staging_buffer) override;
44    void DownloadTexture(std::vector<u8>& staging_buffer) override;
45
46    GLenum GetTarget() const {
47        return target;
48    }
49
50    GLuint GetTexture() const {
51        return texture.handle;
52    }
53
54    bool IsCompressed() const {
55        return is_compressed;
56    }
57
58protected:
59    void DecorateSurfaceName() override;
60
61    View CreateView(const ViewParams& view_key) override;
62    View CreateViewInner(const ViewParams& view_key, bool is_proxy);
63
64private:
65    void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
66
67    GLenum internal_format{};
68    GLenum format{};
69    GLenum type{};
70    bool is_compressed{};
71    GLenum target{};
72    u32 view_count{};
73
74    OGLTexture texture;
75    OGLBuffer texture_buffer;
76};
77
78class CachedSurfaceView final : public VideoCommon::ViewBase {
79public:
80    explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
81    ~CachedSurfaceView();
82
83    /// @brief Attaches this texture view to the currently bound fb_target framebuffer
84    /// @param attachment Attachment to bind textures to
85    /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
86    void Attach(GLenum attachment, GLenum fb_target) const;
87
88    GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
89                      Tegra::Texture::SwizzleSource y_source,
90                      Tegra::Texture::SwizzleSource z_source,
91                      Tegra::Texture::SwizzleSource w_source);
92
93    void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
94
95    void MarkAsModified(u64 tick) {
96        surface.MarkAsModified(true, tick);
97    }
98
99    GLuint GetTexture() const {
100        if (is_proxy) {
101            return surface.GetTexture();
102        }
103        return main_view.handle;
104    }
105
106    GLenum GetFormat() const {
107        return format;
108    }
109
110    const SurfaceParams& GetSurfaceParams() const {
111        return surface.GetSurfaceParams();
112    }
113
114private:
115    OGLTextureView CreateTextureView() const;
116
117    CachedSurface& surface;
118    const GLenum format;
119    const GLenum target;
120    const bool is_proxy;
121
122    std::unordered_map<u32, OGLTextureView> view_cache;
123    OGLTextureView main_view;
124
125    // Use an invalid default so it always fails the comparison test
126    u32 current_swizzle = 0xffffffff;
127    GLuint current_view = 0;
128};
129
130class TextureCacheOpenGL final : public TextureCacheBase {
131public:
132    explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
133                                Tegra::Engines::Maxwell3D& maxwell3d,
134                                Tegra::MemoryManager& gpu_memory, const Device& device,
135                                StateTracker& state_tracker);
136    ~TextureCacheOpenGL();
137
138protected:
139    Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
140
141    void ImageCopy(Surface& src_surface, Surface& dst_surface,
142                   const VideoCommon::CopyParams& copy_params) override;
143
144    void ImageBlit(View& src_view, View& dst_view,
145                   const Tegra::Engines::Fermi2D::Config& copy_config) override;
146
147    void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
148
149private:
150    GLuint FetchPBO(std::size_t buffer_size);
151
152    StateTracker& state_tracker;
153
154    OGLFramebuffer src_framebuffer;
155    OGLFramebuffer dst_framebuffer;
156    std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
157};
158
159} // namespace OpenGL
4
5#pragma once
6
7#include <memory>
8#include <span>
9
10#include <glad/glad.h>
11
12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/util_shaders.h"
14#include "video_core/texture_cache/texture_cache.h"
15
16namespace OpenGL {
17
18class Device;
19class ProgramManager;
20class StateTracker;
21
22class Framebuffer;
23class Image;
24class ImageView;
25class Sampler;
26
27using VideoCommon::ImageId;
28using VideoCommon::ImageViewId;
29using VideoCommon::ImageViewType;
30using VideoCommon::NUM_RT;
31using VideoCommon::Offset2D;
32using VideoCommon::RenderTargets;
33
34class ImageBufferMap {
35public:
36    explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
37    ~ImageBufferMap();
38
39    GLuint Handle() const noexcept {
40        return handle;
41    }
42
43    std::span<u8> Span() const noexcept {
44        return span;
45    }
46
47private:
48    std::span<u8> span;
49    OGLSync* sync;
50    GLuint handle;
51};
52
53struct FormatProperties {
54    GLenum compatibility_class;
55    bool compatibility_by_size;
56    bool is_compressed;
57};
58
59class TextureCacheRuntime {
60    friend Framebuffer;
61    friend Image;
62    friend ImageView;
63    friend Sampler;
64
65public:
66    explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
67                                 StateTracker& state_tracker);
68    ~TextureCacheRuntime();
69
70    void Finish();
71
72    ImageBufferMap MapUploadBuffer(size_t size);
73
74    ImageBufferMap MapDownloadBuffer(size_t size);
75
76    void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
77
78    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
79        UNIMPLEMENTED();
80    }
81
82    bool CanImageBeCopied(const Image& dst, const Image& src);
83
84    void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
85
86    void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
87                         const std::array<Offset2D, 2>& dst_region,
88                         const std::array<Offset2D, 2>& src_region,
89                         Tegra::Engines::Fermi2D::Filter filter,
90                         Tegra::Engines::Fermi2D::Operation operation);
91
92    void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
93                               std::span<const VideoCommon::SwizzleParameters> swizzles);
94
95    void InsertUploadMemoryBarrier();
96
97    FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
98
99private:
100    struct StagingBuffers {
101        explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
102        ~StagingBuffers();
103
104        ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
105
106        size_t RequestBuffer(size_t requested_size);
107
108        std::optional<size_t> FindBuffer(size_t requested_size);
109
110        std::vector<OGLSync> syncs;
111        std::vector<OGLBuffer> buffers;
112        std::vector<u8*> maps;
113        std::vector<size_t> sizes;
114        GLenum storage_flags;
115        GLenum map_flags;
116    };
117
118    const Device& device;
119    StateTracker& state_tracker;
120    UtilShaders util_shaders;
121
122    std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
123
124    StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
125    StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
126
127    OGLTexture null_image_1d_array;
128    OGLTexture null_image_cube_array;
129    OGLTexture null_image_3d;
130    OGLTexture null_image_rect;
131    OGLTextureView null_image_view_1d;
132    OGLTextureView null_image_view_2d;
133    OGLTextureView null_image_view_2d_array;
134    OGLTextureView null_image_view_cube;
135
136    std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
137};
138
139class Image : public VideoCommon::ImageBase {
140    friend ImageView;
141
142public:
143    explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
144                   VAddr cpu_addr);
145
146    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
147                      std::span<const VideoCommon::BufferImageCopy> copies);
148
149    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
150                      std::span<const VideoCommon::BufferCopy> copies);
151
152    void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
153                        std::span<const VideoCommon::BufferImageCopy> copies);
154
155    GLuint Handle() const noexcept {
156        return texture.handle;
157    }
158
159private:
160    void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
161
162    void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
163
164    OGLTexture texture;
165    OGLTextureView store_view;
166    OGLBuffer buffer;
167    GLenum gl_internal_format = GL_NONE;
168    GLenum gl_store_format = GL_NONE;
169    GLenum gl_format = GL_NONE;
170    GLenum gl_type = GL_NONE;
171};
172
173class ImageView : public VideoCommon::ImageViewBase {
174    friend Image;
175
176public:
177    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
178    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
179
180    [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
181        return views[static_cast<size_t>(query_type)];
182    }
183
184    [[nodiscard]] GLuint DefaultHandle() const noexcept {
185        return default_handle;
186    }
187
188    [[nodiscard]] GLenum Format() const noexcept {
189        return internal_format;
190    }
191
192private:
193    void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
194                   const VideoCommon::ImageViewInfo& info,
195                   VideoCommon::SubresourceRange view_range);
196
197    std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
198    std::vector<OGLTextureView> stored_views;
199    GLuint default_handle = 0;
200    GLenum internal_format = GL_NONE;
201};
202
203class ImageAlloc : public VideoCommon::ImageAllocBase {};
204
205class Sampler {
206public:
207    explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
208
209    GLuint Handle() const noexcept {
210        return sampler.handle;
211    }
212
213private:
214    OGLSampler sampler;
215};
216
217class Framebuffer {
218public:
219    explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
220                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
221
222    [[nodiscard]] GLuint Handle() const noexcept {
223        return framebuffer.handle;
224    }
225
226    [[nodiscard]] GLbitfield BufferBits() const noexcept {
227        return buffer_bits;
228    }
229
230private:
231    OGLFramebuffer framebuffer;
232    GLbitfield buffer_bits = GL_NONE;
233};
234
235struct TextureCacheParams {
236    static constexpr bool ENABLE_VALIDATION = true;
237    static constexpr bool FRAMEBUFFER_BLITS = true;
238    static constexpr bool HAS_EMULATED_COPIES = true;
239
240    using Runtime = OpenGL::TextureCacheRuntime;
241    using Image = OpenGL::Image;
242    using ImageAlloc = OpenGL::ImageAlloc;
243    using ImageView = OpenGL::ImageView;
244    using Sampler = OpenGL::Sampler;
245    using Framebuffer = OpenGL::Framebuffer;
246};
247
248using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
249
250} // namespace OpenGL
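The new header replaces the old inheritance design (TextureCacheOpenGL overriding virtuals) with a params struct consumed by the shared, templated VideoCommon::TextureCache. A minimal sketch of that policy pattern, where GenericCache stands in for the real template (which this diff does not show):

    template <class P>
    class GenericCache {
    public:
        using Runtime = typename P::Runtime;
        using Image = typename P::Image;

        explicit GenericCache(Runtime& runtime_) : runtime{runtime_} {}

        void Copy(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) {
            // Backend capabilities are compile-time constants, so unsupported
            // paths are discarded instead of virtually dispatched
            if constexpr (P::HAS_EMULATED_COPIES) {
                runtime.EmulateCopyImage(dst, src, copies);
            } else {
                runtime.CopyImage(dst, src, copies);
            }
        }

    private:
        Runtime& runtime;
    };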
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index a8be2aa37..cbccfdeb4 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -107,7 +107,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
107 case Maxwell::IndexFormat::UnsignedInt: 107 case Maxwell::IndexFormat::UnsignedInt:
108 return GL_UNSIGNED_INT; 108 return GL_UNSIGNED_INT;
109 } 109 }
110    UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
110    UNREACHABLE_MSG("Invalid index_format={}", index_format);
111 return {}; 111 return {};
112} 112}
113 113
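These maxwell_to_gl.h hunks all drop the static_cast wrappers and pass the enums straight to the fmt-based logging macros. That only compiles if a formatter exists for enum types; a catch-all specialization along these lines is the usual way to provide one (whether the project's formatter is exactly this is an assumption, not something shown in the diff):

    #include <type_traits>
    #include <fmt/format.h>

    namespace fmt {
    // Hedged sketch: format any enum through its underlying integer type
    template <typename T>
    struct formatter<T, char, std::enable_if_t<std::is_enum_v<T>>>
        : formatter<std::underlying_type_t<T>> {
        template <typename FormatContext>
        auto format(T value, FormatContext& ctx) {
            using U = std::underlying_type_t<T>;
            return formatter<U>::format(static_cast<U>(value), ctx);
        }
    };
    } // namespace fmt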
@@ -144,7 +144,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
144 case Maxwell::PrimitiveTopology::Patches: 144 case Maxwell::PrimitiveTopology::Patches:
145 return GL_PATCHES; 145 return GL_PATCHES;
146 } 146 }
147    UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology));
147    UNREACHABLE_MSG("Invalid topology={}", topology);
148 return GL_POINTS; 148 return GL_POINTS;
149} 149}
150 150
@@ -172,8 +172,8 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
172 } 172 }
173 break; 173 break;
174 } 174 }
175    UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
176                    static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
175    UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", filter_mode,
176                    mipmap_filter_mode);
177 return GL_NEAREST; 177 return GL_NEAREST;
178} 178}
179 179
@@ -204,7 +204,7 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
204 return GL_MIRROR_CLAMP_TO_EDGE; 204 return GL_MIRROR_CLAMP_TO_EDGE;
205 } 205 }
206 } 206 }
207    UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
207    UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", wrap_mode);
208 return GL_REPEAT; 208 return GL_REPEAT;
209} 209}
210 210
@@ -227,7 +227,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
227 case Tegra::Texture::DepthCompareFunc::Always: 227 case Tegra::Texture::DepthCompareFunc::Always:
228 return GL_ALWAYS; 228 return GL_ALWAYS;
229 } 229 }
230    UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
230    UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", func);
231 return GL_GREATER; 231 return GL_GREATER;
232} 232}
233 233
@@ -249,7 +249,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
249 case Maxwell::Blend::Equation::MaxGL: 249 case Maxwell::Blend::Equation::MaxGL:
250 return GL_MAX; 250 return GL_MAX;
251 } 251 }
252    UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
252    UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation);
253 return GL_FUNC_ADD; 253 return GL_FUNC_ADD;
254} 254}
255 255
@@ -313,7 +313,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
313 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: 313 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
314 return GL_ONE_MINUS_CONSTANT_ALPHA; 314 return GL_ONE_MINUS_CONSTANT_ALPHA;
315 } 315 }
316    UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
316    UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor);
317 return GL_ZERO; 317 return GL_ZERO;
318} 318}
319 319
@@ -333,7 +333,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
333 case Tegra::Texture::SwizzleSource::OneFloat: 333 case Tegra::Texture::SwizzleSource::OneFloat:
334 return GL_ONE; 334 return GL_ONE;
335 } 335 }
336    UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
336    UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source);
337 return GL_ZERO; 337 return GL_ZERO;
338} 338}
339 339
@@ -364,7 +364,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
364 case Maxwell::ComparisonOp::AlwaysOld: 364 case Maxwell::ComparisonOp::AlwaysOld:
365 return GL_ALWAYS; 365 return GL_ALWAYS;
366 } 366 }
367    UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
367    UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
368 return GL_ALWAYS; 368 return GL_ALWAYS;
369} 369}
370 370
@@ -395,7 +395,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
395 case Maxwell::StencilOp::DecrWrapOGL: 395 case Maxwell::StencilOp::DecrWrapOGL:
396 return GL_DECR_WRAP; 396 return GL_DECR_WRAP;
397 } 397 }
398    UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
398    UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil);
399 return GL_KEEP; 399 return GL_KEEP;
400} 400}
401 401
@@ -406,7 +406,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
406 case Maxwell::FrontFace::CounterClockWise: 406 case Maxwell::FrontFace::CounterClockWise:
407 return GL_CCW; 407 return GL_CCW;
408 } 408 }
409    UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
409    UNIMPLEMENTED_MSG("Unimplemented front face cull={}", front_face);
410 return GL_CCW; 410 return GL_CCW;
411} 411}
412 412
@@ -419,7 +419,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
419 case Maxwell::CullFace::FrontAndBack: 419 case Maxwell::CullFace::FrontAndBack:
420 return GL_FRONT_AND_BACK; 420 return GL_FRONT_AND_BACK;
421 } 421 }
422    UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
422    UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face);
423 return GL_BACK; 423 return GL_BACK;
424} 424}
425 425
@@ -458,7 +458,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
458 case Maxwell::LogicOperation::Set: 458 case Maxwell::LogicOperation::Set:
459 return GL_SET; 459 return GL_SET;
460 } 460 }
461    UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
461    UNIMPLEMENTED_MSG("Unimplemented logic operation={}", operation);
462 return GL_COPY; 462 return GL_COPY;
463} 463}
464 464
@@ -471,10 +471,23 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
471 case Maxwell::PolygonMode::Fill: 471 case Maxwell::PolygonMode::Fill:
472 return GL_FILL; 472 return GL_FILL;
473 } 473 }
474    UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode));
474    UNREACHABLE_MSG("Invalid polygon mode={}", polygon_mode);
475 return GL_FILL; 475 return GL_FILL;
476} 476}
477 477
478inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
479 switch (filter) {
480 case Tegra::Texture::SamplerReduction::WeightedAverage:
481 return GL_WEIGHTED_AVERAGE_ARB;
482 case Tegra::Texture::SamplerReduction::Min:
483 return GL_MIN;
484 case Tegra::Texture::SamplerReduction::Max:
485 return GL_MAX;
486 }
487 UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
488 return GL_WEIGHTED_AVERAGE_ARB;
489}
490
478inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { 491inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
479 // Enumeration order matches register order. We can convert it arithmetically. 492 // Enumeration order matches register order. We can convert it arithmetically.
480 return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); 493 return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
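The arithmetic conversion in ViewportSwizzle is only safe because NV_viewport_swizzle allocates its eight tokens contiguously, starting at GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV (0x9350). A compile-time check of that assumption, not present in the diff, would look like:

    static_assert(GL_VIEWPORT_SWIZZLE_NEGATIVE_X_NV == GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + 1);
    static_assert(GL_VIEWPORT_SWIZZLE_NEGATIVE_W_NV == GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + 7);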
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index c869bb0e2..dd77a543c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -23,10 +23,10 @@
23#include "core/telemetry_session.h" 23#include "core/telemetry_session.h"
24#include "video_core/host_shaders/opengl_present_frag.h" 24#include "video_core/host_shaders/opengl_present_frag.h"
25#include "video_core/host_shaders/opengl_present_vert.h" 25#include "video_core/host_shaders/opengl_present_vert.h"
26#include "video_core/morton.h"
27#include "video_core/renderer_opengl/gl_rasterizer.h" 26#include "video_core/renderer_opengl/gl_rasterizer.h"
28#include "video_core/renderer_opengl/gl_shader_manager.h" 27#include "video_core/renderer_opengl/gl_shader_manager.h"
29#include "video_core/renderer_opengl/renderer_opengl.h" 28#include "video_core/renderer_opengl/renderer_opengl.h"
29#include "video_core/textures/decoders.h"
30 30
31namespace OpenGL { 31namespace OpenGL {
32 32
@@ -130,8 +130,8 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
130RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, 130RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
131 Core::Frontend::EmuWindow& emu_window_, 131 Core::Frontend::EmuWindow& emu_window_,
132 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 132 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
133                               std::unique_ptr<Core::Frontend::GraphicsContext> context)
134    : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_},
133                               std::unique_ptr<Core::Frontend::GraphicsContext> context_)
134    : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
135 emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {} 135 emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
136 136
137RendererOpenGL::~RendererOpenGL() = default; 137RendererOpenGL::~RendererOpenGL() = default;
@@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
140 if (!framebuffer) { 140 if (!framebuffer) {
141 return; 141 return;
142 } 142 }
143
144 PrepareRendertarget(framebuffer); 143 PrepareRendertarget(framebuffer);
145 RenderScreenshot(); 144 RenderScreenshot();
146 145
147    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
146    state_tracker.BindFramebuffer(0);
148 DrawScreen(emu_window.GetFramebufferLayout()); 147 DrawScreen(emu_window.GetFramebufferLayout());
149 148
150 ++m_current_frame; 149 ++m_current_frame;
@@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
187 // Reset the screen info's display texture to its own permanent texture 186 // Reset the screen info's display texture to its own permanent texture
188 screen_info.display_texture = screen_info.texture.resource.handle; 187 screen_info.display_texture = screen_info.texture.resource.handle;
189 188
190 const auto pixel_format{
191 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
192 const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
193 const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
194 u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
195 rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
196
197 // TODO(Rodrigo): Read this from HLE 189 // TODO(Rodrigo): Read this from HLE
198 constexpr u32 block_height_log2 = 4; 190 constexpr u32 block_height_log2 = 4;
199 VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, 191 const auto pixel_format{
200 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, 192 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
201 gl_framebuffer_data.data(), host_ptr); 193 const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
202 194 const u64 size_in_bytes{Tegra::Texture::CalculateSize(
195 true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
196 const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
197 const std::span<const u8> input_data(host_ptr, size_in_bytes);
198 Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
199 framebuffer.width, framebuffer.height, 1, block_height_log2,
200 0);
201
202 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
203 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); 203 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
204 204
205 // Update existing texture 205 // Update existing texture
@@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() {
238 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); 238 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
239 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); 239 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
240 240
241 // Generate presentation sampler
242 present_sampler.Create();
243 glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
244
241 // Generate VBO handle for drawing 245 // Generate VBO handle for drawing
242 vertex_buffer.Create(); 246 vertex_buffer.Create();
243 247
@@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() {
255 // Clear screen to black 259 // Clear screen to black
256 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 260 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
257 261
262 // Enable seamless cubemaps when per texture parameters are not available
263 if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
264 glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
265 }
266
258 // Enable unified vertex attributes and query vertex buffer address when the driver supports it 267 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
259 if (device.HasVertexBufferUnifiedMemory()) { 268 if (device.HasVertexBufferUnifiedMemory()) {
260 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); 269 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
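The block added here complements the Sampler logic in gl_texture_cache.cpp: when per-texture seamless cube maps are unavailable, the only lever left is the context-global enable, which is the better default since most guest samplers want seamless filtering. The combined policy, as a hedged sketch (this helper does not exist in the diff):

    void ConfigureSeamless(GLuint sampler, bool seamless) {
        if (GLAD_GL_ARB_seamless_cubemap_per_texture ||
            GLAD_GL_AMD_seamless_cubemap_per_texture) {
            // Per-sampler control: each TSC entry can choose independently
            glSamplerParameteri(sampler, GL_TEXTURE_CUBE_MAP_SEAMLESS,
                                seamless ? GL_TRUE : GL_FALSE);
        } else if (!seamless) {
            // Global GL_TEXTURE_CUBE_MAP_SEAMLESS is already enabled at init;
            // a sampler that wants it off cannot be honored
            LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
        }
    }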
@@ -275,9 +284,9 @@ void RendererOpenGL::AddTelemetryFields() {
275 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); 284 LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
276 285
277 constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; 286 constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
278    telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor);
279    telemetry_session.AddField(user_system, "GPU_Model", gpu_model);
280    telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version);
287    telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor));
288    telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model));
289    telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
281} 290}
282 291
283void RendererOpenGL::CreateRasterizer() { 292void RendererOpenGL::CreateRasterizer() {
@@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
296 305
297 const auto pixel_format{ 306 const auto pixel_format{
298 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; 307 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
299    const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
308    const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
300 gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); 309 gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
301 310
302 GLint internal_format; 311 GLint internal_format;
@@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
315 internal_format = GL_RGBA8; 324 internal_format = GL_RGBA8;
316 texture.gl_format = GL_RGBA; 325 texture.gl_format = GL_RGBA;
317 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 326 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
318        UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
319                          static_cast<u32>(framebuffer.pixel_format));
327        // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
328        //                   static_cast<u32>(framebuffer.pixel_format));
320 } 329 }
321 330
322 texture.resource.Release(); 331 texture.resource.Release();
@@ -348,7 +357,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
348 } else { 357 } else {
349 // Other transformations are unsupported 358 // Other transformations are unsupported
350            LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}",
351                         static_cast<u32>(framebuffer_transform_flags));
359            LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}",
360                         framebuffer_transform_flags);
352 UNIMPLEMENTED(); 361 UNIMPLEMENTED();
353 } 362 }
354 } 363 }
@@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
382 state_tracker.NotifyPolygonModes(); 391 state_tracker.NotifyPolygonModes();
383 state_tracker.NotifyViewport0(); 392 state_tracker.NotifyViewport0();
384 state_tracker.NotifyScissor0(); 393 state_tracker.NotifyScissor0();
385    state_tracker.NotifyColorMask0();
394    state_tracker.NotifyColorMask(0);
386 state_tracker.NotifyBlend0(); 395 state_tracker.NotifyBlend0();
387 state_tracker.NotifyFramebuffer(); 396 state_tracker.NotifyFramebuffer();
388 state_tracker.NotifyFrontFace(); 397 state_tracker.NotifyFrontFace();
@@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
440 } 449 }
441 450
442 glBindTextureUnit(0, screen_info.display_texture); 451 glBindTextureUnit(0, screen_info.display_texture);
443    glBindSampler(0, 0);
452    glBindSampler(0, present_sampler.handle);
444 453
445 glClear(GL_COLOR_BUFFER_BIT); 454 glClear(GL_COLOR_BUFFER_BIT);
446 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 455 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
@@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {
473 482
474 DrawScreen(layout); 483 DrawScreen(layout);
475 484
485 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
486 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
476 glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 487 glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
477 renderer_settings.screenshot_bits); 488 renderer_settings.screenshot_bits);
478 489
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 9ef181f95..44e109794 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -57,10 +57,10 @@ struct ScreenInfo {
57 57
58class RendererOpenGL final : public VideoCore::RendererBase { 58class RendererOpenGL final : public VideoCore::RendererBase {
59public: 59public:
60    explicit RendererOpenGL(Core::TelemetrySession& telemetry_session,
61                            Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
62                            Tegra::GPU& gpu,
63                            std::unique_ptr<Core::Frontend::GraphicsContext> context);
60    explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_,
61                            Core::Frontend::EmuWindow& emu_window_,
62                            Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
63                            std::unique_ptr<Core::Frontend::GraphicsContext> context_);
64 ~RendererOpenGL() override; 64 ~RendererOpenGL() override;
65 65
66 bool Init() override; 66 bool Init() override;
@@ -102,6 +102,7 @@ private:
102 StateTracker state_tracker{gpu}; 102 StateTracker state_tracker{gpu};
103 103
104 // OpenGL object IDs 104 // OpenGL object IDs
105 OGLSampler present_sampler;
105 OGLBuffer vertex_buffer; 106 OGLBuffer vertex_buffer;
106 OGLProgram vertex_program; 107 OGLProgram vertex_program;
107 OGLProgram fragment_program; 108 OGLProgram fragment_program;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
new file mode 100644
index 000000000..eb849cbf2
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -0,0 +1,224 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <bit>
6#include <span>
7#include <string_view>
8
9#include <glad/glad.h>
10
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "common/div_ceil.h"
14#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
15#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
16#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
17#include "video_core/host_shaders/pitch_unswizzle_comp.h"
18#include "video_core/renderer_opengl/gl_resource_manager.h"
19#include "video_core/renderer_opengl/gl_shader_manager.h"
20#include "video_core/renderer_opengl/gl_texture_cache.h"
21#include "video_core/renderer_opengl/util_shaders.h"
22#include "video_core/surface.h"
23#include "video_core/texture_cache/accelerated_swizzle.h"
24#include "video_core/texture_cache/types.h"
25#include "video_core/texture_cache/util.h"
26#include "video_core/textures/decoders.h"
27
28namespace OpenGL {
29
30using namespace HostShaders;
31
32using VideoCommon::Extent3D;
33using VideoCommon::ImageCopy;
34using VideoCommon::ImageType;
35using VideoCommon::SwizzleParameters;
36using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
37using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
38using VideoCore::Surface::BytesPerBlock;
39
40namespace {
41
42OGLProgram MakeProgram(std::string_view source) {
43 OGLShader shader;
44 shader.Create(source, GL_COMPUTE_SHADER);
45
46 OGLProgram program;
47 program.Create(true, false, shader.handle);
48 return program;
49}
50
51} // Anonymous namespace
52
53UtilShaders::UtilShaders(ProgramManager& program_manager_)
54 : program_manager{program_manager_},
55 block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
56 block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
57 pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
58 copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
59 const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
60 swizzle_table_buffer.Create();
61 glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
62}
63
64UtilShaders::~UtilShaders() = default;
65
66void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
67 std::span<const SwizzleParameters> swizzles) {
68 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
69 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
70 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
72
73 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
74 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
75 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
76
77 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
78 for (const SwizzleParameters& swizzle : swizzles) {
79 const Extent3D num_tiles = swizzle.num_tiles;
80 const size_t input_offset = swizzle.buffer_offset + buffer_offset;
81
82 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
83 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
84
85 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
86 glUniform3uiv(0, 1, params.origin.data());
87 glUniform3iv(1, 1, params.destination.data());
88 glUniform1ui(2, params.bytes_per_block_log2);
89 glUniform1ui(3, params.layer_stride);
90 glUniform1ui(4, params.block_size);
91 glUniform1ui(5, params.x_shift);
92 glUniform1ui(6, params.block_height);
93 glUniform1ui(7, params.block_height_mask);
94 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
95 input_offset, image.guest_size_bytes - swizzle.buffer_offset);
96 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
97 GL_WRITE_ONLY, store_format);
98 glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
99 }
100 program_manager.RestoreGuestCompute();
101}
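Each dispatch above rounds the tile grid up to whole workgroups. Common::DivCeil lives in common/div_ceil.h; in sketch form, the arithmetic it performs is:

    template <typename N, typename D>
    [[nodiscard]] constexpr N DivCeil(N number, D divisor) {
        // e.g. 70 tiles across 32-wide workgroups -> (70 + 31) / 32 = 3 dispatches
        return static_cast<N>((number + divisor - 1) / divisor);
    }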
102
103void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
104 std::span<const SwizzleParameters> swizzles) {
105 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
106
107 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
108 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
109 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
110
111 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
112 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
113 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
114
115 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
116 for (const SwizzleParameters& swizzle : swizzles) {
117 const Extent3D num_tiles = swizzle.num_tiles;
118 const size_t input_offset = swizzle.buffer_offset + buffer_offset;
119
120 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
121 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
122 const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
123
124 const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
125 glUniform3uiv(0, 1, params.origin.data());
126 glUniform3iv(1, 1, params.destination.data());
127 glUniform1ui(2, params.bytes_per_block_log2);
128 glUniform1ui(3, params.slice_size);
129 glUniform1ui(4, params.block_size);
130 glUniform1ui(5, params.x_shift);
131 glUniform1ui(6, params.block_height);
132 glUniform1ui(7, params.block_height_mask);
133 glUniform1ui(8, params.block_depth);
134 glUniform1ui(9, params.block_depth_mask);
135 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
136 input_offset, image.guest_size_bytes - swizzle.buffer_offset);
137 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
138 GL_WRITE_ONLY, store_format);
139 glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
140 }
141 program_manager.RestoreGuestCompute();
142}
143
144void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
145 std::span<const SwizzleParameters> swizzles) {
146 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
147 static constexpr GLuint BINDING_INPUT_BUFFER = 0;
148 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
149 static constexpr GLuint LOC_ORIGIN = 0;
150 static constexpr GLuint LOC_DESTINATION = 1;
151 static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
152 static constexpr GLuint LOC_PITCH = 3;
153
154 const u32 bytes_per_block = BytesPerBlock(image.info.format);
155 const GLenum format = StoreFormat(bytes_per_block);
156 const u32 pitch = image.info.pitch;
157
158 UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
159 "Non-power of two images are not implemented");
160
161 program_manager.BindHostCompute(pitch_unswizzle_program.handle);
162 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
163 glUniform2ui(LOC_ORIGIN, 0, 0);
164 glUniform2i(LOC_DESTINATION, 0, 0);
165 glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
166 glUniform1ui(LOC_PITCH, pitch);
167 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
168 for (const SwizzleParameters& swizzle : swizzles) {
169 const Extent3D num_tiles = swizzle.num_tiles;
170 const size_t input_offset = swizzle.buffer_offset + buffer_offset;
171
172 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
173 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
174
175 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
176 input_offset, image.guest_size_bytes - swizzle.buffer_offset);
177 glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
178 }
179 program_manager.RestoreGuestCompute();
180}
181
182void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
183 static constexpr GLuint BINDING_INPUT_IMAGE = 0;
184 static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
185 static constexpr GLuint LOC_SRC_OFFSET = 0;
186 static constexpr GLuint LOC_DST_OFFSET = 1;
187
188 program_manager.BindHostCompute(copy_bc4_program.handle);
189
190 for (const ImageCopy& copy : copies) {
191 ASSERT(copy.src_subresource.base_layer == 0);
192 ASSERT(copy.src_subresource.num_layers == 1);
193 ASSERT(copy.dst_subresource.base_layer == 0);
194 ASSERT(copy.dst_subresource.num_layers == 1);
195
196 glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
197 glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
198 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
199 GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
200 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
201 copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
202 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
203 }
204 program_manager.RestoreGuestCompute();
205}
206
207GLenum StoreFormat(u32 bytes_per_block) {
208 switch (bytes_per_block) {
209 case 1:
210 return GL_R8UI;
211 case 2:
212 return GL_R16UI;
213 case 4:
214 return GL_R32UI;
215 case 8:
216 return GL_RG32UI;
217 case 16:
218 return GL_RGBA32UI;
219 }
220 UNREACHABLE();
221 return GL_R8UI;
222}
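StoreFormat pairs each block size with an unsigned-integer image format of the same texel size, so one imageStore in the compute shader writes exactly one block. A usage sketch (the image handle is illustrative):

    // 8-byte blocks (e.g. BC1/BC4) map to GL_RG32UI
    const GLenum store_format = StoreFormat(8);
    glBindImageTexture(0, image_handle, 0, GL_TRUE, 0, GL_WRITE_ONLY, store_format);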
223
224} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
new file mode 100644
index 000000000..359997255
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/texture_cache/types.h"
14
15namespace OpenGL {
16
17class Image;
18class ImageBufferMap;
19class ProgramManager;
20
21class UtilShaders {
22public:
23 explicit UtilShaders(ProgramManager& program_manager);
24 ~UtilShaders();
25
26 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
27 std::span<const VideoCommon::SwizzleParameters> swizzles);
28
29 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
30 std::span<const VideoCommon::SwizzleParameters> swizzles);
31
32 void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
33 std::span<const VideoCommon::SwizzleParameters> swizzles);
34
35 void CopyBC4(Image& dst_image, Image& src_image,
36 std::span<const VideoCommon::ImageCopy> copies);
37
38private:
39 ProgramManager& program_manager;
40
41 OGLBuffer swizzle_table_buffer;
42
43 OGLProgram block_linear_unswizzle_2d_program;
44 OGLProgram block_linear_unswizzle_3d_program;
45 OGLProgram pitch_unswizzle_program;
46 OGLProgram copy_bc4_program;
47};
48
49GLenum StoreFormat(u32 bytes_per_block);
50
51} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
deleted file mode 100644
index 6d7bb16b2..000000000
--- a/src/video_core/renderer_opengl/utils.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6#include <vector>
7
8#include <fmt/format.h>
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "video_core/renderer_opengl/gl_state_tracker.h"
13#include "video_core/renderer_opengl/utils.h"
14
15namespace OpenGL {
16
17void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
18 if (!GLAD_GL_KHR_debug) {
19 // We don't need to throw an error as this is just for debugging
20 return;
21 }
22
23 std::string object_label;
24 if (extra_info.empty()) {
25 switch (identifier) {
26 case GL_TEXTURE:
27 object_label = fmt::format("Texture@0x{:016X}", addr);
28 break;
29 case GL_PROGRAM:
30 object_label = fmt::format("Shader@0x{:016X}", addr);
31 break;
32 default:
33 object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
34 break;
35 }
36 } else {
37 object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
38 }
39 glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
40}
41
42} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
deleted file mode 100644
index 9c09ee12c..000000000
--- a/src/video_core/renderer_opengl/utils.h
+++ /dev/null
@@ -1,16 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string_view>
8#include <vector>
9#include <glad/glad.h>
10#include "common/common_types.h"
11
12namespace OpenGL {
13
14void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
15
16} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
new file mode 100644
index 000000000..87c8e5693
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -0,0 +1,624 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
8#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
9#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
10#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
11#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
12#include "video_core/renderer_vulkan/blit_image.h"
13#include "video_core/renderer_vulkan/maxwell_to_vk.h"
14#include "video_core/renderer_vulkan/vk_device.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_shader_util.h"
17#include "video_core/renderer_vulkan/vk_state_tracker.h"
18#include "video_core/renderer_vulkan/vk_texture_cache.h"
19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
20#include "video_core/renderer_vulkan/wrapper.h"
21#include "video_core/surface.h"
22
23namespace Vulkan {
24
25using VideoCommon::ImageViewType;
26
27namespace {
28struct PushConstants {
29 std::array<float, 2> tex_scale;
30 std::array<float, 2> tex_offset;
31};
32
33template <u32 binding>
34inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{
35 .binding = binding,
36 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
37 .descriptorCount = 1,
38 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
39 .pImmutableSamplers = nullptr,
40};
41constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{
42 TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
43 TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>,
44};
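TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING uses a pattern worth noting: an inline constexpr variable template stamps out otherwise identical Vulkan structs that differ in a single field. The same trick applied to a hypothetical vertex attribute description, purely as an illustration:

    template <u32 index>
    inline constexpr VkVertexInputAttributeDescription ATTRIBUTE_DESCRIPTION{
        .location = index,
        .binding = 0,
        .format = VK_FORMAT_R32G32B32A32_SFLOAT,
        .offset = index * 16, // one vec4 per attribute
    };
    constexpr std::array ATTRIBUTE_DESCRIPTIONS{
        ATTRIBUTE_DESCRIPTION<0>,
        ATTRIBUTE_DESCRIPTION<1>,
    };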
45constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
46 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
47 .pNext = nullptr,
48 .flags = 0,
49 .bindingCount = 1,
50 .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
51};
52constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
53 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
54 .pNext = nullptr,
55 .flags = 0,
56 .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
57 .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
58};
59constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
60 .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
61 .offset = 0,
62 .size = sizeof(PushConstants),
63};
64constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
65 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
66 .pNext = nullptr,
67 .flags = 0,
68 .vertexBindingDescriptionCount = 0,
69 .pVertexBindingDescriptions = nullptr,
70 .vertexAttributeDescriptionCount = 0,
71 .pVertexAttributeDescriptions = nullptr,
72};
73constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
74 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
75 .pNext = nullptr,
76 .flags = 0,
77 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
78 .primitiveRestartEnable = VK_FALSE,
79};
80constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
81 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
82 .pNext = nullptr,
83 .flags = 0,
84 .viewportCount = 1,
85 .pViewports = nullptr,
86 .scissorCount = 1,
87 .pScissors = nullptr,
88};
89constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
90 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
91 .pNext = nullptr,
92 .flags = 0,
93 .depthClampEnable = VK_FALSE,
94 .rasterizerDiscardEnable = VK_FALSE,
95 .polygonMode = VK_POLYGON_MODE_FILL,
96 .cullMode = VK_CULL_MODE_BACK_BIT,
97 .frontFace = VK_FRONT_FACE_CLOCKWISE,
98 .depthBiasEnable = VK_FALSE,
99 .depthBiasConstantFactor = 0.0f,
100 .depthBiasClamp = 0.0f,
101 .depthBiasSlopeFactor = 0.0f,
102 .lineWidth = 1.0f,
103};
104constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
105 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
106 .pNext = nullptr,
107 .flags = 0,
108 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
109 .sampleShadingEnable = VK_FALSE,
110 .minSampleShading = 0.0f,
111 .pSampleMask = nullptr,
112 .alphaToCoverageEnable = VK_FALSE,
113 .alphaToOneEnable = VK_FALSE,
114};
115constexpr std::array DYNAMIC_STATES{
116 VK_DYNAMIC_STATE_VIEWPORT,
117 VK_DYNAMIC_STATE_SCISSOR,
118};
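// Keeping viewport and scissor dynamic means a single pipeline per renderpass
// and blend configuration covers every blit size; BindBlitState supplies both
// at record time.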
119constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
120 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
121 .pNext = nullptr,
122 .flags = 0,
123 .dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
124 .pDynamicStates = DYNAMIC_STATES.data(),
125};
126constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
127 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
128 .pNext = nullptr,
129 .flags = 0,
130 .logicOpEnable = VK_FALSE,
131 .logicOp = VK_LOGIC_OP_CLEAR,
132 .attachmentCount = 0,
133 .pAttachments = nullptr,
134 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
135};
136constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{
137 .blendEnable = VK_FALSE,
138 .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
139 .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
140 .colorBlendOp = VK_BLEND_OP_ADD,
141 .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
142 .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
143 .alphaBlendOp = VK_BLEND_OP_ADD,
144 .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
145 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
146};
147constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{
148 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
149 .pNext = nullptr,
150 .flags = 0,
151 .logicOpEnable = VK_FALSE,
152 .logicOp = VK_LOGIC_OP_CLEAR,
153 .attachmentCount = 1,
154 .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE,
155 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
156};
157constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
158 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
159 .pNext = nullptr,
160 .flags = 0,
161 .depthTestEnable = VK_TRUE,
162 .depthWriteEnable = VK_TRUE,
163 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
164 .depthBoundsTestEnable = VK_FALSE,
165 .stencilTestEnable = VK_FALSE,
166 .front = VkStencilOpState{},
167 .back = VkStencilOpState{},
168 .minDepthBounds = 0.0f,
169 .maxDepthBounds = 0.0f,
170};
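// VK_COMPARE_OP_ALWAYS with depth writes enabled makes the depth-stencil
// pipelines below replace destination depth unconditionally with the fragment
// shader's output.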
171
172template <VkFilter filter>
173inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{
174 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
175 .pNext = nullptr,
176 .flags = 0,
177 .magFilter = filter,
178 .minFilter = filter,
179 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
180 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
181 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
182 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
183 .mipLodBias = 0.0f,
184 .anisotropyEnable = VK_FALSE,
185 .maxAnisotropy = 0.0f,
186 .compareEnable = VK_FALSE,
187 .compareOp = VK_COMPARE_OP_NEVER,
188 .minLod = 0.0f,
189 .maxLod = 0.0f,
190 .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
191 .unnormalizedCoordinates = VK_TRUE,
192};
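// Unnormalized-coordinate samplers carry extra validity rules: equal min/mag
// filters, nearest mipmapping, a zero LOD range, clamped address modes, and no
// anisotropy or compare. The template above satisfies them; a compile-time
// restatement of the core rules (a sketch, not part of the original file):
constexpr bool IsValidUnnormalizedSampler(const VkSamplerCreateInfo& info) {
    return info.magFilter == info.minFilter &&
           info.mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST &&
           info.minLod == 0.0f && info.maxLod == 0.0f &&
           info.anisotropyEnable == VK_FALSE && info.compareEnable == VK_FALSE;
}
static_assert(IsValidUnnormalizedSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>));
static_assert(IsValidUnnormalizedSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>));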
193
194constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo(
195 const VkDescriptorSetLayout* set_layout) {
196 return VkPipelineLayoutCreateInfo{
197 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
198 .pNext = nullptr,
199 .flags = 0,
200 .setLayoutCount = 1,
201 .pSetLayouts = set_layout,
202 .pushConstantRangeCount = 1,
203 .pPushConstantRanges = &PUSH_CONSTANT_RANGE,
204 };
205}
206
207constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage,
208 VkShaderModule shader) {
209 return VkPipelineShaderStageCreateInfo{
210 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
211 .pNext = nullptr,
212 .flags = 0,
213 .stage = stage,
214 .module = shader,
215 .pName = "main",
216 .pSpecializationInfo = nullptr,
217 };
218}
219
220constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages(
221 VkShaderModule vertex_shader, VkShaderModule fragment_shader) {
222 return std::array{
223 PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader),
224 PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader),
225 };
226}
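// No vertex buffers are bound anywhere in this file: FULL_SCREEN_TRIANGLE_VERT_SPV
// presumably derives its positions from the vertex index alone. A C++ model of
// the classic single-triangle trick (an illustration, not the shader itself):
constexpr std::array<std::array<float, 2>, 3> FullScreenTrianglePositions() {
    std::array<std::array<float, 2>, 3> positions{};
    for (u32 index = 0; index < 3; ++index) {
        const float x = static_cast<float>((index << 1) & 2); // 0, 2, 0
        const float y = static_cast<float>(index & 2);        // 0, 0, 2
        positions[index] = {x * 2.0f - 1.0f, y * 2.0f - 1.0f};
    }
    return positions; // (-1,-1), (3,-1), (-1,3): covers the viewport once clipped
}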
227
228void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
229 VkSampler sampler, VkImageView image_view) {
230 const VkDescriptorImageInfo image_info{
231 .sampler = sampler,
232 .imageView = image_view,
233 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
234 };
235 const VkWriteDescriptorSet write_descriptor_set{
236 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
237 .pNext = nullptr,
238 .dstSet = descriptor_set,
239 .dstBinding = 0,
240 .dstArrayElement = 0,
241 .descriptorCount = 1,
242 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
243 .pImageInfo = &image_info,
244 .pBufferInfo = nullptr,
245 .pTexelBufferView = nullptr,
246 };
247 device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr);
248}
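// The wrapper call above presumably lowers to the raw single-write form
//     vkUpdateDescriptorSets(device, 1, &write_descriptor_set, 0, nullptr);
// (assumed wrapper semantics; the trailing nullptr is the empty copy set).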
249
250void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
251 VkSampler sampler, VkImageView image_view_0,
252 VkImageView image_view_1) {
253 const VkDescriptorImageInfo image_info_0{
254 .sampler = sampler,
255 .imageView = image_view_0,
256 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
257 };
258 const VkDescriptorImageInfo image_info_1{
259 .sampler = sampler,
260 .imageView = image_view_1,
261 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
262 };
263 const std::array write_descriptor_sets{
264 VkWriteDescriptorSet{
265 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
266 .pNext = nullptr,
267 .dstSet = descriptor_set,
268 .dstBinding = 0,
269 .dstArrayElement = 0,
270 .descriptorCount = 1,
271 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
272 .pImageInfo = &image_info_0,
273 .pBufferInfo = nullptr,
274 .pTexelBufferView = nullptr,
275 },
276 VkWriteDescriptorSet{
277 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
278 .pNext = nullptr,
279 .dstSet = descriptor_set,
280 .dstBinding = 1,
281 .dstArrayElement = 0,
282 .descriptorCount = 1,
283 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
284 .pImageInfo = &image_info_1,
285 .pBufferInfo = nullptr,
286 .pTexelBufferView = nullptr,
287 },
288 };
289 device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
290}
291
292void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
293 const std::array<Offset2D, 2>& dst_region,
294 const std::array<Offset2D, 2>& src_region) {
295 const VkOffset2D offset{
296 .x = std::min(dst_region[0].x, dst_region[1].x),
297 .y = std::min(dst_region[0].y, dst_region[1].y),
298 };
299 const VkExtent2D extent{
300 .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)),
301 .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)),
302 };
303 const VkViewport viewport{
304 .x = static_cast<float>(offset.x),
305 .y = static_cast<float>(offset.y),
306 .width = static_cast<float>(extent.width),
307 .height = static_cast<float>(extent.height),
308 .minDepth = 0.0f,
309 .maxDepth = 1.0f,
310 };
311 // TODO: Support scissored blits
312 const VkRect2D scissor{
313 .offset = offset,
314 .extent = extent,
315 };
316 const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x);
317 const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y);
318 const PushConstants push_constants{
319 .tex_scale = {scale_x, scale_y},
320 .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)},
321 };
322 cmdbuf.SetViewport(0, viewport);
323 cmdbuf.SetScissor(0, scissor);
324 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
325}
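// With unnormalized-coordinate samplers, tex_scale and tex_offset stay in
// texel space: the source extent and origin (the scale may be negative, which
// mirrors the blit). A destination uv in [0, 1] then lands at
// uv * tex_scale + tex_offset inside the source rectangle. A worked model of
// that mapping (a sketch; MapToSource is not part of this file):
constexpr std::array<float, 2> MapToSource(std::array<float, 2> uv,
                                           const PushConstants& push) {
    return {uv[0] * push.tex_scale[0] + push.tex_offset[0],
            uv[1] * push.tex_scale[1] + push.tex_offset[1]};
}
static_assert(MapToSource({1.0f, 1.0f},
                          {.tex_scale = {64.0f, 32.0f}, .tex_offset = {8.0f, 4.0f}})[0] ==
              72.0f);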
326
327} // Anonymous namespace
328
329BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_,
330 StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
331 : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
332 one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
333 ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
334 two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
335 TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
336 one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
337 two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
338 one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
339 PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
340 two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
341 PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
342 full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
343 blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
344 convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
345 convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
346 linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
347 nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
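    // VK_EXT_shader_stencil_export lets the fragment stage export a stencil
    // reference; without it there is no way to build the depth-stencil blit
    // pipeline, so the shader is only compiled when the extension is present.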
348 if (device.IsExtShaderStencilExportSupported()) {
349 blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
350 }
351}
352
353BlitImageHelper::~BlitImageHelper() = default;
354
355void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
356 const std::array<Offset2D, 2>& dst_region,
357 const std::array<Offset2D, 2>& src_region,
358 Tegra::Engines::Fermi2D::Filter filter,
359 Tegra::Engines::Fermi2D::Operation operation) {
360 const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
361 const BlitImagePipelineKey key{
362 .renderpass = dst_framebuffer->RenderPass(),
363 .operation = operation,
364 };
365 const VkPipelineLayout layout = *one_texture_pipeline_layout;
366 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
367 const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
368 const VkPipeline pipeline = FindOrEmplacePipeline(key);
369 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
370 scheduler.RequestRenderpass(dst_framebuffer);
371 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
372 &device = device](vk::CommandBuffer cmdbuf) {
373 // TODO: Barriers
374 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
375 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
376 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
377 nullptr);
378 BindBlitState(cmdbuf, layout, dst_region, src_region);
379 cmdbuf.Draw(3, 1, 0, 0);
380 });
381 scheduler.InvalidateState();
382}
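// Everything the lambda above needs is captured by value because VKScheduler
// defers execution: commands are recorded later, on the scheduler's own
// command buffer. A minimal model of that contract (assumed semantics, not
// the real scheduler):
template <typename Command, typename CommandBuffer>
class DeferredQueue {
public:
    void Record(Command command) {
        commands.push_back(std::move(command));
    }
    void Flush(CommandBuffer& cmdbuf) {
        for (Command& command : commands) {
            command(cmdbuf);
        }
        commands.clear();
    }

private:
    std::vector<Command> commands;
};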
383
384void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
385 VkImageView src_depth_view, VkImageView src_stencil_view,
386 const std::array<Offset2D, 2>& dst_region,
387 const std::array<Offset2D, 2>& src_region,
388 Tegra::Engines::Fermi2D::Filter filter,
389 Tegra::Engines::Fermi2D::Operation operation) {
390 ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
391 ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
392
393 const VkPipelineLayout layout = *two_textures_pipeline_layout;
394 const VkSampler sampler = *nearest_sampler;
395 const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
396 const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
397 scheduler.RequestRenderpass(dst_framebuffer);
398 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
399 src_stencil_view, descriptor_set,
400 &device = device](vk::CommandBuffer cmdbuf) {
401 // TODO: Barriers
402 UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
403 src_stencil_view);
404 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
405 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
406 nullptr);
407 BindBlitState(cmdbuf, layout, dst_region, src_region);
408 cmdbuf.Draw(3, 1, 0, 0);
409 });
410 scheduler.InvalidateState();
411}
412
413void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
414 const ImageView& src_image_view) {
415 ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
416 Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
417}
418
419void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
420 const ImageView& src_image_view) {
422 ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
423 Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
424}
425
426void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
427 const ImageView& src_image_view) {
428 ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
429 Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
430}
431
432void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
433 const ImageView& src_image_view) {
434 ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
435 Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
436}
437
438void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
439 const ImageView& src_image_view) {
440 const VkPipelineLayout layout = *one_texture_pipeline_layout;
441 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
442 const VkSampler sampler = *nearest_sampler;
443 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
444 const VkExtent2D extent{
445 .width = src_image_view.size.width,
446 .height = src_image_view.size.height,
447 };
448 scheduler.RequestRenderpass(dst_framebuffer);
449 scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent,
450 &device = device](vk::CommandBuffer cmdbuf) {
451 const VkOffset2D offset{
452 .x = 0,
453 .y = 0,
454 };
455 const VkViewport viewport{
456 .x = 0.0f,
457 .y = 0.0f,
458 .width = static_cast<float>(extent.width),
459 .height = static_cast<float>(extent.height),
460 .minDepth = 0.0f,
461 .maxDepth = 0.0f,
462 };
463 const VkRect2D scissor{
464 .offset = offset,
465 .extent = extent,
466 };
467 const PushConstants push_constants{
468 .tex_scale = {viewport.width, viewport.height},
469 .tex_offset = {0.0f, 0.0f},
470 };
471 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
472
473 // TODO: Barriers
474 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
475 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
476 nullptr);
477 cmdbuf.SetViewport(0, viewport);
478 cmdbuf.SetScissor(0, scissor);
479 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
480 cmdbuf.Draw(3, 1, 0, 0);
481 });
482 scheduler.InvalidateState();
483}
484
485VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
486 const auto it = std::ranges::find(blit_color_keys, key);
487 if (it != blit_color_keys.end()) {
488 return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
489 }
490 blit_color_keys.push_back(key);
491
492 const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag);
493 const VkPipelineColorBlendAttachmentState blend_attachment{
494 .blendEnable = VK_FALSE,
495 .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
496 .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
497 .colorBlendOp = VK_BLEND_OP_ADD,
498 .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
499 .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
500 .alphaBlendOp = VK_BLEND_OP_ADD,
501 .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
502 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
503 };
504 // TODO: programmable blending
505 const VkPipelineColorBlendStateCreateInfo color_blend_create_info{
506 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
507 .pNext = nullptr,
508 .flags = 0,
509 .logicOpEnable = VK_FALSE,
510 .logicOp = VK_LOGIC_OP_CLEAR,
511 .attachmentCount = 1,
512 .pAttachments = &blend_attachment,
513 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
514 };
515 blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
516 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
517 .pNext = nullptr,
518 .flags = 0,
519 .stageCount = static_cast<u32>(stages.size()),
520 .pStages = stages.data(),
521 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
522 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
523 .pTessellationState = nullptr,
524 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
525 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
526 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
527 .pDepthStencilState = nullptr,
528 .pColorBlendState = &color_blend_create_info,
529 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
530 .layout = *one_texture_pipeline_layout,
531 .renderPass = key.renderpass,
532 .subpass = 0,
533 .basePipelineHandle = VK_NULL_HANDLE,
534 .basePipelineIndex = 0,
535 }));
536 return *blit_color_pipelines.back();
537}
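// blit_color_keys and blit_color_pipelines are parallel vectors searched
// linearly above; with only a handful of renderpass/operation combinations in
// practice this stays cheap and keeps insertion order stable. The lookup in
// generic form (a sketch; assumes <optional> is available):
template <typename Key>
std::optional<std::size_t> FindKeyIndex(const std::vector<Key>& keys, const Key& key) {
    const auto it = std::ranges::find(keys, key);
    if (it == keys.end()) {
        return std::nullopt;
    }
    return static_cast<std::size_t>(std::distance(keys.begin(), it));
}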
538
539VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
540 if (blit_depth_stencil_pipeline) {
541 return *blit_depth_stencil_pipeline;
542 }
543 const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
544 blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
545 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
546 .pNext = nullptr,
547 .flags = 0,
548 .stageCount = static_cast<u32>(stages.size()),
549 .pStages = stages.data(),
550 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
551 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
552 .pTessellationState = nullptr,
553 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
554 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
555 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
556 .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
557 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
558 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
559 .layout = *two_textures_pipeline_layout,
560 .renderPass = renderpass,
561 .subpass = 0,
562 .basePipelineHandle = VK_NULL_HANDLE,
563 .basePipelineIndex = 0,
564 });
565 return *blit_depth_stencil_pipeline;
566}
567
568void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
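    // The cache key here is only "already built": the renderpass from the
    // first call wins and later calls reuse the pipeline, which assumes every
    // depth-to-color conversion targets a compatible renderpass.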
569 if (pipeline) {
570 return;
571 }
572 const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag);
573 pipeline = device.GetLogical().CreateGraphicsPipeline({
574 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
575 .pNext = nullptr,
576 .flags = 0,
577 .stageCount = static_cast<u32>(stages.size()),
578 .pStages = stages.data(),
579 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
580 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
581 .pTessellationState = nullptr,
582 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
583 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
584 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
585 .pDepthStencilState = nullptr,
586 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
587 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
588 .layout = *one_texture_pipeline_layout,
589 .renderPass = renderpass,
590 .subpass = 0,
591 .basePipelineHandle = VK_NULL_HANDLE,
592 .basePipelineIndex = 0,
593 });
594}
595
596void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
597 if (pipeline) {
598 return;
599 }
600 const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag);
601 pipeline = device.GetLogical().CreateGraphicsPipeline({
602 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
603 .pNext = nullptr,
604 .flags = 0,
605 .stageCount = static_cast<u32>(stages.size()),
606 .pStages = stages.data(),
607 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
608 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
609 .pTessellationState = nullptr,
610 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
611 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
612 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
613 .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
614 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
615 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
616 .layout = *one_texture_pipeline_layout,
617 .renderPass = renderpass,
618 .subpass = 0,
619 .basePipelineHandle = VK_NULL_HANDLE,
620 .basePipelineIndex = 0,
621 });
622}
623
624} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
new file mode 100644
index 000000000..2c2790bf9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -0,0 +1,97 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8
9#include "video_core/engines/fermi_2d.h"
10#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
11#include "video_core/renderer_vulkan/wrapper.h"
12#include "video_core/texture_cache/types.h"
13
14namespace Vulkan {
15
16using VideoCommon::Offset2D;
17
18class VKDevice;
19class VKScheduler;
20class StateTracker;
21
22class Framebuffer;
23class ImageView;
24
25struct BlitImagePipelineKey {
26 constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
27
28 VkRenderPass renderpass;
29 Tegra::Engines::Fermi2D::Operation operation;
30};
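// The defaulted operator<=> above also implicitly declares a defaulted
// operator==, which is what std::ranges::find over blit_color_keys relies on.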
31
32class BlitImageHelper {
33public:
34 explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler,
35 StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
36 ~BlitImageHelper();
37
38 void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
39 const std::array<Offset2D, 2>& dst_region,
40 const std::array<Offset2D, 2>& src_region,
41 Tegra::Engines::Fermi2D::Filter filter,
42 Tegra::Engines::Fermi2D::Operation operation);
43
44 void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
45 VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region,
46 const std::array<Offset2D, 2>& src_region,
47 Tegra::Engines::Fermi2D::Filter filter,
48 Tegra::Engines::Fermi2D::Operation operation);
49
50 void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
51
52 void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
53
54 void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
55
56 void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
57
58private:
59 void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
60 const ImageView& src_image_view);
61
62 [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
63
64 [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
65
66 void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
67
68 void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
69
70 const VKDevice& device;
71 VKScheduler& scheduler;
72 StateTracker& state_tracker;
73
74 vk::DescriptorSetLayout one_texture_set_layout;
75 vk::DescriptorSetLayout two_textures_set_layout;
76 DescriptorAllocator one_texture_descriptor_allocator;
77 DescriptorAllocator two_textures_descriptor_allocator;
78 vk::PipelineLayout one_texture_pipeline_layout;
79 vk::PipelineLayout two_textures_pipeline_layout;
80 vk::ShaderModule full_screen_vert;
81 vk::ShaderModule blit_color_to_color_frag;
82 vk::ShaderModule blit_depth_stencil_frag;
83 vk::ShaderModule convert_depth_to_float_frag;
84 vk::ShaderModule convert_float_to_depth_frag;
85 vk::Sampler linear_sampler;
86 vk::Sampler nearest_sampler;
87
88 std::vector<BlitImagePipelineKey> blit_color_keys;
89 std::vector<vk::Pipeline> blit_color_pipelines;
90 vk::Pipeline blit_depth_stencil_pipeline;
91 vk::Pipeline convert_d32_to_r32_pipeline;
92 vk::Pipeline convert_r32_to_d32_pipeline;
93 vk::Pipeline convert_d16_to_r16_pipeline;
94 vk::Pipeline convert_r16_to_d16_pipeline;
95};
96
97} // namespace Vulkan
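// Typical call shape for the helper declared above (a sketch: the real call
// sites live in the Vulkan texture cache, and the handles and regions here
// are invented for illustration):
//
//     const std::array<Vulkan::Offset2D, 2> dst_region{{{0, 0}, {1280, 720}}};
//     const std::array<Vulkan::Offset2D, 2> src_region{{{0, 0}, {640, 360}}};
//     blit_image_helper.BlitColor(dst_framebuffer, src_image_view, dst_region,
//                                 src_region, Tegra::Engines::Fermi2D::Filter::Bilinear,
//                                 Tegra::Engines::Fermi2D::Operation::SrcCopy);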
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 5ec43db11..67dd10500 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
60 logic_op.Assign(PackLogicOp(regs.logic_op.operation)); 60 logic_op.Assign(PackLogicOp(regs.logic_op.operation));
61 rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); 61 rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
62 topology.Assign(regs.draw.topology); 62 topology.Assign(regs.draw.topology);
63 msaa_mode.Assign(regs.multisample_mode);
63 64
64 raw2 = 0; 65 raw2 = 0;
65 const auto test_func = 66 const auto test_func =
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index c26b77790..7e95e6fce 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -186,6 +186,7 @@ struct FixedPipelineState {
186 BitField<19, 4, u32> logic_op; 186 BitField<19, 4, u32> logic_op;
187 BitField<23, 1, u32> rasterize_enable; 187 BitField<23, 1, u32> rasterize_enable;
188 BitField<24, 4, Maxwell::PrimitiveTopology> topology; 188 BitField<24, 4, Maxwell::PrimitiveTopology> topology;
189 BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
189 }; 190 };
190 union { 191 union {
191 u32 raw2; 192 u32 raw2;
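The new field packs Tegra::Texture::MsaaMode into bits [28, 32) of raw1 beside
the existing fields. A minimal model of the insert semantics BitField is
assumed to provide (a sketch, not yuzu's common/bit_field.h):

    constexpr u32 InsertBits(u32 raw, u32 value, u32 position, u32 length) {
        const u32 mask = ((u32{1} << length) - 1) << position;
        return (raw & ~mask) | ((value << position) & mask);
    }
    static_assert(InsertBits(0, 0xA, 28, 4) == 0xA000'0000);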
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index d22de1d81..4c988429f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -26,7 +26,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter) {
26 case Tegra::Texture::TextureFilter::Linear: 26 case Tegra::Texture::TextureFilter::Linear:
27 return VK_FILTER_LINEAR; 27 return VK_FILTER_LINEAR;
28 } 28 }
29 UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter)); 29 UNREACHABLE_MSG("Invalid sampler filter={}", filter);
30 return {}; 30 return {};
31} 31}
32 32
@@ -43,7 +43,7 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter
43 case Tegra::Texture::TextureMipmapFilter::Linear: 43 case Tegra::Texture::TextureMipmapFilter::Linear:
44 return VK_SAMPLER_MIPMAP_MODE_LINEAR; 44 return VK_SAMPLER_MIPMAP_MODE_LINEAR;
45 } 45 }
46 UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); 46 UNREACHABLE_MSG("Invalid sampler mipmap mode={}", mipmap_filter);
47 return {}; 47 return {};
48} 48}
49 49
@@ -79,7 +79,7 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
79 UNIMPLEMENTED(); 79 UNIMPLEMENTED();
80 return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; 80 return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
81 default: 81 default:
82 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); 82 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", wrap_mode);
83 return {}; 83 return {};
84 } 84 }
85} 85}
@@ -103,8 +103,7 @@ VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_
103 case Tegra::Texture::DepthCompareFunc::Always: 103 case Tegra::Texture::DepthCompareFunc::Always:
104 return VK_COMPARE_OP_ALWAYS; 104 return VK_COMPARE_OP_ALWAYS;
105 } 105 }
106 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", 106 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", depth_compare_func);
107 static_cast<u32>(depth_compare_func));
108 return {}; 107 return {};
109} 108}
110 109
@@ -123,7 +122,7 @@ struct FormatTuple {
123 {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT 122 {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT
124 {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT 123 {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT
125 {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM 124 {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM
126 {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM 125 {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM
127 {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM 126 {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
128 {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM 127 {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
129 {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT 128 {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
@@ -164,7 +163,7 @@ struct FormatTuple {
164 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM 163 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
165 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT 164 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
166 {VK_FORMAT_UNDEFINED}, // R16G16_UINT 165 {VK_FORMAT_UNDEFINED}, // R16G16_UINT
167 {VK_FORMAT_UNDEFINED}, // R16G16_SINT 166 {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
168 {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM 167 {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
169 {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT 168 {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
170 {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB 169 {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB
@@ -228,25 +227,26 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
228 227
229 auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; 228 auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)];
230 if (tuple.format == VK_FORMAT_UNDEFINED) { 229 if (tuple.format == VK_FORMAT_UNDEFINED) {
231 UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", 230 UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", pixel_format);
232 static_cast<u32>(pixel_format));
233 return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; 231 return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
234 } 232 }
235 233
236 // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively 234 // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
237 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { 235 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
238 tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) 236 const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
239 ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 237 tuple.format = is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
240 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
241 } 238 }
242 const bool attachable = tuple.usage & Attachable; 239 const bool attachable = tuple.usage & Attachable;
243 const bool storage = tuple.usage & Storage; 240 const bool storage = tuple.usage & Storage;
244 241
245 VkFormatFeatureFlags usage; 242 VkFormatFeatureFlags usage{};
246 if (format_type == FormatType::Buffer) { 243 switch (format_type) {
244 case FormatType::Buffer:
247 usage = 245 usage =
248 VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; 246 VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
249 } else { 247 break;
248 case FormatType::Linear:
249 case FormatType::Optimal:
250 usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | 250 usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
251 VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; 251 VK_FORMAT_FEATURE_TRANSFER_SRC_BIT;
252 if (attachable) { 252 if (attachable) {
@@ -256,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
256 if (storage) { 256 if (storage) {
257 usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; 257 usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
258 } 258 }
259 break;
259 } 260 }
260 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; 261 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
261} 262}
@@ -275,7 +276,7 @@ VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
275 case Tegra::Engines::ShaderType::Compute: 276 case Tegra::Engines::ShaderType::Compute:
276 return VK_SHADER_STAGE_COMPUTE_BIT; 277 return VK_SHADER_STAGE_COMPUTE_BIT;
277 } 278 }
278 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); 279 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage);
279 return {}; 280 return {};
280} 281}
281 282
@@ -300,7 +301,7 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
300 case Maxwell::PrimitiveTopology::Patches: 301 case Maxwell::PrimitiveTopology::Patches:
301 return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; 302 return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
302 default: 303 default:
303 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); 304 UNIMPLEMENTED_MSG("Unimplemented topology={}", topology);
304 return {}; 305 return {};
305 } 306 }
306} 307}
@@ -490,8 +491,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
490 } 491 }
491 break; 492 break;
492 } 493 }
493 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type), 494 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", type, size);
494 static_cast<u32>(size));
495 return {}; 495 return {};
496} 496}
497 497
@@ -522,7 +522,7 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
522 case Maxwell::ComparisonOp::AlwaysOld: 522 case Maxwell::ComparisonOp::AlwaysOld:
523 return VK_COMPARE_OP_ALWAYS; 523 return VK_COMPARE_OP_ALWAYS;
524 } 524 }
525 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); 525 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
526 return {}; 526 return {};
527} 527}
528 528
@@ -539,7 +539,7 @@ VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_forma
539 case Maxwell::IndexFormat::UnsignedInt: 539 case Maxwell::IndexFormat::UnsignedInt:
540 return VK_INDEX_TYPE_UINT32; 540 return VK_INDEX_TYPE_UINT32;
541 } 541 }
542 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); 542 UNIMPLEMENTED_MSG("Unimplemented index_format={}", index_format);
543 return {}; 543 return {};
544} 544}
545 545
@@ -570,7 +570,7 @@ VkStencilOp StencilOp(Maxwell::StencilOp stencil_op) {
570 case Maxwell::StencilOp::DecrWrapOGL: 570 case Maxwell::StencilOp::DecrWrapOGL:
571 return VK_STENCIL_OP_DECREMENT_AND_WRAP; 571 return VK_STENCIL_OP_DECREMENT_AND_WRAP;
572 } 572 }
573 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); 573 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil_op);
574 return {}; 574 return {};
575} 575}
576 576
@@ -592,7 +592,7 @@ VkBlendOp BlendEquation(Maxwell::Blend::Equation equation) {
592 case Maxwell::Blend::Equation::MaxGL: 592 case Maxwell::Blend::Equation::MaxGL:
593 return VK_BLEND_OP_MAX; 593 return VK_BLEND_OP_MAX;
594 } 594 }
595 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); 595 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation);
596 return {}; 596 return {};
597} 597}
598 598
@@ -656,7 +656,7 @@ VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
656 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: 656 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
657 return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; 657 return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA;
658 } 658 }
659 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); 659 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor);
660 return {}; 660 return {};
661} 661}
662 662
@@ -667,7 +667,7 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face) {
667 case Maxwell::FrontFace::CounterClockWise: 667 case Maxwell::FrontFace::CounterClockWise:
668 return VK_FRONT_FACE_COUNTER_CLOCKWISE; 668 return VK_FRONT_FACE_COUNTER_CLOCKWISE;
669 } 669 }
670 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); 670 UNIMPLEMENTED_MSG("Unimplemented front face={}", front_face);
671 return {}; 671 return {};
672} 672}
673 673
@@ -680,7 +680,7 @@ VkCullModeFlags CullFace(Maxwell::CullFace cull_face) {
680 case Maxwell::CullFace::FrontAndBack: 680 case Maxwell::CullFace::FrontAndBack:
681 return VK_CULL_MODE_FRONT_AND_BACK; 681 return VK_CULL_MODE_FRONT_AND_BACK;
682 } 682 }
683 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); 683 UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face);
684 return {}; 684 return {};
685} 685}
686 686
@@ -700,7 +700,7 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
700 case Tegra::Texture::SwizzleSource::OneFloat: 700 case Tegra::Texture::SwizzleSource::OneFloat:
701 return VK_COMPONENT_SWIZZLE_ONE; 701 return VK_COMPONENT_SWIZZLE_ONE;
702 } 702 }
703 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); 703 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", swizzle);
704 return {}; 704 return {};
705} 705}
706 706
@@ -723,8 +723,21 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle)
723 case Maxwell::ViewportSwizzle::NegativeW: 723 case Maxwell::ViewportSwizzle::NegativeW:
724 return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV; 724 return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV;
725 } 725 }
726 UNREACHABLE_MSG("Invalid swizzle={}", static_cast<int>(swizzle)); 726 UNREACHABLE_MSG("Invalid swizzle={}", swizzle);
727 return {}; 727 return {};
728} 728}
729 729
730VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) {
731 switch (reduction) {
732 case Tegra::Texture::SamplerReduction::WeightedAverage:
733 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
734 case Tegra::Texture::SamplerReduction::Min:
735 return VK_SAMPLER_REDUCTION_MODE_MIN_EXT;
736 case Tegra::Texture::SamplerReduction::Max:
737 return VK_SAMPLER_REDUCTION_MODE_MAX_EXT;
738 }
739 UNREACHABLE_MSG("Invalid sampler reduction={}", static_cast<int>(reduction));
740 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
741}
742
730} // namespace Vulkan::MaxwellToVK 743} // namespace Vulkan::MaxwellToVK
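SamplerReduction() backs VK_EXT_sampler_filter_minmax: a min or max texel
reduction is requested by chaining a reduction-mode struct into sampler
creation. A hedged sketch of how a caller would wire the new helper (not code
from this commit):

    VkSamplerReductionModeCreateInfoEXT MakeReductionCreateInfo(
        Tegra::Texture::SamplerReduction reduction) {
        return {
            .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT,
            .pNext = nullptr,
            .reductionMode = MaxwellToVK::SamplerReduction(reduction),
        };
    }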
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7e213452f..1a90f192e 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
61 61
62VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); 62VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
63 63
64VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
65
64} // namespace Vulkan::MaxwellToVK 66} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index a2173edd2..7f521cb9b 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() {
92 return library; 92 return library;
93} 93}
94 94
95std::pair<vk::Instance, u32> CreateInstance( 95std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library,
96 Common::DynamicLibrary& library, vk::InstanceDispatch& dld, 96 vk::InstanceDispatch& dld, WindowSystemType window_type,
97 WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) { 97 bool enable_debug_utils, bool enable_layers) {
98 if (!library.IsOpen()) { 98 if (!library.IsOpen()) {
99 LOG_ERROR(Render_Vulkan, "Vulkan library not available"); 99 LOG_ERROR(Render_Vulkan, "Vulkan library not available");
100 return {}; 100 return {};
@@ -133,7 +133,7 @@ std::pair<vk::Instance, u32> CreateInstance(
133 if (window_type != Core::Frontend::WindowSystemType::Headless) { 133 if (window_type != Core::Frontend::WindowSystemType::Headless) {
134 extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); 134 extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
135 } 135 }
136 if (enable_layers) { 136 if (enable_debug_utils) {
137 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); 137 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
138 } 138 }
139 extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); 139 extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
@@ -243,8 +243,8 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
243RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, 243RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
244 Core::Frontend::EmuWindow& emu_window, 244 Core::Frontend::EmuWindow& emu_window,
245 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 245 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
246 std::unique_ptr<Core::Frontend::GraphicsContext> context) 246 std::unique_ptr<Core::Frontend::GraphicsContext> context_)
247 : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_}, 247 : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_},
248 cpu_memory{cpu_memory_}, gpu{gpu_} {} 248 cpu_memory{cpu_memory_}, gpu{gpu_} {}
249 249
250RendererVulkan::~RendererVulkan() { 250RendererVulkan::~RendererVulkan() {
@@ -287,7 +287,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
287bool RendererVulkan::Init() { 287bool RendererVulkan::Init() {
288 library = OpenVulkanLibrary(); 288 library = OpenVulkanLibrary();
289 std::tie(instance, instance_version) = CreateInstance( 289 std::tie(instance, instance_version) = CreateInstance(
290 library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug); 290 library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug);
291 if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { 291 if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
292 return false; 292 return false;
293 } 293 }
@@ -447,7 +447,8 @@ void RendererVulkan::Report() const {
447std::vector<std::string> RendererVulkan::EnumerateDevices() { 447std::vector<std::string> RendererVulkan::EnumerateDevices() {
448 vk::InstanceDispatch dld; 448 vk::InstanceDispatch dld;
449 Common::DynamicLibrary library = OpenVulkanLibrary(); 449 Common::DynamicLibrary library = OpenVulkanLibrary();
450 vk::Instance instance = CreateInstance(library, dld).first; 450 vk::Instance instance =
451 CreateInstance(library, dld, WindowSystemType::Headless, false, false).first;
451 if (!instance) { 452 if (!instance) {
452 return {}; 453 return {};
453 } 454 }
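The CreateInstance() rework separates VK_EXT_debug_utils from validation
layers: Init() always requests debug utils (presumably so object labels and
markers work even without layers) while layers stay behind
Settings::values.renderer_debug, and EnumerateDevices() asks for neither. A
caller-side sketch under those assumptions:

    const auto [instance, version] = CreateInstance(
        library, dld, render_window.GetWindowInfo().type,
        /*enable_debug_utils=*/true, /*enable_layers=*/Settings::values.renderer_debug);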
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 1044ca124..74642fba4 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -33,10 +33,9 @@ class VKDevice;
33class VKMemoryManager; 33class VKMemoryManager;
34class VKSwapchain; 34class VKSwapchain;
35class VKScheduler; 35class VKScheduler;
36class VKImage;
37 36
38struct VKScreenInfo { 37struct VKScreenInfo {
39 VKImage* image{}; 38 VkImageView image_view{};
40 u32 width{}; 39 u32 width{};
41 u32 height{}; 40 u32 height{};
42 bool is_srgb{}; 41 bool is_srgb{};
@@ -45,9 +44,9 @@ struct VKScreenInfo {
45class RendererVulkan final : public VideoCore::RendererBase { 44class RendererVulkan final : public VideoCore::RendererBase {
46public: 45public:
47 explicit RendererVulkan(Core::TelemetrySession& telemetry_session, 46 explicit RendererVulkan(Core::TelemetrySession& telemetry_session,
48 Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, 47 Core::Frontend::EmuWindow& emu_window,
49 Tegra::GPU& gpu, 48 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
50 std::unique_ptr<Core::Frontend::GraphicsContext> context); 49 std::unique_ptr<Core::Frontend::GraphicsContext> context_);
51 ~RendererVulkan() override; 50 ~RendererVulkan() override;
52 51
53 bool Init() override; 52 bool Init() override;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index b5b60309e..d3a83f22f 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -16,12 +16,12 @@
16#include "core/frontend/emu_window.h" 16#include "core/frontend/emu_window.h"
17#include "core/memory.h" 17#include "core/memory.h"
18#include "video_core/gpu.h" 18#include "video_core/gpu.h"
19#include "video_core/morton.h" 19#include "video_core/host_shaders/vulkan_present_frag_spv.h"
20#include "video_core/host_shaders/vulkan_present_vert_spv.h"
20#include "video_core/rasterizer_interface.h" 21#include "video_core/rasterizer_interface.h"
21#include "video_core/renderer_vulkan/renderer_vulkan.h" 22#include "video_core/renderer_vulkan/renderer_vulkan.h"
22#include "video_core/renderer_vulkan/vk_blit_screen.h" 23#include "video_core/renderer_vulkan/vk_blit_screen.h"
23#include "video_core/renderer_vulkan/vk_device.h" 24#include "video_core/renderer_vulkan/vk_device.h"
24#include "video_core/renderer_vulkan/vk_image.h"
25#include "video_core/renderer_vulkan/vk_master_semaphore.h" 25#include "video_core/renderer_vulkan/vk_master_semaphore.h"
26#include "video_core/renderer_vulkan/vk_memory_manager.h" 26#include "video_core/renderer_vulkan/vk_memory_manager.h"
27#include "video_core/renderer_vulkan/vk_scheduler.h" 27#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -29,108 +29,12 @@
29#include "video_core/renderer_vulkan/vk_swapchain.h" 29#include "video_core/renderer_vulkan/vk_swapchain.h"
30#include "video_core/renderer_vulkan/wrapper.h" 30#include "video_core/renderer_vulkan/wrapper.h"
31#include "video_core/surface.h" 31#include "video_core/surface.h"
32#include "video_core/textures/decoders.h"
32 33
33namespace Vulkan { 34namespace Vulkan {
34 35
35namespace { 36namespace {
36 37
37// Generated from the "shaders/" directory, read the instructions there.
38constexpr u8 blit_vertex_code[] = {
39 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00,
40 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
41 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
42 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
43 0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
44 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
45 0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
46 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00,
47 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
48 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
49 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
50 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
51 0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
52 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
53 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
54 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00,
55 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
56 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
57 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
58 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
59 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
60 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
61 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00,
62 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
63 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
64 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
65 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
66 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
67 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
68 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
69 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
70 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
71 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00,
72 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00,
73 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
74 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
75 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00,
76 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
77 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
78 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
79 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
80 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
81 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
82 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
83 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
84 0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
85 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
86 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
87 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
88 0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
89 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
90 0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
91 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00,
92 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
93 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
94 0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00,
95 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
96 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
97 0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
98 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
99 0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00,
100 0x38, 0x00, 0x01, 0x00};
101
102constexpr u8 blit_fragment_code[] = {
103 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00,
104 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
105 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
106 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
107 0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
108 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00,
109 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
110 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
111 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
112 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00,
113 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
114 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00,
115 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
116 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
117 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
118 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00,
119 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
120 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00,
121 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
122 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
123 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00,
124 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
125 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
126 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
127 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
128 0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
129 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
130 0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
131 0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00,
132 0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
133
134struct ScreenRectVertex { 38struct ScreenRectVertex {
135 ScreenRectVertex() = default; 39 ScreenRectVertex() = default;
136 explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} 40 explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {}
@@ -173,9 +77,9 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
173 // clang-format on 77 // clang-format on
174} 78}
175 79
176std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { 80u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
177 using namespace VideoCore::Surface; 81 using namespace VideoCore::Surface;
178 return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); 82 return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
179} 83}
180 84
181std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { 85std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
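The bytes-per-pixel helper above now returns u32 and routes through BytesPerBlock, which coincides with bytes-per-pixel for the uncompressed formats a framebuffer can carry; GetSizeInBytes then follows from the stride. A minimal sketch of that relationship, using a hypothetical stand-in for the Tegra config struct rather than the real one:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical stand-in for the Tegra::FramebufferConfig fields used here.
    struct FramebufferConfig {
        std::uint32_t stride; // row length in pixels
        std::uint32_t height; // number of rows
    };

    // A linear framebuffer occupies stride * height pixels, each bytes_per_pixel wide.
    constexpr std::size_t SizeInBytes(const FramebufferConfig& fb, std::uint32_t bytes_per_pixel) {
        return static_cast<std::size_t>(fb.stride) * fb.height * bytes_per_pixel;
    }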
@@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
239 scheduler.Wait(resource_ticks[image_index]); 143 scheduler.Wait(resource_ticks[image_index]);
240 resource_ticks[image_index] = scheduler.CurrentTick(); 144 resource_ticks[image_index] = scheduler.CurrentTick();
241 145
242 VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get(); 146 UpdateDescriptorSet(image_index,
243 147 use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
244 UpdateDescriptorSet(image_index, blit_image->GetPresentView());
245 148
246 BufferData data; 149 BufferData data;
247 SetUniformData(data, framebuffer); 150 SetUniformData(data, framebuffer);
248 SetVertexData(data, framebuffer); 151 SetVertexData(data, framebuffer);
249 152
250 auto map = buffer_commit->Map(); 153 auto map = buffer_commit->Map();
251 std::memcpy(map.GetAddress(), &data, sizeof(data)); 154 std::memcpy(map.Address(), &data, sizeof(data));
252 155
253 if (!use_accelerated) { 156 if (!use_accelerated) {
254 const u64 image_offset = GetRawImageOffset(framebuffer, image_index); 157 const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
255 158
256 const auto pixel_format =
257 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
258 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; 159 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
259 const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); 160 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
260 rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); 161 const size_t size_bytes = GetSizeInBytes(framebuffer);
162 rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
261 163
262 // TODO(Rodrigo): Read this from HLE 164 // TODO(Rodrigo): Read this from HLE
263 constexpr u32 block_height_log2 = 4; 165 constexpr u32 block_height_log2 = 4;
264 VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, 166 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
265 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, 167 Tegra::Texture::UnswizzleTexture(
266 map.GetAddress() + image_offset, host_ptr); 168 std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes),
267 169 bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
268 blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT,
269 VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
270 170
271 const VkBufferImageCopy copy{ 171 const VkBufferImageCopy copy{
272 .bufferOffset = image_offset, 172 .bufferOffset = image_offset,
@@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
288 }, 188 },
289 }; 189 };
290 scheduler.Record( 190 scheduler.Record(
291 [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { 191 [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
292 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); 192 const VkImageMemoryBarrier base_barrier{
193 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
194 .pNext = nullptr,
195 .srcAccessMask = 0,
196 .dstAccessMask = 0,
197 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
198 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
199 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
200 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
201 .image = image,
202 .subresourceRange =
203 {
204 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
205 .baseMipLevel = 0,
206 .levelCount = 1,
207 .baseArrayLayer = 0,
208 .layerCount = 1,
209 },
210 };
211 VkImageMemoryBarrier read_barrier = base_barrier;
212 read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
213 read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
214 read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
215
216 VkImageMemoryBarrier write_barrier = base_barrier;
217 write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
218 write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
219
220 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
221 0, read_barrier);
222 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
223 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
224 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
293 }); 225 });
294 } 226 }
295 map.Release(); 227 map.Release();
296 228
297 blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
298 VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
299
300 scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], 229 scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
301 descriptor_set = descriptor_sets[image_index], buffer = *buffer, 230 descriptor_set = descriptor_sets[image_index], buffer = *buffer,
302 size = swapchain.GetSize(), pipeline = *pipeline, 231 size = swapchain.GetSize(), pipeline = *pipeline,
@@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
304 const VkClearValue clear_color{ 233 const VkClearValue clear_color{
305 .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, 234 .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
306 }; 235 };
307 236 const VkRenderPassBeginInfo renderpass_bi{
308 VkRenderPassBeginInfo renderpass_bi; 237 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
309 renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; 238 .pNext = nullptr,
310 renderpass_bi.pNext = nullptr; 239 .renderPass = renderpass,
311 renderpass_bi.renderPass = renderpass; 240 .framebuffer = framebuffer,
312 renderpass_bi.framebuffer = framebuffer; 241 .renderArea =
313 renderpass_bi.renderArea.offset.x = 0; 242 {
314 renderpass_bi.renderArea.offset.y = 0; 243 .offset = {0, 0},
315 renderpass_bi.renderArea.extent = size; 244 .extent = size,
316 renderpass_bi.clearValueCount = 1; 245 },
317 renderpass_bi.pClearValues = &clear_color; 246 .clearValueCount = 1,
318 247 .pClearValues = &clear_color,
319 VkViewport viewport; 248 };
320 viewport.x = 0.0f; 249 const VkViewport viewport{
321 viewport.y = 0.0f; 250 .x = 0.0f,
322 viewport.width = static_cast<float>(size.width); 251 .y = 0.0f,
323 viewport.height = static_cast<float>(size.height); 252 .width = static_cast<float>(size.width),
324 viewport.minDepth = 0.0f; 253 .height = static_cast<float>(size.height),
325 viewport.maxDepth = 1.0f; 254 .minDepth = 0.0f,
326 255 .maxDepth = 1.0f,
327 VkRect2D scissor; 256 };
328 scissor.offset.x = 0; 257 const VkRect2D scissor{
329 scissor.offset.y = 0; 258 .offset = {0, 0},
330 scissor.extent = size; 259 .extent = size,
331 260 };
332 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); 261 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
333 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 262 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
334 cmdbuf.SetViewport(0, viewport); 263 cmdbuf.SetViewport(0, viewport);
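The hunk above is a pure conversion from field-by-field assignment to C++20 designated initializers. Besides allowing const, the designated form value-initializes every member left unnamed, so nothing is read uninitialized by accident. A minimal illustration on a small Vulkan struct:

    #include <vulkan/vulkan.h>

    // .pNext and .flags are omitted, so they are value-initialized to nullptr/0;
    // the assignment style had to spell out every field by hand or risk garbage.
    VkSemaphoreCreateInfo MakeSemaphoreCreateInfo() {
        return VkSemaphoreCreateInfo{
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        };
    }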
@@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
372} 301}
373 302
374void VKBlitScreen::CreateShaders() { 303void VKBlitScreen::CreateShaders() {
375 vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code); 304 vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
376 fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code); 305 fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
377} 306}
378 307
379void VKBlitScreen::CreateSemaphores() { 308void VKBlitScreen::CreateSemaphores() {
@@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() {
420 349
421 const VkAttachmentReference color_attachment_ref{ 350 const VkAttachmentReference color_attachment_ref{
422 .attachment = 0, 351 .attachment = 0,
423 .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 352 .layout = VK_IMAGE_LAYOUT_GENERAL,
424 }; 353 };
425 354
426 const VkSubpassDescription subpass_description{ 355 const VkSubpassDescription subpass_description{
@@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
735 664
736void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { 665void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
737 raw_images.resize(image_count); 666 raw_images.resize(image_count);
667 raw_image_views.resize(image_count);
738 raw_buffer_commits.resize(image_count); 668 raw_buffer_commits.resize(image_count);
739 669
740 const VkImageCreateInfo ci{ 670 for (size_t i = 0; i < image_count; ++i) {
741 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 671 raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
742 .pNext = nullptr, 672 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
743 .flags = 0, 673 .pNext = nullptr,
744 .imageType = VK_IMAGE_TYPE_2D, 674 .flags = 0,
745 .format = GetFormat(framebuffer), 675 .imageType = VK_IMAGE_TYPE_2D,
746 .extent = 676 .format = GetFormat(framebuffer),
747 { 677 .extent =
748 .width = framebuffer.width, 678 {
749 .height = framebuffer.height, 679 .width = framebuffer.width,
750 .depth = 1, 680 .height = framebuffer.height,
751 }, 681 .depth = 1,
752 .mipLevels = 1, 682 },
753 .arrayLayers = 1, 683 .mipLevels = 1,
754 .samples = VK_SAMPLE_COUNT_1_BIT, 684 .arrayLayers = 1,
755 .tiling = VK_IMAGE_TILING_LINEAR, 685 .samples = VK_SAMPLE_COUNT_1_BIT,
756 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 686 .tiling = VK_IMAGE_TILING_LINEAR,
757 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 687 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
758 .queueFamilyIndexCount = 0, 688 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
759 .pQueueFamilyIndices = nullptr, 689 .queueFamilyIndexCount = 0,
760 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 690 .pQueueFamilyIndices = nullptr,
761 }; 691 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
762 692 });
763 for (std::size_t i = 0; i < image_count; ++i) { 693 raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false);
764 raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); 694 raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
765 raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); 695 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
696 .pNext = nullptr,
697 .flags = 0,
698 .image = *raw_images[i],
699 .viewType = VK_IMAGE_VIEW_TYPE_2D,
700 .format = GetFormat(framebuffer),
701 .components =
702 {
703 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
704 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
705 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
706 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
707 },
708 .subresourceRange =
709 {
710 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
711 .baseMipLevel = 0,
712 .levelCount = 1,
713 .baseArrayLayer = 0,
714 .layerCount = 1,
715 },
716 });
766 } 717 }
767} 718}
768 719
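CreateRawImages now builds each element in three ordered steps: create the image, commit memory for it, then create a view whose format matches the image's. A stripped-down sketch of the last step with raw Vulkan calls (the function name is illustrative; the diff itself goes through the project's vk:: wrappers, and error handling is omitted):

    #include <vulkan/vulkan.h>

    // Create a 2D color view of an already-created, memory-backed image.
    // Unnamed members (pNext, flags, components, base level/layer) are
    // value-initialized, which yields identity swizzles and zero bases.
    VkImageView MakeColorImageView(VkDevice device, VkImage image, VkFormat format) {
        const VkImageViewCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = image,
            .viewType = VK_IMAGE_VIEW_TYPE_2D,
            .format = format, // must match the VkImageCreateInfo format
            .subresourceRange =
                {
                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                    .levelCount = 1,
                    .layerCount = 1,
                },
        };
        VkImageView view = VK_NULL_HANDLE;
        vkCreateImageView(device, &ci, nullptr, &view);
        return view;
    }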
@@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
789 const VkDescriptorImageInfo image_info{ 740 const VkDescriptorImageInfo image_info{
790 .sampler = *sampler, 741 .sampler = *sampler,
791 .imageView = image_view, 742 .imageView = image_view,
792 .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 743 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
793 }; 744 };
794 745
795 const VkWriteDescriptorSet sampler_write{ 746 const VkWriteDescriptorSet sampler_write{
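Taken together, the vk_blit_screen.cpp changes replace VKImage's internal layout tracking with explicit, locally visible synchronization: host writes are made available to the transfer stage, the buffer-to-image copy runs in VK_IMAGE_LAYOUT_GENERAL, and the transfer write is then made visible to the fragment shader that samples the image, matching the GENERAL layout now used by the render pass and the descriptor set. A self-contained sketch of that barrier chain with raw Vulkan calls (illustrative only; the real code records through the scheduler's command buffer wrapper):

    #include <vulkan/vulkan.h>

    // Host write -> transfer copy -> fragment-shader read, staying in the
    // GENERAL layout, mirroring the read_barrier/write_barrier pair above.
    void CopyWithBarriers(VkCommandBuffer cmdbuf, VkBuffer src, VkImage dst,
                          const VkBufferImageCopy& copy) {
        VkImageMemoryBarrier barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // contents are fully overwritten
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = dst,
            .subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1},
        };
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_HOST_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr,
                             1, &barrier);
        vkCmdCopyBufferToImage(cmdbuf, src, dst, VK_IMAGE_LAYOUT_GENERAL, 1, &copy);

        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr,
                             0, nullptr, 1, &barrier);
    }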
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 8f2839214..2ee374247 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -35,7 +35,6 @@ struct ScreenInfo;
35 35
36class RasterizerVulkan; 36class RasterizerVulkan;
37class VKDevice; 37class VKDevice;
38class VKImage;
39class VKScheduler; 38class VKScheduler;
40class VKSwapchain; 39class VKSwapchain;
41 40
@@ -110,7 +109,8 @@ private:
110 std::vector<u64> resource_ticks; 109 std::vector<u64> resource_ticks;
111 110
112 std::vector<vk::Semaphore> semaphores; 111 std::vector<vk::Semaphore> semaphores;
113 std::vector<std::unique_ptr<VKImage>> raw_images; 112 std::vector<vk::Image> raw_images;
113 std::vector<vk::ImageView> raw_image_views;
114 std::vector<VKMemoryCommit> raw_buffer_commits; 114 std::vector<VKMemoryCommit> raw_buffer_commits;
115 u32 raw_width = 0; 115 u32 raw_width = 0;
116 u32 raw_height = 0; 116 u32 raw_height = 0;
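With VKImage gone, lifetime falls to the RAII handle types directly. Note the member order: raw_image_views is declared after raw_images, so it is destroyed first, tearing down views before the images they reference. The wrapper idea, as a minimal sketch (the project's real vk::ImageView lives in its Vulkan wrapper header; this version is illustrative only):

    #include <utility>
    #include <vulkan/vulkan.h>

    // Move-only owner of a VkImageView, destroyed exactly once.
    class UniqueImageView {
    public:
        UniqueImageView() = default;
        UniqueImageView(VkDevice device, VkImageView view) : device_{device}, view_{view} {}
        UniqueImageView(UniqueImageView&& rhs) noexcept
            : device_{rhs.device_}, view_{std::exchange(rhs.view_, VK_NULL_HANDLE)} {}
        UniqueImageView& operator=(UniqueImageView&& rhs) noexcept {
            if (this != &rhs) {
                Reset();
                device_ = rhs.device_;
                view_ = std::exchange(rhs.view_, VK_NULL_HANDLE);
            }
            return *this;
        }
        ~UniqueImageView() { Reset(); }

        VkImageView operator*() const { return view_; }

    private:
        void Reset() noexcept {
            if (view_ != VK_NULL_HANDLE) {
                vkDestroyImageView(device_, view_, nullptr);
                view_ = VK_NULL_HANDLE;
            }
        }

        VkDevice device_ = VK_NULL_HANDLE;
        VkImageView view_ = VK_NULL_HANDLE;
    };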
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d9d3da9ea..10d296c2f 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -31,20 +31,24 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
31 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | 31 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
32 VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; 32 VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
33 33
34constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
35 VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
36
34std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { 37std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
35 return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE); 38 return std::make_unique<VKStreamBuffer>(device, scheduler);
36} 39}
37 40
38} // Anonymous namespace 41} // Anonymous namespace
39 42
40Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, 43Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
41 VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) 44 VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
42 : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { 45 : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
46 staging_pool_} {
43 const VkBufferCreateInfo ci{ 47 const VkBufferCreateInfo ci{
44 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 48 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
45 .pNext = nullptr, 49 .pNext = nullptr,
46 .flags = 0, 50 .flags = 0,
47 .size = static_cast<VkDeviceSize>(size), 51 .size = static_cast<VkDeviceSize>(size_),
48 .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 52 .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
49 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 53 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
50 .queueFamilyIndexCount = 0, 54 .queueFamilyIndexCount = 0,
@@ -57,69 +61,86 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKSchedu
57 61
58Buffer::~Buffer() = default; 62Buffer::~Buffer() = default;
59 63
60void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { 64void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
61 const auto& staging = staging_pool.GetUnusedBuffer(size, true); 65 const auto& staging = staging_pool.GetUnusedBuffer(data_size, true);
62 std::memcpy(staging.commit->Map(size), data, size); 66 std::memcpy(staging.commit->Map(data_size), data, data_size);
63 67
64 scheduler.RequestOutsideRenderPassOperationContext(); 68 scheduler.RequestOutsideRenderPassOperationContext();
65 69
66 const VkBuffer handle = Handle(); 70 const VkBuffer handle = Handle();
67 scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { 71 scheduler.Record([staging = *staging.handle, handle, offset, data_size,
68 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); 72 &device = device](vk::CommandBuffer cmdbuf) {
69 73 const VkBufferMemoryBarrier read_barrier{
70 const VkBufferMemoryBarrier barrier{
71 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 74 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
72 .pNext = nullptr, 75 .pNext = nullptr,
73 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 76 .srcAccessMask =
74 .dstAccessMask = UPLOAD_ACCESS_BARRIERS, 77 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
78 VK_ACCESS_HOST_WRITE_BIT |
79 (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
80 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
75 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 81 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
76 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 82 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
77 .buffer = handle, 83 .buffer = handle,
78 .offset = offset, 84 .offset = offset,
79 .size = size, 85 .size = data_size,
80 }; 86 };
81 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, 87 const VkBufferMemoryBarrier write_barrier{
82 barrier, {});
83 });
84}
85
86void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
87 const auto& staging = staging_pool.GetUnusedBuffer(size, true);
88 scheduler.RequestOutsideRenderPassOperationContext();
89
90 const VkBuffer handle = Handle();
91 scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
92 const VkBufferMemoryBarrier barrier{
93 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 88 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
94 .pNext = nullptr, 89 .pNext = nullptr,
95 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, 90 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
96 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, 91 .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
97 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 92 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
98 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 93 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
99 .buffer = handle, 94 .buffer = handle,
100 .offset = offset, 95 .offset = offset,
101 .size = size, 96 .size = data_size,
102 }; 97 };
103 98 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
104 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | 99 0, read_barrier);
105 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 100 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
106 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 101 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
107 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); 102 write_barrier);
108 cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
109 }); 103 });
104}
105
106void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
107 const auto& staging = staging_pool.GetUnusedBuffer(data_size, true);
108 scheduler.RequestOutsideRenderPassOperationContext();
109
110 const VkBuffer handle = Handle();
111 scheduler.Record(
112 [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
113 const VkBufferMemoryBarrier barrier{
114 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
115 .pNext = nullptr,
116 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
117 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
118 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
119 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
120 .buffer = handle,
121 .offset = offset,
122 .size = data_size,
123 };
124
125 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
126 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
127 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
128 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
129 cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
130 });
110 scheduler.Finish(); 131 scheduler.Finish();
111 132
112 std::memcpy(data, staging.commit->Map(size), size); 133 std::memcpy(data, staging.commit->Map(data_size), data_size);
113} 134}
114 135
115void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 136void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
116 std::size_t size) { 137 std::size_t copy_size) {
117 scheduler.RequestOutsideRenderPassOperationContext(); 138 scheduler.RequestOutsideRenderPassOperationContext();
118 139
119 const VkBuffer dst_buffer = Handle(); 140 const VkBuffer dst_buffer = Handle();
120 scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, 141 scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
121 size](vk::CommandBuffer cmdbuf) { 142 copy_size](vk::CommandBuffer cmdbuf) {
122 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); 143 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size});
123 144
124 std::array<VkBufferMemoryBarrier, 2> barriers; 145 std::array<VkBufferMemoryBarrier, 2> barriers;
125 barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 146 barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -130,7 +151,7 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
130 barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 151 barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
131 barriers[0].buffer = src_buffer; 152 barriers[0].buffer = src_buffer;
132 barriers[0].offset = src_offset; 153 barriers[0].offset = src_offset;
133 barriers[0].size = size; 154 barriers[0].size = copy_size;
134 barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 155 barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
135 barriers[1].pNext = nullptr; 156 barriers[1].pNext = nullptr;
136 barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 157 barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
@@ -139,19 +160,19 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
139 barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 160 barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
140 barriers[1].buffer = dst_buffer; 161 barriers[1].buffer = dst_buffer;
141 barriers[1].offset = dst_offset; 162 barriers[1].offset = dst_offset;
142 barriers[1].size = size; 163 barriers[1].size = copy_size;
143 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, 164 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
144 barriers, {}); 165 barriers, {});
145 }); 166 });
146} 167}
147 168
148VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, 169VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
149 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 170 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
150 const VKDevice& device_, VKMemoryManager& memory_manager_, 171 const VKDevice& device_, VKMemoryManager& memory_manager_,
151 VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) 172 VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
152 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory, 173 VKStagingBufferPool& staging_pool_)
153 CreateStreamBuffer(device_, 174 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
154 scheduler_)}, 175 cpu_memory_, stream_buffer_},
155 device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ 176 device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
156 staging_pool_} {} 177 staging_pool_} {}
157 178
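Upload now brackets the copy with two barriers instead of trailing one. The first waits on anything that may have written the destination range (shader, transfer, and host writes, plus transform feedback writes when the device exposes the extension); the second publishes the transfer write to the usual upload consumers. The same shape in raw Vulkan, as an illustrative sketch where upload_access and upload_stage stand in for UPLOAD_ACCESS_BARRIERS and UPLOAD_PIPELINE_STAGE above:

    #include <vulkan/vulkan.h>

    // Guarded staging-to-device copy of [offset, offset + size) in dst.
    void GuardedUpload(VkCommandBuffer cmdbuf, VkBuffer staging, VkBuffer dst,
                       VkDeviceSize offset, VkDeviceSize size,
                       VkAccessFlags upload_access, VkPipelineStageFlags upload_stage) {
        VkBufferMemoryBarrier barrier{
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
                             VK_ACCESS_HOST_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = dst,
            .offset = offset,
            .size = size,
        };
        // Wait for earlier writers of the range before the copy lands on it.
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &barrier,
                             0, nullptr);

        const VkBufferCopy copy{.srcOffset = 0, .dstOffset = offset, .size = size};
        vkCmdCopyBuffer(cmdbuf, staging, dst, 1, &copy);

        // Publish the new contents to the stages that consume uploads.
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = upload_access;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, upload_stage, 0,
                             0, nullptr, 1, &barrier, 0, nullptr);
    }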
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 7fb5ceedf..daf498222 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -22,15 +22,15 @@ class VKScheduler;
22class Buffer final : public VideoCommon::BufferBlock { 22class Buffer final : public VideoCommon::BufferBlock {
23public: 23public:
24 explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, 24 explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
25 VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size); 25 VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
26 ~Buffer(); 26 ~Buffer();
27 27
28 void Upload(std::size_t offset, std::size_t size, const u8* data); 28 void Upload(std::size_t offset, std::size_t data_size, const u8* data);
29 29
30 void Download(std::size_t offset, std::size_t size, u8* data); 30 void Download(std::size_t offset, std::size_t data_size, u8* data);
31 31
32 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 32 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
33 std::size_t size); 33 std::size_t copy_size);
34 34
35 VkBuffer Handle() const { 35 VkBuffer Handle() const {
36 return *buffer.handle; 36 return *buffer.handle;
@@ -41,6 +41,7 @@ public:
41 } 41 }
42 42
43private: 43private:
44 const VKDevice& device;
44 VKScheduler& scheduler; 45 VKScheduler& scheduler;
45 VKStagingBufferPool& staging_pool; 46 VKStagingBufferPool& staging_pool;
46 47
@@ -52,7 +53,8 @@ public:
52 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, 53 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
53 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 54 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
54 const VKDevice& device, VKMemoryManager& memory_manager, 55 const VKDevice& device, VKMemoryManager& memory_manager,
55 VKScheduler& scheduler, VKStagingBufferPool& staging_pool); 56 VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
57 VKStagingBufferPool& staging_pool);
56 ~VKBufferCache(); 58 ~VKBufferCache();
57 59
58 BufferInfo GetEmptyBuffer(std::size_t size) override; 60 BufferInfo GetEmptyBuffer(std::size_t size) override;
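The constructor change means the stream buffer is created once by the renderer and handed to the cache by reference, rather than each cache owning one built through CreateStreamBuffer. A compact sketch of the ownership shape, with illustrative names only:

    // The renderer owns the stream buffer; the cache merely borrows it, so the
    // same buffer can be shared with other users and outlives the cache.
    struct StreamBuffer {};

    class BufferCache {
    public:
        explicit BufferCache(StreamBuffer& stream_buffer) : stream_buffer_{stream_buffer} {}

    private:
        StreamBuffer& stream_buffer_; // non-owning reference
    };

    struct Renderer {
        StreamBuffer stream_buffer;               // constructed first, destroyed last
        BufferCache buffer_cache{stream_buffer};  // borrows the member above
    };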
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
index 6339f4fe0..8f7d6410e 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -17,8 +17,8 @@ struct CommandPool::Pool {
17 vk::CommandBuffers cmdbufs; 17 vk::CommandBuffers cmdbufs;
18}; 18};
19 19
20CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device) 20CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_)
21 : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device} {} 21 : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {}
22 22
23CommandPool::~CommandPool() = default; 23CommandPool::~CommandPool() = default;
24 24
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h
index b9cb3fb5d..62a7ce3f1 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.h
+++ b/src/video_core/renderer_vulkan/vk_command_pool.h
@@ -17,7 +17,7 @@ class VKDevice;
17 17
18class CommandPool final : public ResourcePool { 18class CommandPool final : public ResourcePool {
19public: 19public:
20 explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device); 20 explicit CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_);
21 ~CommandPool() override; 21 ~CommandPool() override;
22 22
23 void Allocate(size_t begin, size_t end) override; 23 void Allocate(size_t begin, size_t end) override;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 9637c6059..2c030e910 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,6 +10,9 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
13#include "video_core/renderer_vulkan/vk_compute_pass.h" 16#include "video_core/renderer_vulkan/vk_compute_pass.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 17#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
15#include "video_core/renderer_vulkan/vk_device.h" 18#include "video_core/renderer_vulkan/vk_device.h"
@@ -22,99 +25,6 @@ namespace Vulkan {
22 25
23namespace { 26namespace {
24 27
25// Quad array SPIR-V module. Generated from the "shaders/" directory; read the instructions there.
26constexpr u8 quad_array[] = {
27 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
28 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
29 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
30 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
31 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
32 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
33 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
34 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
35 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
36 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
37 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
38 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
39 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
40 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
41 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
42 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
43 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
44 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
45 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
46 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
47 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
48 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
49 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
50 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
51 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
52 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
53 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
54 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
55 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
56 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
57 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
58 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
59 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
60 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
61 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
62 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
63 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
64 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
65 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
66 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
67 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
68 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
69 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
70 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
71 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
72 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
73 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
74 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
75 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
76 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
77 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
78 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
79 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
80 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
81 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
82 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
83 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
84 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
85 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
86 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
87 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
88 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
89 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
90 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
91 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
92 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
93 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
94 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
95 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
96 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
97 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
98 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
99 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
100 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
101 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
102 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
103 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
104 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
105 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
106 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
107 0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
108 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
109 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
110 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
111 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
112 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
113 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
114 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
115 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
116};
117
118VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { 28VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
119 return { 29 return {
120 .binding = 0, 30 .binding = 0,
@@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
144 }; 54 };
145} 55}
146 56
147// Uint8 SPIR-V module. Generated from the "shaders/" directory.
148constexpr u8 uint8_pass[] = {
149 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
150 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
151 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
152 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
153 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
154 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
155 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
156 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
157 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
158 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
159 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
160 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
161 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
162 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
163 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
164 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
165 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
166 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
167 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
168 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
169 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
170 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
171 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
172 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
173 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
174 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
175 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
176 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
177 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
178 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
179 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
180 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
181 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
182 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
183 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
184 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
185 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
186 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
187 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
188 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
189 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
190 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
191 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
192 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
193 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
194 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
195 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
196 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
197 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
198 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
199 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
200 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
201 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
202 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
203 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
204 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
205 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
206 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
207 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
208 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
209 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
210 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
211 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
212 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
213 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
214 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
215 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
216 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
217 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
218 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
219 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
220 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
221 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
222 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
223};
224
225// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
226constexpr u8 QUAD_INDEXED_SPV[] = {
227 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00,
228 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
229 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
230 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
231 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
232 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
233 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
234 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
235 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
236 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
237 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
238 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
239 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
240 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
241 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
242 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
243 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
244 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
245 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
246 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
247 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
248 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
249 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
250 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
251 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
252 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
253 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
254 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
255 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
256 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
257 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
258 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
259 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
260 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
261 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00,
262 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
263 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
264 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
265 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
266 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
267 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
268 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
269 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
270 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00,
271 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
272 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
273 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
274 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00,
275 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
276 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
277 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00,
278 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
279 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
280 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
281 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
282 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
283 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
284 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
285 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
286 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
287 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
288 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
289 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
290 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
291 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00,
292 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00,
293 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00,
294 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00,
295 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00,
296 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
297 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
298 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
299 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
300 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
301 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
302 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
303 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
304 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
305 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
306 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
307 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
308 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
309 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
310 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
311 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
312 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
313 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
314 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
315 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
316 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
317 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
318 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
319 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
320 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
321 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
322 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
323 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
324 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
325 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
326 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
327 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
328 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
329 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
330 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
331 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
332 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
333 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
334 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
335 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
336 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
337 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
338 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
339 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
340 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
341 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
342 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
343 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
344 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
345 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
346 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
347};
348
349std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { 57std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
350 return {{ 58 return {{
351 { 59 {
@@ -381,8 +89,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
381VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, 89VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
382 vk::Span<VkDescriptorSetLayoutBinding> bindings, 90 vk::Span<VkDescriptorSetLayoutBinding> bindings,
383 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 91 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
384 vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, 92 vk::Span<VkPushConstantRange> push_constants,
385 const u8* code) { 93 std::span<const u32> code) {
386 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ 94 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
387 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 95 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
388 .pNext = nullptr, 96 .pNext = nullptr,
@@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
390 .bindingCount = bindings.size(), 98 .bindingCount = bindings.size(),
391 .pBindings = bindings.data(), 99 .pBindings = bindings.data(),
392 }); 100 });
393
394 layout = device.GetLogical().CreatePipelineLayout({ 101 layout = device.GetLogical().CreatePipelineLayout({
395 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 102 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
396 .pNext = nullptr, 103 .pNext = nullptr,
@@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
400 .pushConstantRangeCount = push_constants.size(), 107 .pushConstantRangeCount = push_constants.size(),
401 .pPushConstantRanges = push_constants.data(), 108 .pPushConstantRanges = push_constants.data(),
402 }); 109 });
403
404 if (!templates.empty()) { 110 if (!templates.empty()) {
405 descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ 111 descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
406 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, 112 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
@@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
417 123
418 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); 124 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
419 } 125 }
420
421 auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
422 std::memcpy(code_copy.get(), code, code_size);
423
424 module = device.GetLogical().CreateShaderModule({ 126 module = device.GetLogical().CreateShaderModule({
425 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 127 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
426 .pNext = nullptr, 128 .pNext = nullptr,
427 .flags = 0, 129 .flags = 0,
428 .codeSize = code_size, 130 .codeSize = static_cast<u32>(code.size_bytes()),
429 .pCode = code_copy.get(), 131 .pCode = code.data(),
430 }); 132 });
431
432 pipeline = device.GetLogical().CreateComputePipeline({ 133 pipeline = device.GetLogical().CreateComputePipeline({
433 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 134 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
434 .pNext = nullptr, 135 .pNext = nullptr,
@@ -461,15 +162,15 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
461 return set; 162 return set;
462} 163}
463 164
464QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, 165QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,
465 VKDescriptorPool& descriptor_pool, 166 VKDescriptorPool& descriptor_pool_,
466 VKStagingBufferPool& staging_buffer_pool, 167 VKStagingBufferPool& staging_buffer_pool_,
467 VKUpdateDescriptorQueue& update_descriptor_queue) 168 VKUpdateDescriptorQueue& update_descriptor_queue_)
468 : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), 169 : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
469 BuildQuadArrayPassDescriptorUpdateTemplateEntry(), 170 BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
470 BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), 171 BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
471 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, 172 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
472 update_descriptor_queue{update_descriptor_queue} {} 173 update_descriptor_queue{update_descriptor_queue_} {}
473 174
474QuadArrayPass::~QuadArrayPass() = default; 175QuadArrayPass::~QuadArrayPass() = default;
475 176
@@ -510,14 +211,13 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
510 return {*buffer.handle, 0}; 211 return {*buffer.handle, 0};
511} 212}
512 213
513Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, 214Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_,
514 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, 215 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_,
515 VKUpdateDescriptorQueue& update_descriptor_queue) 216 VKUpdateDescriptorQueue& update_descriptor_queue_)
516 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), 217 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
517 BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), 218 BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
518 uint8_pass), 219 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
519 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, 220 update_descriptor_queue{update_descriptor_queue_} {}
520 update_descriptor_queue{update_descriptor_queue} {}
521 221
522Uint8Pass::~Uint8Pass() = default; 222Uint8Pass::~Uint8Pass() = default;
523 223
@@ -555,16 +255,15 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
555 return {*buffer.handle, 0}; 255 return {*buffer.handle, 0};
556} 256}
557 257
558QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, 258QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_,
559 VKDescriptorPool& descriptor_pool, 259 VKDescriptorPool& descriptor_pool_,
560 VKStagingBufferPool& staging_buffer_pool, 260 VKStagingBufferPool& staging_buffer_pool_,
561 VKUpdateDescriptorQueue& update_descriptor_queue) 261 VKUpdateDescriptorQueue& update_descriptor_queue_)
562 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), 262 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
563 BuildInputOutputDescriptorUpdateTemplate(), 263 BuildInputOutputDescriptorUpdateTemplate(),
564 BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), 264 BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
565 QUAD_INDEXED_SPV), 265 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
566 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, 266 update_descriptor_queue{update_descriptor_queue_} {}
567 update_descriptor_queue{update_descriptor_queue} {}
568 267
569QuadIndexedPass::~QuadIndexedPass() = default; 268QuadIndexedPass::~QuadIndexedPass() = default;
570 269
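
Editor's note: the hunks above replace VKComputePass's raw (code_size, const u8*) pair with std::span<const u32>, which is also why the defensive copy into a freshly allocated u32 buffer disappears. VkShaderModuleCreateInfo::pCode must point to 4-byte-aligned u32 words, a guarantee a u8 blob cannot make, so carrying the SPIR-V as u32 words end to end makes both the memcpy and the over-allocated `code_size / sizeof(u32) + 1` buffer unnecessary. A minimal sketch of the resulting pattern, assuming plain Vulkan entry points (the wrapper used in this tree differs slightly):

    #include <cstdint>
    #include <span>
    #include <vulkan/vulkan.h>

    // Sketch: with a span of u32 words, codeSize is exact and pCode is
    // correctly aligned by construction, so no intermediate copy is needed.
    VkShaderModule MakeShaderModule(VkDevice device, std::span<const std::uint32_t> code) {
        const VkShaderModuleCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .codeSize = code.size_bytes(), // bytes, not words
            .pCode = code.data(),
        };
        VkShaderModule shader_module = VK_NULL_HANDLE;
        vkCreateShaderModule(device, &ci, nullptr, &shader_module);
        return shader_module;
    }
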
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index acc94f27e..abdf61e2c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <optional> 7#include <optional>
8#include <span>
8#include <utility> 9#include <utility>
9 10
10#include "common/common_types.h" 11#include "common/common_types.h"
@@ -24,8 +25,7 @@ public:
24 explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, 25 explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
25 vk::Span<VkDescriptorSetLayoutBinding> bindings, 26 vk::Span<VkDescriptorSetLayoutBinding> bindings,
26 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 27 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
27 vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, 28 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
28 const u8* code);
29 ~VKComputePass(); 29 ~VKComputePass();
30 30
31protected: 31protected:
@@ -43,10 +43,10 @@ private:
43 43
44class QuadArrayPass final : public VKComputePass { 44class QuadArrayPass final : public VKComputePass {
45public: 45public:
46 explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, 46 explicit QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,
47 VKDescriptorPool& descriptor_pool, 47 VKDescriptorPool& descriptor_pool_,
48 VKStagingBufferPool& staging_buffer_pool, 48 VKStagingBufferPool& staging_buffer_pool_,
49 VKUpdateDescriptorQueue& update_descriptor_queue); 49 VKUpdateDescriptorQueue& update_descriptor_queue_);
50 ~QuadArrayPass(); 50 ~QuadArrayPass();
51 51
52 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); 52 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
@@ -59,9 +59,10 @@ private:
59 59
60class Uint8Pass final : public VKComputePass { 60class Uint8Pass final : public VKComputePass {
61public: 61public:
62 explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler, 62 explicit Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_,
63 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, 63 VKDescriptorPool& descriptor_pool_,
64 VKUpdateDescriptorQueue& update_descriptor_queue); 64 VKStagingBufferPool& staging_buffer_pool_,
65 VKUpdateDescriptorQueue& update_descriptor_queue_);
65 ~Uint8Pass(); 66 ~Uint8Pass();
66 67
67 std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); 68 std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
@@ -74,10 +75,10 @@ private:
74 75
75class QuadIndexedPass final : public VKComputePass { 76class QuadIndexedPass final : public VKComputePass {
76public: 77public:
77 explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, 78 explicit QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_,
78 VKDescriptorPool& descriptor_pool, 79 VKDescriptorPool& descriptor_pool_,
79 VKStagingBufferPool& staging_buffer_pool, 80 VKStagingBufferPool& staging_buffer_pool_,
80 VKUpdateDescriptorQueue& update_descriptor_queue); 81 VKUpdateDescriptorQueue& update_descriptor_queue_);
81 ~QuadIndexedPass(); 82 ~QuadIndexedPass();
82 83
83 std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, 84 std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
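
Editor's note: the header now takes std::span<const u32> as well. The VULKAN_*_COMP_SPV symbols passed at the call sites come from generated headers that are not part of this diff; assuming they are constexpr std::array<u32, N> blobs (an assumption, not shown here), the conversion to a span is implicit and copy-free:

    #include <array>
    #include <cstdint>
    #include <span>

    // Hypothetical stand-in for a generated SPIR-V header;
    // 0x07230203 is the SPIR-V magic number.
    constexpr std::array<std::uint32_t, 3> VULKAN_EXAMPLE_COMP_SPV{
        0x07230203, 0x00010000, 0x0008000a};

    void Consume(std::span<const std::uint32_t> code) {
        (void)code.size();
    }

    void Example() {
        Consume(VULKAN_EXAMPLE_COMP_SPV); // std::array -> std::span, size deduced
    }
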
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 9be72dc9b..62f44d6da 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -15,16 +15,16 @@
15 15
16namespace Vulkan { 16namespace Vulkan {
17 17
18VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler, 18VKComputePipeline::VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_,
19 VKDescriptorPool& descriptor_pool, 19 VKDescriptorPool& descriptor_pool_,
20 VKUpdateDescriptorQueue& update_descriptor_queue, 20 VKUpdateDescriptorQueue& update_descriptor_queue_,
21 const SPIRVShader& shader) 21 const SPIRVShader& shader_)
22 : device{device}, scheduler{scheduler}, entries{shader.entries}, 22 : device{device_}, scheduler{scheduler_}, entries{shader_.entries},
23 descriptor_set_layout{CreateDescriptorSetLayout()}, 23 descriptor_set_layout{CreateDescriptorSetLayout()},
24 descriptor_allocator{descriptor_pool, *descriptor_set_layout}, 24 descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
25 update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, 25 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
26 descriptor_template{CreateDescriptorUpdateTemplate()}, 26 descriptor_template{CreateDescriptorUpdateTemplate()},
27 shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {} 27 shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {}
28 28
29VKComputePipeline::~VKComputePipeline() = default; 29VKComputePipeline::~VKComputePipeline() = default;
30 30
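
Editor's note: the VKComputePipeline initializer list above is order-sensitive — descriptor_set_layout must exist before descriptor_allocator binds it, and layout before pipeline. C++ guarantees this works because non-static members are initialized in declaration order, regardless of the order written in the initializer list. A reduced illustration of the rule:

    // Members initialize in declaration order, so Build(layout) below always
    // sees a fully constructed layout. Swapping the two declarations would
    // silently read an uninitialized value.
    struct PipelinePair {
        int layout;
        int pipeline;

        PipelinePair() : layout{MakeLayout()}, pipeline{Build(layout)} {}

    private:
        static int MakeLayout() { return 1; }
        static int Build(int l) { return l + 1; }
    };
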
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 6e2f22a4a..49e2113a2 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -17,10 +17,10 @@ class VKUpdateDescriptorQueue;
17 17
18class VKComputePipeline final { 18class VKComputePipeline final {
19public: 19public:
20 explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler, 20 explicit VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_,
21 VKDescriptorPool& descriptor_pool, 21 VKDescriptorPool& descriptor_pool_,
22 VKUpdateDescriptorQueue& update_descriptor_queue, 22 VKUpdateDescriptorQueue& update_descriptor_queue_,
23 const SPIRVShader& shader); 23 const SPIRVShader& shader_);
24 ~VKComputePipeline(); 24 ~VKComputePipeline();
25 25
26 VkDescriptorSet CommitDescriptorSet(); 26 VkDescriptorSet CommitDescriptorSet();
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index f34ed6735..370a63f74 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -46,6 +46,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
46 VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, 46 VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
47 VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, 47 VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
48 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, 48 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
49 VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
49 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, 50 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
50 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, 51 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
51 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 52 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
@@ -122,6 +123,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
122 VK_FORMAT_R16G16_UNORM, 123 VK_FORMAT_R16G16_UNORM,
123 VK_FORMAT_R16G16_SNORM, 124 VK_FORMAT_R16G16_SNORM,
124 VK_FORMAT_R16G16_SFLOAT, 125 VK_FORMAT_R16G16_SFLOAT,
126 VK_FORMAT_R16G16_SINT,
125 VK_FORMAT_R16_UNORM, 127 VK_FORMAT_R16_UNORM,
126 VK_FORMAT_R16_UINT, 128 VK_FORMAT_R16_UINT,
127 VK_FORMAT_R8G8B8A8_SRGB, 129 VK_FORMAT_R8G8B8A8_SRGB,
@@ -161,18 +163,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
161 VK_FORMAT_BC2_SRGB_BLOCK, 163 VK_FORMAT_BC2_SRGB_BLOCK,
162 VK_FORMAT_BC3_SRGB_BLOCK, 164 VK_FORMAT_BC3_SRGB_BLOCK,
163 VK_FORMAT_BC7_SRGB_BLOCK, 165 VK_FORMAT_BC7_SRGB_BLOCK,
166 VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
164 VK_FORMAT_ASTC_4x4_SRGB_BLOCK, 167 VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
165 VK_FORMAT_ASTC_8x8_SRGB_BLOCK, 168 VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
166 VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
167 VK_FORMAT_ASTC_5x4_SRGB_BLOCK, 169 VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
168 VK_FORMAT_ASTC_5x5_UNORM_BLOCK, 170 VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
169 VK_FORMAT_ASTC_5x5_SRGB_BLOCK, 171 VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
170 VK_FORMAT_ASTC_10x8_UNORM_BLOCK, 172 VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
171 VK_FORMAT_ASTC_10x8_SRGB_BLOCK, 173 VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
172 VK_FORMAT_ASTC_6x6_UNORM_BLOCK, 174 VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
173 VK_FORMAT_ASTC_6x6_SRGB_BLOCK, 175 VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
176 VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
177 VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
178 VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
179 VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
180 VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
181 VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
182 VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
183 VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
184 VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
185 VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
186 VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
187 VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
174 VK_FORMAT_ASTC_10x10_UNORM_BLOCK, 188 VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
175 VK_FORMAT_ASTC_10x10_SRGB_BLOCK, 189 VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
190 VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
191 VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
176 VK_FORMAT_ASTC_12x12_UNORM_BLOCK, 192 VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
177 VK_FORMAT_ASTC_12x12_SRGB_BLOCK, 193 VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
178 VK_FORMAT_ASTC_8x6_UNORM_BLOCK, 194 VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
@@ -192,7 +208,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
192 208
193VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, 209VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_,
194 VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) 210 VkSurfaceKHR surface, const vk::InstanceDispatch& dld_)
195 : dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, 211 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
196 instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { 212 instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} {
197 SetupFamilies(surface); 213 SetupFamilies(surface);
198 SetupFeatures(); 214 SetupFeatures();
@@ -214,7 +230,7 @@ bool VKDevice::Create() {
214 features2.features = { 230 features2.features = {
215 .robustBufferAccess = false, 231 .robustBufferAccess = false,
216 .fullDrawIndexUint32 = false, 232 .fullDrawIndexUint32 = false,
217 .imageCubeArray = false, 233 .imageCubeArray = true,
218 .independentBlend = true, 234 .independentBlend = true,
219 .geometryShader = true, 235 .geometryShader = true,
220 .tessellationShader = true, 236 .tessellationShader = true,
@@ -242,7 +258,7 @@ bool VKDevice::Create() {
242 .shaderTessellationAndGeometryPointSize = false, 258 .shaderTessellationAndGeometryPointSize = false,
243 .shaderImageGatherExtended = true, 259 .shaderImageGatherExtended = true,
244 .shaderStorageImageExtendedFormats = false, 260 .shaderStorageImageExtendedFormats = false,
245 .shaderStorageImageMultisample = false, 261 .shaderStorageImageMultisample = true,
246 .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, 262 .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
247 .shaderStorageImageWriteWithoutFormat = true, 263 .shaderStorageImageWriteWithoutFormat = true,
248 .shaderUniformBufferArrayDynamicIndexing = false, 264 .shaderUniformBufferArrayDynamicIndexing = false,
@@ -268,7 +284,6 @@ bool VKDevice::Create() {
268 .variableMultisampleRate = false, 284 .variableMultisampleRate = false,
269 .inheritedQueries = false, 285 .inheritedQueries = false,
270 }; 286 };
271
272 VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ 287 VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
273 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, 288 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
274 .pNext = nullptr, 289 .pNext = nullptr,
@@ -380,6 +395,20 @@ bool VKDevice::Create() {
380 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); 395 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
381 } 396 }
382 397
398 VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
399 if (ext_robustness2) {
400 robustness2 = {
401 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
402 .pNext = nullptr,
403 .robustBufferAccess2 = false,
404 .robustImageAccess2 = true,
405 .nullDescriptor = true,
406 };
407 SetNext(next, robustness2);
408 } else {
409 LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
410 }
411
383 if (!ext_depth_range_unrestricted) { 412 if (!ext_depth_range_unrestricted) {
384 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 413 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
385 } 414 }
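
Editor's note: the robustness2 block above threads its feature struct into the device-creation chain via SetNext. Vulkan extends create-info structs through an intrusive singly linked list of pNext pointers, and keeping a pointer to the current tail's pNext slot makes each append O(1). A hedged sketch of how such a helper can look (the renderer's actual SetNext lives outside this diff):

    struct FeatureStruct { // stand-in for any Vk*FeaturesEXT struct
        int sType;
        void* pNext;
    };

    // 'next' always points at the tail's pNext slot, so the chain stays
    // nullptr-terminated and appends never walk the list.
    template <typename T>
    void SetNext(void**& next, T& data) {
        *next = &data;
        next = &data.pNext;
    }

    void Example() {
        FeatureStruct a{1, nullptr};
        FeatureStruct b{2, nullptr};
        void* head = nullptr;
        void** next = &head;
        SetNext(next, a); // head -> a
        SetNext(next, b); // a.pNext -> b
    }
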
@@ -405,7 +434,14 @@ bool VKDevice::Create() {
405 } 434 }
406 435
407 CollectTelemetryParameters(); 436 CollectTelemetryParameters();
437 CollectToolingInfo();
408 438
439 if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
440 LOG_WARNING(
441 Render_Vulkan,
442 "Blacklisting RADV for VK_EXT_extended_dynamic_state, likely due to a bug in yuzu");

443 ext_extended_dynamic_state = false;
444 }
409 if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { 445 if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) {
410 // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it 446 // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it
411 // seems to cause stability issues 447 // seems to cause stability issues
@@ -458,7 +494,7 @@ void VKDevice::ReportLoss() const {
458 LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); 494 LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
459 495
460 // Wait for the log to flush and for Nsight Aftermath to dump the results 496 // Wait for the log to flush and for Nsight Aftermath to dump the results
461 std::this_thread::sleep_for(std::chrono::seconds{3}); 497 std::this_thread::sleep_for(std::chrono::seconds{15});
462} 498}
463 499
464void VKDevice::SaveShader(const std::vector<u32>& spirv) const { 500void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
@@ -491,14 +527,24 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features)
491 VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | 527 VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
492 VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; 528 VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
493 for (const auto format : astc_formats) { 529 for (const auto format : astc_formats) {
494 const auto format_properties{physical.GetFormatProperties(format)}; 530 const auto physical_format_properties{physical.GetFormatProperties(format)};
495 if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { 531 if ((physical_format_properties.optimalTilingFeatures & format_feature_usage) == 0) {
496 return false; 532 return false;
497 } 533 }
498 } 534 }
499 return true; 535 return true;
500} 536}
501 537
538bool VKDevice::TestDepthStencilBlits() const {
539 static constexpr VkFormatFeatureFlags required_features =
540 VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
541 const auto test_features = [](VkFormatProperties props) {
542 return (props.optimalTilingFeatures & required_features) == required_features;
543 };
544 return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) &&
545 test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT));
546}
547
502bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, 548bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
503 FormatType format_type) const { 549 FormatType format_type) const {
504 const auto it = format_properties.find(wanted_format); 550 const auto it = format_properties.find(wanted_format);
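
Editor's note: two details in this hunk are easy to gloss over — the local was renamed to physical_format_properties to stop shadowing the format_properties member, and TestDepthStencilBlits compares the masked feature bits against the full required mask. The latter matters whenever more than one bit is required, since a plain != 0 test passes with any single bit set. In miniature (flag values are illustrative):

    #include <cstdint>

    constexpr std::uint32_t BLIT_SRC = 1u << 0;
    constexpr std::uint32_t BLIT_DST = 1u << 1;

    constexpr bool HasAll(std::uint32_t supported, std::uint32_t required) {
        // '(supported & required) != 0' would accept a format that can only
        // be a blit source; comparing against 'required' demands every bit.
        return (supported & required) == required;
    }

    static_assert(HasAll(BLIT_SRC | BLIT_DST, BLIT_SRC | BLIT_DST));
    static_assert(!HasAll(BLIT_SRC, BLIT_SRC | BLIT_DST));
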
@@ -569,6 +615,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
569 const auto features{physical.GetFeatures()}; 615 const auto features{physical.GetFeatures()};
570 const std::array feature_report = { 616 const std::array feature_report = {
571 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), 617 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
618 std::make_pair(features.imageCubeArray, "imageCubeArray"),
572 std::make_pair(features.independentBlend, "independentBlend"), 619 std::make_pair(features.independentBlend, "independentBlend"),
573 std::make_pair(features.depthClamp, "depthClamp"), 620 std::make_pair(features.depthClamp, "depthClamp"),
574 std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), 621 std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
@@ -580,6 +627,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
580 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), 627 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
581 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), 628 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
582 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 629 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
630 std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"),
583 std::make_pair(features.shaderStorageImageWriteWithoutFormat, 631 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
584 "shaderStorageImageWriteWithoutFormat"), 632 "shaderStorageImageWriteWithoutFormat"),
585 }; 633 };
@@ -608,6 +656,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
608 bool has_ext_transform_feedback{}; 656 bool has_ext_transform_feedback{};
609 bool has_ext_custom_border_color{}; 657 bool has_ext_custom_border_color{};
610 bool has_ext_extended_dynamic_state{}; 658 bool has_ext_extended_dynamic_state{};
659 bool has_ext_robustness2{};
611 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { 660 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
612 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, 661 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
613 bool push) { 662 bool push) {
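
Editor's note: the test helper visible in this hunk takes std::optional<std::reference_wrapper<bool>> so a caller can either receive the "extension present" result through a flag or pass std::nullopt and rely only on the push side effect. A reduced sketch of that parameter shape:

    #include <functional>
    #include <optional>

    // Writes 'found' through the wrapper when a target was provided;
    // callers that only care about side effects pass std::nullopt.
    void Report(std::optional<std::reference_wrapper<bool>> status, bool found) {
        if (status) {
            status->get() = found;
        }
    }

    void Example() {
        bool has_extension = false;
        Report(has_extension, true); // binds the reference_wrapper
        Report(std::nullopt, true);  // result discarded
    }
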
@@ -627,11 +676,15 @@ std::vector<const char*> VKDevice::LoadExtensions() {
627 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 676 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
628 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 677 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
629 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 678 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
679 test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
630 test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, 680 test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
631 true); 681 true);
682 test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
683 test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
632 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); 684 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
633 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); 685 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
634 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); 686 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
687 test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
635 if (instance_version >= VK_API_VERSION_1_1) { 688 if (instance_version >= VK_API_VERSION_1_1) {
636 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); 689 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
637 } 690 }
@@ -644,8 +697,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
644 VkPhysicalDeviceFeatures2KHR features; 697 VkPhysicalDeviceFeatures2KHR features;
645 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; 698 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
646 699
647 VkPhysicalDeviceProperties2KHR properties; 700 VkPhysicalDeviceProperties2KHR physical_properties;
648 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; 701 physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
649 702
650 if (has_khr_shader_float16_int8) { 703 if (has_khr_shader_float16_int8) {
651 VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features; 704 VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features;
@@ -670,8 +723,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
670 subgroup_properties.sType = 723 subgroup_properties.sType =
671 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT; 724 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
672 subgroup_properties.pNext = nullptr; 725 subgroup_properties.pNext = nullptr;
673 properties.pNext = &subgroup_properties; 726 physical_properties.pNext = &subgroup_properties;
674 physical.GetProperties2KHR(properties); 727 physical.GetProperties2KHR(physical_properties);
675 728
676 is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; 729 is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize;
677 730
@@ -695,8 +748,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
695 VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; 748 VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties;
696 tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; 749 tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
697 tfb_properties.pNext = nullptr; 750 tfb_properties.pNext = nullptr;
698 properties.pNext = &tfb_properties; 751 physical_properties.pNext = &tfb_properties;
699 physical.GetProperties2KHR(properties); 752 physical.GetProperties2KHR(physical_properties);
700 753
701 if (tfb_features.transformFeedback && tfb_features.geometryStreams && 754 if (tfb_features.transformFeedback && tfb_features.geometryStreams &&
702 tfb_properties.maxTransformFeedbackStreams >= 4 && 755 tfb_properties.maxTransformFeedbackStreams >= 4 &&
@@ -733,6 +786,18 @@ std::vector<const char*> VKDevice::LoadExtensions() {
733 } 786 }
734 } 787 }
735 788
789 if (has_ext_robustness2) {
790 VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
791 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
792 robustness2.pNext = nullptr;
793 features.pNext = &robustness2;
794 physical.GetFeatures2KHR(features);
795 if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
796 extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
797 ext_robustness2 = true;
798 }
799 }
800
736 return extensions; 801 return extensions;
737} 802}
738 803
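
Editor's note: detection above is deliberately conservative — VK_EXT_robustness2 is only pushed onto the enabled list when both nullDescriptor and robustImageAccess2 come back supported, since enabling an extension whose features are then left false buys nothing. The query-then-gate shape, reduced (struct contents illustrative, not the Vulkan definition):

    struct Robustness2Features { // stand-in for the Vulkan feature struct
        bool robustBufferAccess2;
        bool robustImageAccess2;
        bool nullDescriptor;
    };

    // Enable only when every feature the renderer relies on is reported.
    bool ShouldEnableRobustness2(const Robustness2Features& f) {
        return f.nullDescriptor && f.robustImageAccess2;
    }
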
@@ -764,6 +829,7 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) {
764void VKDevice::SetupFeatures() { 829void VKDevice::SetupFeatures() {
765 const auto supported_features{physical.GetFeatures()}; 830 const auto supported_features{physical.GetFeatures()};
766 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; 831 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
832 is_blit_depth_stencil_supported = TestDepthStencilBlits();
767 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); 833 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
768} 834}
769 835
@@ -794,6 +860,32 @@ void VKDevice::CollectTelemetryParameters() {
794 } 860 }
795} 861}
796 862
863void VKDevice::CollectToolingInfo() {
864 if (!ext_tooling_info) {
865 return;
866 }
867 const auto vkGetPhysicalDeviceToolPropertiesEXT =
868 reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
869 dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
870 if (!vkGetPhysicalDeviceToolPropertiesEXT) {
871 return;
872 }
873 u32 tool_count = 0;
874 if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) {
875 return;
876 }
877 std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count);
878 if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) {
879 return;
880 }
881 for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
882 const std::string_view name = tool.name;
883 LOG_INFO(Render_Vulkan, "{}", name);
884 has_renderdoc = has_renderdoc || name == "RenderDoc";
885 has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
886 }
887}
888
797std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { 889std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
798 static constexpr float QUEUE_PRIORITY = 1.0f; 890 static constexpr float QUEUE_PRIORITY = 1.0f;
799 891
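
Editor's note: CollectToolingInfo loads vkGetPhysicalDeviceToolPropertiesEXT by hand via vkGetInstanceProcAddr (the dispatch table predates the extension), then follows the standard Vulkan enumerate-twice protocol — query the count with a null output pointer, size the storage, query again to fill it. The same call shape recurs all over the API; a generic sketch under that assumption:

    #include <cstdint>
    #include <optional>
    #include <vector>

    // Models any vkEnumerate*/vkGet*Properties pair taking (count, data)
    // and returning VK_SUCCESS (0) on success.
    template <typename T, typename F>
    std::optional<std::vector<T>> EnumerateTwice(F&& enumerate) {
        std::uint32_t count = 0;
        if (enumerate(&count, nullptr) != 0) {
            return std::nullopt; // first call: count only
        }
        std::vector<T> result(count);
        if (enumerate(&count, result.data()) != 0) {
            return std::nullopt; // second call: fill the storage
        }
        result.resize(count); // the count may legally shrink between calls
        return result;
    }
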
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 4286673d9..995dcfc0f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -157,6 +157,11 @@ public:
157 return is_formatless_image_load_supported; 157 return is_formatless_image_load_supported;
158 } 158 }
159 159
160 /// Returns true when blitting from and to depth stencil images is supported.
161 bool IsBlitDepthStencilSupported() const {
162 return is_blit_depth_stencil_supported;
163 }
164
160 /// Returns true if the device supports VK_NV_viewport_swizzle. 165 /// Returns true if the device supports VK_NV_viewport_swizzle.
161 bool IsNvViewportSwizzleSupported() const { 166 bool IsNvViewportSwizzleSupported() const {
162 return nv_viewport_swizzle; 167 return nv_viewport_swizzle;
@@ -172,6 +177,11 @@ public:
172 return ext_index_type_uint8; 177 return ext_index_type_uint8;
173 } 178 }
174 179
180 /// Returns true if the device supports VK_EXT_sampler_filter_minmax.
181 bool IsExtSamplerFilterMinmaxSupported() const {
182 return ext_sampler_filter_minmax;
183 }
184
175 /// Returns true if the device supports VK_EXT_depth_range_unrestricted. 185 /// Returns true if the device supports VK_EXT_depth_range_unrestricted.
176 bool IsExtDepthRangeUnrestrictedSupported() const { 186 bool IsExtDepthRangeUnrestrictedSupported() const {
177 return ext_depth_range_unrestricted; 187 return ext_depth_range_unrestricted;
@@ -197,6 +207,16 @@ public:
197 return ext_extended_dynamic_state; 207 return ext_extended_dynamic_state;
198 } 208 }
199 209
210 /// Returns true if the device supports VK_EXT_shader_stencil_export.
211 bool IsExtShaderStencilExportSupported() const {
212 return ext_shader_stencil_export;
213 }
214
215 /// Returns true when a known debugging tool is attached.
216 bool HasDebuggingToolAttached() const {
217 return has_renderdoc || has_nsight_graphics;
218 }
219
200 /// Returns the vendor name reported from Vulkan. 220 /// Returns the vendor name reported from Vulkan.
201 std::string_view GetVendorName() const { 221 std::string_view GetVendorName() const {
202 return vendor_name; 222 return vendor_name;
@@ -228,16 +248,23 @@ private:
228 /// Collects telemetry information from the device. 248 /// Collects telemetry information from the device.
229 void CollectTelemetryParameters(); 249 void CollectTelemetryParameters();
230 250
251 /// Collects information about attached tools.
252 void CollectToolingInfo();
253
231 /// Returns a list of queue initialization descriptors. 254 /// Returns a list of queue initialization descriptors.
232 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; 255 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
233 256
234 /// Returns true if ASTC textures are natively supported. 257 /// Returns true if ASTC textures are natively supported.
235 bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; 258 bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;
236 259
260 /// Returns true if the device natively supports blitting depth stencil images.
261 bool TestDepthStencilBlits() const;
262
237 /// Returns true if a format is supported. 263 /// Returns true if a format is supported.
238 bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, 264 bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
239 FormatType format_type) const; 265 FormatType format_type) const;
240 266
267 VkInstance instance; ///< Vulkan instance.
241 vk::DeviceDispatch dld; ///< Device function pointers. 268 vk::DeviceDispatch dld; ///< Device function pointers.
242 vk::PhysicalDevice physical; ///< Physical device. 269 vk::PhysicalDevice physical; ///< Physical device.
243 VkPhysicalDeviceProperties properties; ///< Device properties. 270 VkPhysicalDeviceProperties properties; ///< Device properties.
@@ -253,15 +280,22 @@ private:
253 bool is_float16_supported{}; ///< Support for float16 arithmetics. 280 bool is_float16_supported{}; ///< Support for float16 arithmetics.
254 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 281 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
255 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. 282 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
283 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
256 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. 284 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
257 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 285 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
258 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 286 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
287 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
259 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 288 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
260 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 289 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
290 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
261 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 291 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
262 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 292 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
263 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 293 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
294 bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
295 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
264 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 296 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
297 bool has_renderdoc{}; ///< Has RenderDoc attached.
298 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached.
265 299
266 // Asynchronous Graphics Pipeline setting 300 // Asynchronous Graphics Pipeline setting
267 bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline 301 bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
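
Editor's note: HasDebuggingToolAttached gives the renderer a cheap, cached answer to "is a capture tool watching", useful for keeping debug labels alive or skipping work nothing will observe. A hypothetical call site (the function and label plumbing are illustrative, not from this diff):

    struct Device { // stand-in for VKDevice
        bool has_renderdoc = false;
        bool has_nsight_graphics = false;
        bool HasDebuggingToolAttached() const {
            return has_renderdoc || has_nsight_graphics;
        }
    };

    // Hypothetical: only pay the labeling cost when RenderDoc or Nsight
    // Graphics is attached and can actually display the labels.
    void MaybeLabelRegion(const Device& device) {
        if (!device.HasDebuggingToolAttached()) {
            return;
        }
        // ... vkCmdBeginDebugUtilsLabelEXT(...) would go here ...
    }
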
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 5babbdd0b..774a12a53 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -14,12 +14,13 @@
14 14
15namespace Vulkan { 15namespace Vulkan {
16 16
17InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed) 17InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_,
18 : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {} 18 bool is_stubbed_)
19 : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
19 20
20InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, 21InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_,
21 u32 payload, bool is_stubbed) 22 u32 payload_, bool is_stubbed_)
22 : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {} 23 : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
23 24
24InnerFence::~InnerFence() = default; 25InnerFence::~InnerFence() = default;
25 26
@@ -71,11 +72,11 @@ bool InnerFence::IsEventSignalled() const {
71 } 72 }
72} 73}
73 74
74VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, 75VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
75 Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, 76 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
76 VKBufferCache& buffer_cache, VKQueryCache& query_cache, 77 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
77 const VKDevice& device_, VKScheduler& scheduler_) 78 const VKDevice& device_, VKScheduler& scheduler_)
78 : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache), 79 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
79 device{device_}, scheduler{scheduler_} {} 80 device{device_}, scheduler{scheduler_} {}
80 81
81Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { 82Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 1547d6d30..c2869e8e3 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -8,6 +8,7 @@
8 8
9#include "video_core/fence_manager.h" 9#include "video_core/fence_manager.h"
10#include "video_core/renderer_vulkan/vk_buffer_cache.h" 10#include "video_core/renderer_vulkan/vk_buffer_cache.h"
11#include "video_core/renderer_vulkan/vk_texture_cache.h"
11#include "video_core/renderer_vulkan/wrapper.h" 12#include "video_core/renderer_vulkan/wrapper.h"
12 13
13namespace Core { 14namespace Core {
@@ -24,14 +25,13 @@ class VKBufferCache;
24class VKDevice; 25class VKDevice;
25class VKQueryCache; 26class VKQueryCache;
26class VKScheduler; 27class VKScheduler;
27class VKTextureCache;
28 28
29class InnerFence : public VideoCommon::FenceBase { 29class InnerFence : public VideoCommon::FenceBase {
30public: 30public:
31 explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, 31 explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_,
32 bool is_stubbed); 32 bool is_stubbed_);
33 explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, 33 explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_,
34 u32 payload, bool is_stubbed); 34 u32 payload_, bool is_stubbed_);
35 ~InnerFence(); 35 ~InnerFence();
36 36
37 void Queue(); 37 void Queue();
@@ -51,14 +51,14 @@ private:
51using Fence = std::shared_ptr<InnerFence>; 51using Fence = std::shared_ptr<InnerFence>;
52 52
53using GenericFenceManager = 53using GenericFenceManager =
54 VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; 54 VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
55 55
56class VKFenceManager final : public GenericFenceManager { 56class VKFenceManager final : public GenericFenceManager {
57public: 57public:
58 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, 58 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
59 Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, 59 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
60 VKBufferCache& buffer_cache, VKQueryCache& query_cache, 60 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
61 const VKDevice& device, VKScheduler& scheduler); 61 const VKDevice& device_, VKScheduler& scheduler_);
62 62
63protected: 63protected:
64 Fence CreateFence(u32 value, bool is_stubbed) override; 64 Fence CreateFence(u32 value, bool is_stubbed) override;
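
Editor's note: the fence manager shows the backend-binding pattern used throughout video_core — a generic VideoCommon::FenceManager template plus one type alias per backend, so retyping VKTextureCache to the shared TextureCache only touched the alias and one constructor signature. Reduced to its shape (a sketch, not the real class):

    // One generic manager, one alias per backend; call sites depend only on
    // the alias, so swapping a cache type is a two-line change.
    template <typename Fence, typename TextureCache, typename BufferCache, typename QueryCache>
    class GenericManager {
    public:
        GenericManager(TextureCache& tc, BufferCache& bc, QueryCache& qc)
            : texture_cache{tc}, buffer_cache{bc}, query_cache{qc} {}

    protected:
        TextureCache& texture_cache;
        BufferCache& buffer_cache;
        QueryCache& query_cache;
    };
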
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 0e8f9c352..7979df3a8 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -15,7 +15,6 @@
15#include "video_core/renderer_vulkan/vk_device.h" 15#include "video_core/renderer_vulkan/vk_device.h"
16#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 16#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
17#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 17#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
18#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
19#include "video_core/renderer_vulkan/vk_scheduler.h" 18#include "video_core/renderer_vulkan/vk_scheduler.h"
20#include "video_core/renderer_vulkan/vk_update_descriptor.h" 19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
21#include "video_core/renderer_vulkan/wrapper.h" 20#include "video_core/renderer_vulkan/wrapper.h"
@@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
69 }; 68 };
70} 69}
71 70
71VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
72 switch (msaa_mode) {
73 case Tegra::Texture::MsaaMode::Msaa1x1:
74 return VK_SAMPLE_COUNT_1_BIT;
75 case Tegra::Texture::MsaaMode::Msaa2x1:
76 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
77 return VK_SAMPLE_COUNT_2_BIT;
78 case Tegra::Texture::MsaaMode::Msaa2x2:
79 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
80 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
81 return VK_SAMPLE_COUNT_4_BIT;
82 case Tegra::Texture::MsaaMode::Msaa4x2:
83 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
84 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
85 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
86 return VK_SAMPLE_COUNT_8_BIT;
87 case Tegra::Texture::MsaaMode::Msaa4x4:
88 return VK_SAMPLE_COUNT_16_BIT;
89 default:
90 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
91 return VK_SAMPLE_COUNT_1_BIT;
92 }
93}
94
72} // Anonymous namespace 95} // Anonymous namespace
73 96
74VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, 97VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
75 VKDescriptorPool& descriptor_pool, 98 VKDescriptorPool& descriptor_pool_,
76 VKUpdateDescriptorQueue& update_descriptor_queue, 99 VKUpdateDescriptorQueue& update_descriptor_queue_,
77 VKRenderPassCache& renderpass_cache,
78 const GraphicsPipelineCacheKey& key, 100 const GraphicsPipelineCacheKey& key,
79 vk::Span<VkDescriptorSetLayoutBinding> bindings, 101 vk::Span<VkDescriptorSetLayoutBinding> bindings,
80 const SPIRVProgram& program) 102 const SPIRVProgram& program, u32 num_color_buffers)
81 : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()}, 103 : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
82 descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, 104 descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
83 descriptor_allocator{descriptor_pool, *descriptor_set_layout}, 105 descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
84 update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, 106 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
85 descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( 107 descriptor_template{CreateDescriptorUpdateTemplate(program)},
86 program)}, 108 modules(CreateShaderModules(program)),
87 renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)}, 109 pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
88 pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
89 110
90VKGraphicsPipeline::~VKGraphicsPipeline() = default; 111VKGraphicsPipeline::~VKGraphicsPipeline() = default;
91 112
@@ -162,8 +183,8 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
162 .codeSize = 0, 183 .codeSize = 0,
163 }; 184 };
164 185
165 std::vector<vk::ShaderModule> modules; 186 std::vector<vk::ShaderModule> shader_modules;
166 modules.reserve(Maxwell::MaxShaderStage); 187 shader_modules.reserve(Maxwell::MaxShaderStage);
167 for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { 188 for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
168 const auto& stage = program[i]; 189 const auto& stage = program[i];
169 if (!stage) { 190 if (!stage) {
@@ -174,13 +195,14 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
174 195
175 ci.codeSize = stage->code.size() * sizeof(u32); 196 ci.codeSize = stage->code.size() * sizeof(u32);
176 ci.pCode = stage->code.data(); 197 ci.pCode = stage->code.data();
177 modules.push_back(device.GetLogical().CreateShaderModule(ci)); 198 shader_modules.push_back(device.GetLogical().CreateShaderModule(ci));
178 } 199 }
179 return modules; 200 return shader_modules;
180} 201}
181 202
182vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, 203vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
183 const SPIRVProgram& program) const { 204 VkRenderPass renderpass,
205 u32 num_color_buffers) const {
184 const auto& state = cache_key.fixed_state; 206 const auto& state = cache_key.fixed_state;
185 const auto& viewport_swizzles = state.viewport_swizzles; 207 const auto& viewport_swizzles = state.viewport_swizzles;
186 208
@@ -230,7 +252,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
230 if (!attribute.enabled) { 252 if (!attribute.enabled) {
231 continue; 253 continue;
232 } 254 }
233 if (input_attributes.find(static_cast<u32>(index)) == input_attributes.end()) { 255 if (!input_attributes.contains(static_cast<u32>(index))) {
234 // Skip attributes not used by the vertex shaders. 256 // Skip attributes not used by the vertex shaders.
235 continue; 257 continue;
236 } 258 }
@@ -290,8 +312,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
290 }; 312 };
291 313
292 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; 314 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
293 std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), 315 std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
294 UnpackViewportSwizzle);
295 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ 316 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
296 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, 317 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
297 .pNext = nullptr, 318 .pNext = nullptr,
@@ -326,7 +347,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
326 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 347 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
327 .pNext = nullptr, 348 .pNext = nullptr,
328 .flags = 0, 349 .flags = 0,
329 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, 350 .rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
330 .sampleShadingEnable = VK_FALSE, 351 .sampleShadingEnable = VK_FALSE,
331 .minSampleShading = 0.0f, 352 .minSampleShading = 0.0f,
332 .pSampleMask = nullptr, 353 .pSampleMask = nullptr,
@@ -352,8 +373,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
352 }; 373 };
353 374
354 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; 375 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
355 const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments); 376 for (std::size_t index = 0; index < num_color_buffers; ++index) {
356 for (std::size_t index = 0; index < num_attachments; ++index) {
357 static constexpr std::array COMPONENT_TABLE{ 377 static constexpr std::array COMPONENT_TABLE{
358 VK_COLOR_COMPONENT_R_BIT, 378 VK_COLOR_COMPONENT_R_BIT,
359 VK_COLOR_COMPONENT_G_BIT, 379 VK_COLOR_COMPONENT_G_BIT,
@@ -387,7 +407,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
387 .flags = 0, 407 .flags = 0,
388 .logicOpEnable = VK_FALSE, 408 .logicOpEnable = VK_FALSE,
389 .logicOp = VK_LOGIC_OP_COPY, 409 .logicOp = VK_LOGIC_OP_COPY,
390 .attachmentCount = static_cast<u32>(num_attachments), 410 .attachmentCount = num_color_buffers,
391 .pAttachments = cb_attachments.data(), 411 .pAttachments = cb_attachments.data(),
392 .blendConstants = {}, 412 .blendConstants = {},
393 }; 413 };
@@ -447,8 +467,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
447 stage_ci.pNext = &subgroup_size_ci; 467 stage_ci.pNext = &subgroup_size_ci;
448 } 468 }
449 } 469 }
450 470 return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
451 const VkGraphicsPipelineCreateInfo ci{
452 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 471 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
453 .pNext = nullptr, 472 .pNext = nullptr,
454 .flags = 0, 473 .flags = 0,
@@ -468,8 +487,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
468 .subpass = 0, 487 .subpass = 0,
469 .basePipelineHandle = nullptr, 488 .basePipelineHandle = nullptr,
470 .basePipelineIndex = 0, 489 .basePipelineIndex = 0,
471 }; 490 });
472 return device.GetLogical().CreateGraphicsPipeline(ci);
473} 491}
474 492
475} // namespace Vulkan 493} // namespace Vulkan
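
The hunk above replaces the iterator-pair std::transform call with C++20's std::ranges::transform, which takes the source range as a single argument. A minimal sketch of the equivalence, using a hypothetical Unpack stand-in for UnpackViewportSwizzle:

#include <algorithm>
#include <array>

struct Swizzle { int raw; };                       // hypothetical element type
constexpr int Unpack(Swizzle s) { return s.raw; }  // stand-in for UnpackViewportSwizzle

int main() {
    constexpr std::array<Swizzle, 4> input{{{0}, {1}, {2}, {3}}};
    std::array<int, 4> output{};
    // Iterator form: both ends of the source range are spelled out.
    std::transform(input.begin(), input.end(), output.begin(), Unpack);
    // Range form: one argument for the whole source range, same result.
    std::ranges::transform(input, output.begin(), Unpack);
    return output[3];
}
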
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 58aa35efd..214d06b4c 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -8,10 +8,10 @@
8#include <optional> 8#include <optional>
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
12#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 13#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
13#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 14#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
14#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
15#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 15#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
16#include "video_core/renderer_vulkan/wrapper.h" 16#include "video_core/renderer_vulkan/wrapper.h"
17 17
@@ -20,8 +20,7 @@ namespace Vulkan {
20using Maxwell = Tegra::Engines::Maxwell3D::Regs; 20using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21 21
22struct GraphicsPipelineCacheKey { 22struct GraphicsPipelineCacheKey {
23 RenderPassParams renderpass_params; 23 VkRenderPass renderpass;
24 u32 padding;
25 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; 24 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
26 FixedPipelineState fixed_state; 25 FixedPipelineState fixed_state;
27 26
@@ -34,7 +33,7 @@ struct GraphicsPipelineCacheKey {
34 } 33 }
35 34
36 std::size_t Size() const noexcept { 35 std::size_t Size() const noexcept {
37 return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); 36 return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
38 } 37 }
39}; 38};
40static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); 39static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
@@ -43,7 +42,6 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
43 42
44class VKDescriptorPool; 43class VKDescriptorPool;
45class VKDevice; 44class VKDevice;
46class VKRenderPassCache;
47class VKScheduler; 45class VKScheduler;
48class VKUpdateDescriptorQueue; 46class VKUpdateDescriptorQueue;
49 47
@@ -51,13 +49,12 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt
51 49
52class VKGraphicsPipeline final { 50class VKGraphicsPipeline final {
53public: 51public:
54 explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, 52 explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
55 VKDescriptorPool& descriptor_pool, 53 VKDescriptorPool& descriptor_pool,
56 VKUpdateDescriptorQueue& update_descriptor_queue, 54 VKUpdateDescriptorQueue& update_descriptor_queue_,
57 VKRenderPassCache& renderpass_cache,
58 const GraphicsPipelineCacheKey& key, 55 const GraphicsPipelineCacheKey& key,
59 vk::Span<VkDescriptorSetLayoutBinding> bindings, 56 vk::Span<VkDescriptorSetLayoutBinding> bindings,
60 const SPIRVProgram& program); 57 const SPIRVProgram& program, u32 num_color_buffers);
61 ~VKGraphicsPipeline(); 58 ~VKGraphicsPipeline();
62 59
63 VkDescriptorSet CommitDescriptorSet(); 60 VkDescriptorSet CommitDescriptorSet();
@@ -70,10 +67,6 @@ public:
70 return *layout; 67 return *layout;
71 } 68 }
72 69
73 VkRenderPass GetRenderPass() const {
74 return renderpass;
75 }
76
77 GraphicsPipelineCacheKey GetCacheKey() const { 70 GraphicsPipelineCacheKey GetCacheKey() const {
78 return cache_key; 71 return cache_key;
79 } 72 }
@@ -89,8 +82,8 @@ private:
89 82
90 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; 83 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
91 84
92 vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, 85 vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
93 const SPIRVProgram& program) const; 86 u32 num_color_buffers) const;
94 87
95 const VKDevice& device; 88 const VKDevice& device;
96 VKScheduler& scheduler; 89 VKScheduler& scheduler;
@@ -104,7 +97,6 @@ private:
104 vk::DescriptorUpdateTemplateKHR descriptor_template; 97 vk::DescriptorUpdateTemplateKHR descriptor_template;
105 std::vector<vk::ShaderModule> modules; 98 std::vector<vk::ShaderModule> modules;
106 99
107 VkRenderPass renderpass;
108 vk::Pipeline pipeline; 100 vk::Pipeline pipeline;
109}; 101};
110 102
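
Replacing renderpass_params plus an explicit u32 padding member with a single VkRenderPass handle keeps GraphicsPipelineCacheKey padding-free, which the static_assert on std::has_unique_object_representations_v checks; that property is what makes the memcmp-based operator== and byte-wise hashing of the key sound. A minimal sketch of the idea, with hypothetical field types:

#include <cstdint>
#include <cstring>
#include <type_traits>

struct Key {
    std::uint64_t renderpass;  // stand-in for a pointer-sized Vulkan handle
    std::uint64_t shaders[6];  // stand-in for the GPU shader addresses
};
// Fails to compile if the struct contains padding bytes.
static_assert(std::has_unique_object_representations_v<Key>);

bool Equal(const Key& lhs, const Key& rhs) {
    // Byte-wise comparison is only valid because every byte is significant.
    return std::memcmp(&lhs, &rhs, sizeof(Key)) == 0;
}
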
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp
deleted file mode 100644
index 1c418ea17..000000000
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <vector>
7
8#include "common/assert.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/vk_image.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h"
12#include "video_core/renderer_vulkan/wrapper.h"
13
14namespace Vulkan {
15
16VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, const VkImageCreateInfo& image_ci,
17 VkImageAspectFlags aspect_mask)
18 : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask},
19 image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} {
20 UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0,
21 "Queue family tracking is not implemented");
22
23 image = device.GetLogical().CreateImage(image_ci);
24
25 const u32 num_ranges = image_num_layers * image_num_levels;
26 barriers.resize(num_ranges);
27 subrange_states.resize(num_ranges, {{}, image_ci.initialLayout});
28}
29
30VKImage::~VKImage() = default;
31
32void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
33 VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
34 VkImageLayout new_layout) {
35 if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
36 return;
37 }
38
39 std::size_t cursor = 0;
40 for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
41 for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
42 const u32 layer = base_layer + layer_it;
43 const u32 level = base_level + level_it;
44 auto& state = GetSubrangeState(layer, level);
45 auto& barrier = barriers[cursor];
46 barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
47 barrier.pNext = nullptr;
48 barrier.srcAccessMask = state.access;
49 barrier.dstAccessMask = new_access;
50 barrier.oldLayout = state.layout;
51 barrier.newLayout = new_layout;
52 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
53 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
54 barrier.image = *image;
55 barrier.subresourceRange.aspectMask = aspect_mask;
56 barrier.subresourceRange.baseMipLevel = level;
57 barrier.subresourceRange.levelCount = 1;
58 barrier.subresourceRange.baseArrayLayer = layer;
59 barrier.subresourceRange.layerCount = 1;
60 state.access = new_access;
61 state.layout = new_layout;
62 }
63 }
64
65 scheduler.RequestOutsideRenderPassOperationContext();
66
67 scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) {
68 // TODO(Rodrigo): Implement a way to use the latest stage across subresources.
69 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
70 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {},
71 vk::Span(barriers.data(), cursor));
72 });
73}
74
75bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
76 VkAccessFlags new_access, VkImageLayout new_layout) noexcept {
77 const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
78 base_level == 0 && num_levels == image_num_levels;
79 if (!is_full_range) {
80 state_diverged = true;
81 }
82
83 if (!state_diverged) {
84 auto& state = GetSubrangeState(0, 0);
85 if (state.access != new_access || state.layout != new_layout) {
86 return true;
87 }
88 }
89
90 for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
91 for (u32 level_it = 0; level_it < num_levels; ++level_it) {
92 const u32 layer = base_layer + layer_it;
93 const u32 level = base_level + level_it;
94 auto& state = GetSubrangeState(layer, level);
95 if (state.access != new_access || state.layout != new_layout) {
96 return true;
97 }
98 }
99 }
100 return false;
101}
102
103void VKImage::CreatePresentView() {
104 // Image type has to be 2D to be presented.
105 present_view = device.GetLogical().CreateImageView({
106 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
107 .pNext = nullptr,
108 .flags = 0,
109 .image = *image,
110 .viewType = VK_IMAGE_VIEW_TYPE_2D,
111 .format = format,
112 .components =
113 {
114 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
115 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
116 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
117 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
118 },
119 .subresourceRange =
120 {
121 .aspectMask = aspect_mask,
122 .baseMipLevel = 0,
123 .levelCount = 1,
124 .baseArrayLayer = 0,
125 .layerCount = 1,
126 },
127 });
128}
129
130VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
131 return subrange_states[static_cast<std::size_t>(layer * image_num_levels) +
132 static_cast<std::size_t>(level)];
133}
134
135} // namespace Vulkan
\ No newline at end of file
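
The deleted GetSubrangeState at the end of this file flattens a (layer, level) pair into one index over a layer-major vector. A standalone sketch of that addressing scheme:

#include <cassert>
#include <cstddef>
#include <vector>

// Layer-major layout: every mip level of layer 0, then every level of layer 1.
std::size_t FlatIndex(std::size_t layer, std::size_t level, std::size_t num_levels) {
    return layer * num_levels + level;
}

int main() {
    constexpr std::size_t num_layers = 3;
    constexpr std::size_t num_levels = 4;
    std::vector<int> states(num_layers * num_levels, 0);
    // (layer=2, level=1) maps to 2 * 4 + 1 = 9.
    assert(FlatIndex(2, 1, num_levels) == 9);
    states[FlatIndex(2, 1, num_levels)] = 1;
    return states[9] - 1;
}
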
diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h
deleted file mode 100644
index b4d7229e5..000000000
--- a/src/video_core/renderer_vulkan/vk_image.h
+++ /dev/null
@@ -1,84 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/wrapper.h"
12
13namespace Vulkan {
14
15class VKDevice;
16class VKScheduler;
17
18class VKImage {
19public:
20 explicit VKImage(const VKDevice& device, VKScheduler& scheduler,
21 const VkImageCreateInfo& image_ci, VkImageAspectFlags aspect_mask);
22 ~VKImage();
23
24 /// Records in the passed command buffer an image transition and updates the state of the image.
25 void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
26 VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
27 VkImageLayout new_layout);
28
29 /// Returns a view compatible with presentation; the image has to be 2D.
30 VkImageView GetPresentView() {
31 if (!present_view) {
32 CreatePresentView();
33 }
34 return *present_view;
35 }
36
37 /// Returns the Vulkan image handle.
38 const vk::Image& GetHandle() const {
39 return image;
40 }
41
42 /// Returns the Vulkan format for this image.
43 VkFormat GetFormat() const {
44 return format;
45 }
46
47 /// Returns the Vulkan aspect mask.
48 VkImageAspectFlags GetAspectMask() const {
49 return aspect_mask;
50 }
51
52private:
53 struct SubrangeState final {
54 VkAccessFlags access = 0; ///< Current access bits.
55 VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout.
56 };
57
58 bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
59 VkAccessFlags new_access, VkImageLayout new_layout) noexcept;
60
61 /// Creates a presentation view.
62 void CreatePresentView();
63
64 /// Returns the subrange state for a layer and level.
65 SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
66
67 const VKDevice& device; ///< Device handler.
68 VKScheduler& scheduler; ///< Device scheduler.
69
70 const VkFormat format; ///< Vulkan format.
71 const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
72 const u32 image_num_layers; ///< Number of layers.
73 const u32 image_num_levels; ///< Number of mipmap levels.
74
75 vk::Image image; ///< Image handle.
76 vk::ImageView present_view; ///< Image view compatible with presentation.
77
78 std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers.
79 std::vector<SubrangeState> subrange_states; ///< Current subrange state.
80
81 bool state_diverged = false; ///< True when subresources mismatch in layout.
82};
83
84} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 24c8960ac..56b24b70f 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -29,10 +29,10 @@ u64 GetAllocationChunkSize(u64 required_size) {
29 29
30class VKMemoryAllocation final { 30class VKMemoryAllocation final {
31public: 31public:
32 explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, 32 explicit VKMemoryAllocation(const VKDevice& device_, vk::DeviceMemory memory_,
33 VkMemoryPropertyFlags properties, u64 allocation_size, u32 type) 33 VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_)
34 : device{device}, memory{std::move(memory)}, properties{properties}, 34 : device{device_}, memory{std::move(memory_)}, properties{properties_},
35 allocation_size{allocation_size}, shifted_type{ShiftType(type)} {} 35 allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {}
36 36
37 VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) { 37 VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) {
38 auto found = TryFindFreeSection(free_iterator, allocation_size, 38 auto found = TryFindFreeSection(free_iterator, allocation_size,
@@ -117,8 +117,8 @@ private:
117 std::vector<const VKMemoryCommitImpl*> commits; 117 std::vector<const VKMemoryCommitImpl*> commits;
118}; 118};
119 119
120VKMemoryManager::VKMemoryManager(const VKDevice& device) 120VKMemoryManager::VKMemoryManager(const VKDevice& device_)
121 : device{device}, properties{device.GetPhysical().GetMemoryProperties()} {} 121 : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}
122 122
123VKMemoryManager::~VKMemoryManager() = default; 123VKMemoryManager::~VKMemoryManager() = default;
124 124
@@ -207,16 +207,16 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi
207 return {}; 207 return {};
208} 208}
209 209
210VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, 210VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_,
211 const vk::DeviceMemory& memory, u64 begin, u64 end) 211 const vk::DeviceMemory& memory_, u64 begin_, u64 end_)
212 : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {} 212 : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {}
213 213
214VKMemoryCommitImpl::~VKMemoryCommitImpl() { 214VKMemoryCommitImpl::~VKMemoryCommitImpl() {
215 allocation->Free(this); 215 allocation->Free(this);
216} 216}
217 217
218MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { 218MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
219 return MemoryMap{this, memory.Map(interval.first + offset_, size)}; 219 return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size));
220} 220}
221 221
222void VKMemoryCommitImpl::Unmap() const { 222void VKMemoryCommitImpl::Unmap() const {
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
index 1af88e3d4..318f8b43e 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <span>
8#include <utility> 9#include <utility>
9#include <vector> 10#include <vector>
10#include "common/common_types.h" 11#include "common/common_types.h"
@@ -21,7 +22,7 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
21 22
22class VKMemoryManager final { 23class VKMemoryManager final {
23public: 24public:
24 explicit VKMemoryManager(const VKDevice& device); 25 explicit VKMemoryManager(const VKDevice& device_);
25 VKMemoryManager(const VKMemoryManager&) = delete; 26 VKMemoryManager(const VKMemoryManager&) = delete;
26 ~VKMemoryManager(); 27 ~VKMemoryManager();
27 28
@@ -58,8 +59,8 @@ class VKMemoryCommitImpl final {
58 friend MemoryMap; 59 friend MemoryMap;
59 60
60public: 61public:
61 explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, 62 explicit VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_,
62 const vk::DeviceMemory& memory, u64 begin, u64 end); 63 const vk::DeviceMemory& memory_, u64 begin_, u64 end_);
63 ~VKMemoryCommitImpl(); 64 ~VKMemoryCommitImpl();
64 65
65 /// Maps a memory region and returns a pointer to it. 66 /// Maps a memory region and returns a pointer to it.
@@ -93,8 +94,8 @@ private:
93/// Holds ownership of a memory map. 94/// Holds ownership of a memory map.
94class MemoryMap final { 95class MemoryMap final {
95public: 96public:
96 explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address) 97 explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_)
97 : commit{commit}, address{address} {} 98 : commit{commit_}, span{span_} {}
98 99
99 ~MemoryMap() { 100 ~MemoryMap() {
100 if (commit) { 101 if (commit) {
@@ -108,19 +109,24 @@ public:
108 commit = nullptr; 109 commit = nullptr;
109 } 110 }
110 111
112 /// Returns a span of the mapped memory.
113 [[nodiscard]] std::span<u8> Span() const noexcept {
114 return span;
115 }
116
111 /// Returns the address of the memory map. 117 /// Returns the address of the memory map.
112 u8* GetAddress() const { 118 [[nodiscard]] u8* Address() const noexcept {
113 return address; 119 return span.data();
114 } 120 }
115 121
116 /// Returns the address of the memory map. 122 /// Returns the address of the memory map.
117 operator u8*() const { 123 [[nodiscard]] operator u8*() const noexcept {
118 return address; 124 return span.data();
119 } 125 }
120 126
121private: 127private:
122 const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. 128 const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
123 u8* address{}; ///< Address to the mapped memory. 129 std::span<u8> span; ///< Span of the mapped memory.
124}; 130};
125 131
126} // namespace Vulkan 132} // namespace Vulkan
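
MemoryMap now carries a std::span<u8> rather than a bare u8*, so the mapped size travels with the address and the new Span() accessor can hand both out at once. A minimal sketch of the pattern, independent of the Vulkan wrappers above:

#include <cstdint>
#include <span>
#include <vector>

class Map {
public:
    explicit Map(std::span<std::uint8_t> span_) : span{span_} {}

    std::span<std::uint8_t> Span() const noexcept { return span; }  // pointer + size
    std::uint8_t* Address() const noexcept { return span.data(); }  // pointer only

private:
    std::span<std::uint8_t> span;
};

int main() {
    std::vector<std::uint8_t> backing(64);
    const Map map{std::span<std::uint8_t>{backing.data(), backing.size()}};
    // Callers no longer need to pass the mapped size around separately.
    return map.Span().size() == 64 ? 0 : 1;
}
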
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index df7e8c864..083796d05 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -8,6 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "common/bit_cast.h" 10#include "common/bit_cast.h"
11#include "common/cityhash.h"
11#include "common/microprofile.h" 12#include "common/microprofile.h"
12#include "core/core.h" 13#include "core/core.h"
13#include "core/memory.h" 14#include "core/memory.h"
@@ -22,7 +23,6 @@
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 23#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 24#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h" 25#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
26#include "video_core/renderer_vulkan/vk_scheduler.h" 26#include "video_core/renderer_vulkan/vk_scheduler.h"
27#include "video_core/renderer_vulkan/vk_update_descriptor.h" 27#include "video_core/renderer_vulkan/vk_update_descriptor.h"
28#include "video_core/renderer_vulkan/wrapper.h" 28#include "video_core/renderer_vulkan/wrapper.h"
@@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX
52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; 52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
53 53
54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
55 VideoCommon::Shader::CompileDepth::FullDecompile}; 55 .depth = VideoCommon::Shader::CompileDepth::FullDecompile,
56 .disable_else_derivation = true,
57};
56 58
57constexpr std::size_t GetStageFromProgram(std::size_t program) { 59constexpr std::size_t GetStageFromProgram(std::size_t program) {
58 return program == 0 ? 0 : program - 1; 60 return program == 0 ? 0 : program - 1;
@@ -75,7 +77,7 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
75 case Maxwell::ShaderProgram::Fragment: 77 case Maxwell::ShaderProgram::Fragment:
76 return ShaderType::Fragment; 78 return ShaderType::Fragment;
77 default: 79 default:
78 UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program)); 80 UNIMPLEMENTED_MSG("program={}", program);
79 return ShaderType::Vertex; 81 return ShaderType::Vertex;
80 } 82 }
81} 83}
@@ -136,26 +138,24 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
136 return std::memcmp(&rhs, this, sizeof *this) == 0; 138 return std::memcmp(&rhs, this, sizeof *this) == 0;
137} 139}
138 140
139Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage, 141Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_,
140 GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_, 142 GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
141 u32 main_offset) 143 : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
142 : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine), 144 shader_ir(program_code, main_offset_, compiler_settings, registry),
143 shader_ir(program_code, main_offset, compiler_settings, registry),
144 entries(GenerateShaderEntries(shader_ir)) {} 145 entries(GenerateShaderEntries(shader_ir)) {}
145 146
146Shader::~Shader() = default; 147Shader::~Shader() = default;
147 148
148VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_, 149VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
149 Tegra::Engines::Maxwell3D& maxwell3d_, 150 Tegra::Engines::Maxwell3D& maxwell3d_,
150 Tegra::Engines::KeplerCompute& kepler_compute_, 151 Tegra::Engines::KeplerCompute& kepler_compute_,
151 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, 152 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
152 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, 153 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
153 VKUpdateDescriptorQueue& update_descriptor_queue_, 154 VKUpdateDescriptorQueue& update_descriptor_queue_)
154 VKRenderPassCache& renderpass_cache_) 155 : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
155 : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_},
156 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, 156 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
157 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, 157 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
158 update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {} 158 update_descriptor_queue_} {}
159 159
160VKPipelineCache::~VKPipelineCache() = default; 160VKPipelineCache::~VKPipelineCache() = default;
161 161
@@ -200,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
200} 200}
201 201
202VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( 202VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
203 const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { 203 const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
204 VideoCommon::Shader::AsyncShaders& async_shaders) {
204 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 205 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
205 206
206 if (last_graphics_pipeline && last_graphics_key == key) { 207 if (last_graphics_pipeline && last_graphics_key == key) {
@@ -216,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
216 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 217 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
217 const auto [program, bindings] = DecompileShaders(key.fixed_state); 218 const auto [program, bindings] = DecompileShaders(key.fixed_state);
218 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, 219 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
219 update_descriptor_queue, renderpass_cache, bindings, 220 update_descriptor_queue, bindings, program, key,
220 program, key); 221 num_color_buffers);
221 } 222 }
222 last_graphics_pipeline = pair->second.get(); 223 last_graphics_pipeline = pair->second.get();
223 return last_graphics_pipeline; 224 return last_graphics_pipeline;
@@ -230,8 +231,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
230 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 231 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
231 const auto [program, bindings] = DecompileShaders(key.fixed_state); 232 const auto [program, bindings] = DecompileShaders(key.fixed_state);
232 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, 233 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
233 update_descriptor_queue, renderpass_cache, key, 234 update_descriptor_queue, key, bindings,
234 bindings, program); 235 program, num_color_buffers);
235 gpu.ShaderNotify().MarkShaderComplete(); 236 gpu.ShaderNotify().MarkShaderComplete();
236 } 237 }
237 last_graphics_pipeline = entry.get(); 238 last_graphics_pipeline = entry.get();
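
GetGraphicsPipeline keeps the previously returned key and pipeline so consecutive draws with identical state skip the hash-map lookup entirely. A generic sketch of that single-entry memoization, with hypothetical Key and Pipeline types:

#include <memory>
#include <unordered_map>

struct Pipeline {};
using Key = int;  // hypothetical; the real key hashes render pass and shader state

class PipelineCache {
public:
    Pipeline* Get(Key key) {
        if (last_pipeline && last_key == key) {
            return last_pipeline;  // fast path: same state as the previous draw
        }
        auto [it, is_new] = entries.try_emplace(key);
        if (is_new) {
            it->second = std::make_unique<Pipeline>();  // compile on a cache miss
        }
        last_key = key;
        last_pipeline = it->second.get();
        return last_pipeline;
    }

private:
    Key last_key{};
    Pipeline* last_pipeline = nullptr;
    std::unordered_map<Key, std::unique_ptr<Pipeline>> entries;
};
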
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e558e6658..fbaa8257c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -19,7 +19,6 @@
19#include "video_core/engines/maxwell_3d.h" 19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 20#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
22#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
23#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 22#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
24#include "video_core/renderer_vulkan/wrapper.h" 23#include "video_core/renderer_vulkan/wrapper.h"
25#include "video_core/shader/async_shaders.h" 24#include "video_core/shader/async_shaders.h"
@@ -84,9 +83,9 @@ namespace Vulkan {
84 83
85class Shader { 84class Shader {
86public: 85public:
87 explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine, 86 explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_,
88 Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr, 87 Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_,
89 VideoCommon::Shader::ProgramCode program_code, u32 main_offset); 88 VideoCommon::Shader::ProgramCode program_code, u32 main_offset_);
90 ~Shader(); 89 ~Shader();
91 90
92 GPUVAddr GetGpuAddr() const { 91 GPUVAddr GetGpuAddr() const {
@@ -124,13 +123,13 @@ public:
124 Tegra::Engines::KeplerCompute& kepler_compute, 123 Tegra::Engines::KeplerCompute& kepler_compute,
125 Tegra::MemoryManager& gpu_memory, const VKDevice& device, 124 Tegra::MemoryManager& gpu_memory, const VKDevice& device,
126 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, 125 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
127 VKUpdateDescriptorQueue& update_descriptor_queue, 126 VKUpdateDescriptorQueue& update_descriptor_queue);
128 VKRenderPassCache& renderpass_cache);
129 ~VKPipelineCache() override; 127 ~VKPipelineCache() override;
130 128
131 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); 129 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
132 130
133 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, 131 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
132 u32 num_color_buffers,
134 VideoCommon::Shader::AsyncShaders& async_shaders); 133 VideoCommon::Shader::AsyncShaders& async_shaders);
135 134
136 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 135 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
@@ -153,7 +152,6 @@ private:
153 VKScheduler& scheduler; 152 VKScheduler& scheduler;
154 VKDescriptorPool& descriptor_pool; 153 VKDescriptorPool& descriptor_pool;
155 VKUpdateDescriptorQueue& update_descriptor_queue; 154 VKUpdateDescriptorQueue& update_descriptor_queue;
156 VKRenderPassCache& renderpass_cache;
157 155
158 std::unique_ptr<Shader> null_shader; 156 std::unique_ptr<Shader> null_shader;
159 std::unique_ptr<Shader> null_kernel; 157 std::unique_ptr<Shader> null_kernel;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index ee2d871e3..038760de3 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -66,15 +66,13 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
66 usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; 66 usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
67} 67}
68 68
69VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer, 69VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
70 Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, 70 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
71 const VKDevice& device, VKScheduler& scheduler) 71 const VKDevice& device_, VKScheduler& scheduler_)
72 : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, 72 : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
73 HostCounter>{rasterizer, maxwell3d, gpu_memory}, 73 query_pools{
74 device{device}, scheduler{scheduler}, query_pools{ 74 QueryPool{device_, scheduler_, QueryType::SamplesPassed},
75 QueryPool{device, scheduler, 75 } {}
76 QueryType::SamplesPassed},
77 } {}
78 76
79VKQueryCache::~VKQueryCache() { 77VKQueryCache::~VKQueryCache() {
80 // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class 78 // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
@@ -95,12 +93,12 @@ void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
95 query_pools[static_cast<std::size_t>(type)].Reserve(query); 93 query_pools[static_cast<std::size_t>(type)].Reserve(query);
96} 94}
97 95
98HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, 96HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
99 QueryType type) 97 QueryType type_)
100 : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, 98 : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
101 type{type}, query{cache.AllocateQuery(type)}, tick{cache.Scheduler().CurrentTick()} { 99 query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} {
102 const vk::Device* logical = &cache.Device().GetLogical(); 100 const vk::Device* logical = &cache_.Device().GetLogical();
103 cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { 101 cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
104 logical->ResetQueryPoolEXT(query.first, query.second, 1); 102 logical->ResetQueryPoolEXT(query.first, query.second, 1);
105 cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); 103 cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
106 }); 104 });
@@ -119,18 +117,20 @@ u64 HostCounter::BlockingQuery() const {
119 if (tick >= cache.Scheduler().CurrentTick()) { 117 if (tick >= cache.Scheduler().CurrentTick()) {
120 cache.Scheduler().Flush(); 118 cache.Scheduler().Flush();
121 } 119 }
120
122 u64 data; 121 u64 data;
123 const VkResult result = cache.Device().GetLogical().GetQueryResults( 122 const VkResult query_result = cache.Device().GetLogical().GetQueryResults(
124 query.first, query.second, 1, sizeof(data), &data, sizeof(data), 123 query.first, query.second, 1, sizeof(data), &data, sizeof(data),
125 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); 124 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
126 switch (result) { 125
126 switch (query_result) {
127 case VK_SUCCESS: 127 case VK_SUCCESS:
128 return data; 128 return data;
129 case VK_ERROR_DEVICE_LOST: 129 case VK_ERROR_DEVICE_LOST:
130 cache.Device().ReportLoss(); 130 cache.Device().ReportLoss();
131 [[fallthrough]]; 131 [[fallthrough]];
132 default: 132 default:
133 throw vk::Exception(result); 133 throw vk::Exception(query_result);
134 } 134 }
135} 135}
136 136
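
BlockingQuery flushes the scheduler if the query has not been submitted yet, then reads the counter with VK_QUERY_RESULT_WAIT_BIT so the call blocks until the GPU writes the value. A hedged sketch of the readback step against the raw Vulkan C API (assuming the standard vulkan/vulkan.h header) rather than yuzu's wrappers:

#include <cstdint>
#include <vulkan/vulkan.h>

std::uint64_t ReadCounter(VkDevice device, VkQueryPool pool, std::uint32_t index) {
    std::uint64_t data = 0;
    // WAIT_BIT blocks until the result is available; 64_BIT matches the u64
    // destination, mirroring the flags used in BlockingQuery above.
    const VkResult result = vkGetQueryPoolResults(
        device, pool, index, 1, sizeof(data), &data, sizeof(data),
        VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
    return result == VK_SUCCESS ? data : 0;
}
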
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 2e57fb75d..837fe9ebf 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -53,9 +53,9 @@ private:
53class VKQueryCache final 53class VKQueryCache final
54 : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> { 54 : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> {
55public: 55public:
56 explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer, 56 explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
57 Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, 57 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
58 const VKDevice& device, VKScheduler& scheduler); 58 const VKDevice& device_, VKScheduler& scheduler_);
59 ~VKQueryCache(); 59 ~VKQueryCache();
60 60
61 std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); 61 std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
@@ -78,8 +78,8 @@ private:
78 78
79class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { 79class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
80public: 80public:
81 explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, 81 explicit HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
82 VideoCore::QueryType type); 82 VideoCore::QueryType type_);
83 ~HostCounter(); 83 ~HostCounter();
84 84
85 void EndQuery(); 85 void EndQuery();
@@ -95,8 +95,8 @@ private:
95 95
96class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { 96class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
97public: 97public:
98 explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) 98 explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
99 : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {} 99 : CachedQueryBase{cpu_addr_, host_ptr_} {}
100}; 100};
101 101
102} // namespace Vulkan 102} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index e0fb8693f..04c5c859c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -19,6 +19,7 @@
19#include "core/settings.h" 19#include "core/settings.h"
20#include "video_core/engines/kepler_compute.h" 20#include "video_core/engines/kepler_compute.h"
21#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_vulkan/blit_image.h"
22#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
23#include "video_core/renderer_vulkan/maxwell_to_vk.h" 24#include "video_core/renderer_vulkan/maxwell_to_vk.h"
24#include "video_core/renderer_vulkan/renderer_vulkan.h" 25#include "video_core/renderer_vulkan/renderer_vulkan.h"
@@ -30,8 +31,6 @@
30#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 31#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
31#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 32#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
32#include "video_core/renderer_vulkan/vk_rasterizer.h" 33#include "video_core/renderer_vulkan/vk_rasterizer.h"
33#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
34#include "video_core/renderer_vulkan/vk_sampler_cache.h"
35#include "video_core/renderer_vulkan/vk_scheduler.h" 34#include "video_core/renderer_vulkan/vk_scheduler.h"
36#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 35#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
37#include "video_core/renderer_vulkan/vk_state_tracker.h" 36#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -39,10 +38,13 @@
39#include "video_core/renderer_vulkan/vk_update_descriptor.h" 38#include "video_core/renderer_vulkan/vk_update_descriptor.h"
40#include "video_core/renderer_vulkan/wrapper.h" 39#include "video_core/renderer_vulkan/wrapper.h"
41#include "video_core/shader_cache.h" 40#include "video_core/shader_cache.h"
41#include "video_core/texture_cache/texture_cache.h"
42 42
43namespace Vulkan { 43namespace Vulkan {
44 44
45using Maxwell = Tegra::Engines::Maxwell3D::Regs; 45using Maxwell = Tegra::Engines::Maxwell3D::Regs;
46using VideoCommon::ImageViewId;
47using VideoCommon::ImageViewType;
46 48
47MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); 49MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
48MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); 50MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
@@ -58,9 +60,9 @@ MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192
58 60
59namespace { 61namespace {
60 62
61constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); 63constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
62 64
63VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { 65VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) {
64 const auto& src = regs.viewport_transform[index]; 66 const auto& src = regs.viewport_transform[index];
65 const float width = src.scale_x * 2.0f; 67 const float width = src.scale_x * 2.0f;
66 const float height = src.scale_y * 2.0f; 68 const float height = src.scale_y * 2.0f;
@@ -83,7 +85,7 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si
83 return viewport; 85 return viewport;
84} 86}
85 87
86VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { 88VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
87 const auto& src = regs.scissor_test[index]; 89 const auto& src = regs.scissor_test[index];
88 VkRect2D scissor; 90 VkRect2D scissor;
89 if (src.enable) { 91 if (src.enable) {
@@ -103,98 +105,122 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
103std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( 105std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
104 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 106 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
105 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; 107 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
106 for (std::size_t i = 0; i < std::size(addresses); ++i) { 108 for (size_t i = 0; i < std::size(addresses); ++i) {
107 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; 109 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
108 } 110 }
109 return addresses; 111 return addresses;
110} 112}
111 113
112void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, 114struct TextureHandle {
113 VkAccessFlags access) { 115 constexpr TextureHandle(u32 data, bool via_header_index) {
114 for (auto& [view, layout] : views) { 116 const Tegra::Texture::TextureHandle handle{data};
115 view->Transition(*layout, pipeline_stage, access); 117 image = handle.tic_id;
118 sampler = via_header_index ? image : handle.tsc_id.Value();
116 } 119 }
117} 120
121 u32 image;
122 u32 sampler;
123};
118 124
119template <typename Engine, typename Entry> 125template <typename Engine, typename Entry>
120Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 126TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
121 std::size_t stage, std::size_t index = 0) { 127 size_t stage, size_t index = 0) {
122 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); 128 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
123 if constexpr (std::is_same_v<Entry, SamplerEntry>) { 129 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
124 if (entry.is_separated) { 130 if (entry.is_separated) {
125 const u32 buffer_1 = entry.buffer; 131 const u32 buffer_1 = entry.buffer;
126 const u32 buffer_2 = entry.secondary_buffer; 132 const u32 buffer_2 = entry.secondary_buffer;
127 const u32 offset_1 = entry.offset; 133 const u32 offset_1 = entry.offset;
128 const u32 offset_2 = entry.secondary_offset; 134 const u32 offset_2 = entry.secondary_offset;
129 const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); 135 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
130 const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); 136 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
131 return engine.GetTextureInfo(handle_1 | handle_2); 137 return TextureHandle(handle_1 | handle_2, via_header_index);
132 } 138 }
133 } 139 }
134 if (entry.is_bindless) { 140 if (entry.is_bindless) {
135 const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); 141 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
136 return engine.GetTextureInfo(tex_handle); 142 return TextureHandle(raw, via_header_index);
137 }
138 const auto& gpu_profile = engine.AccessGuestDriverProfile();
139 const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
140 const u32 offset = entry.offset + entry_offset;
141 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
142 return engine.GetStageTexture(stage_type, offset);
143 } else {
144 return engine.GetTexture(offset);
145 }
146}
147
148/// @brief Determine if an attachment to be updated has to preserve contents
149/// @param is_clear True when a clear is being executed
150/// @param regs 3D registers
151/// @return True when the contents have to be preserved
152bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
153 if (!is_clear) {
154 return true;
155 }
156 // First we have to make sure all clear masks are enabled.
157 if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
158 !regs.clear_buffers.A) {
159 return true;
160 }
161 // If scissors are disabled, the whole screen is cleared
162 if (!regs.clear_flags.scissor) {
163 return false;
164 } 143 }
165 // Then we have to confirm scissor testing clears the whole image 144 const u32 buffer = engine.GetBoundBuffer();
166 const std::size_t index = regs.clear_buffers.RT; 145 const u64 offset = (entry.offset + index) * sizeof(u32);
167 const auto& scissor = regs.scissor_test[0]; 146 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
168 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
169 scissor.max_y < regs.rt[index].height;
170} 147}
171 148
172/// @brief Determine if an attachment to be updated has to preserve contents 149template <size_t N>
173/// @param is_clear True when a clear is being executed
174/// @param regs 3D registers
175/// @return True when the contents have to be preserved
176bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
177 // If we are not clearing, the contents have to be preserved
178 if (!is_clear) {
179 return true;
180 }
181 // For depth stencil clears we only have to confirm scissor test covers the whole image
182 if (!regs.clear_flags.scissor) {
183 return false;
184 }
185 // Make sure the clear covers the whole image
186 const auto& scissor = regs.scissor_test[0];
187 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
188 scissor.max_y < regs.zeta_height;
189}
190
191template <std::size_t N>
192std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { 150std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
193 std::array<VkDeviceSize, N> expanded; 151 std::array<VkDeviceSize, N> expanded;
194 std::copy(strides.begin(), strides.end(), expanded.begin()); 152 std::copy(strides.begin(), strides.end(), expanded.begin());
195 return expanded; 153 return expanded;
196} 154}
197 155
156ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
157 if (entry.is_buffer) {
158 return ImageViewType::e2D;
159 }
160 switch (entry.type) {
161 case Tegra::Shader::TextureType::Texture1D:
162 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
163 case Tegra::Shader::TextureType::Texture2D:
164 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
165 case Tegra::Shader::TextureType::Texture3D:
166 return ImageViewType::e3D;
167 case Tegra::Shader::TextureType::TextureCube:
168 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
169 }
170 UNREACHABLE();
171 return ImageViewType::e2D;
172}
173
174ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
175 switch (entry.type) {
176 case Tegra::Shader::ImageType::Texture1D:
177 return ImageViewType::e1D;
178 case Tegra::Shader::ImageType::Texture1DArray:
179 return ImageViewType::e1DArray;
180 case Tegra::Shader::ImageType::Texture2D:
181 return ImageViewType::e2D;
182 case Tegra::Shader::ImageType::Texture2DArray:
183 return ImageViewType::e2DArray;
184 case Tegra::Shader::ImageType::Texture3D:
185 return ImageViewType::e3D;
186 case Tegra::Shader::ImageType::TextureBuffer:
187 return ImageViewType::Buffer;
188 }
189 UNREACHABLE();
190 return ImageViewType::e2D;
191}
192
193void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
194 VKUpdateDescriptorQueue& update_descriptor_queue,
195 ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
196 for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
197 const ImageViewId image_view_id = *image_view_id_ptr++;
198 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
199 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
200 }
201 for (const auto& entry : entries.samplers) {
202 for (size_t i = 0; i < entry.size; ++i) {
203 const VkSampler sampler = *sampler_ptr++;
204 const ImageViewId image_view_id = *image_view_id_ptr++;
205 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
206 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
207 update_descriptor_queue.AddSampledImage(handle, sampler);
208 }
209 }
210 for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
211 const ImageViewId image_view_id = *image_view_id_ptr++;
212 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
213 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
214 }
215 for (const auto& entry : entries.images) {
216 // TODO: Mark as modified
217 const ImageViewId image_view_id = *image_view_id_ptr++;
218 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
219 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
220 update_descriptor_queue.AddImage(handle);
221 }
222}
223
198} // Anonymous namespace 224} // Anonymous namespace
199 225
200class BufferBindings final { 226class BufferBindings final {
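
The new TextureHandle helper above splits one packed 32-bit handle into an image (TIC) index and a sampler (TSC) index, reusing the image index for the sampler when handles come via the header index. A standalone sketch of that unpacking, assuming the usual Maxwell split of a 20-bit TIC field and a 12-bit TSC field:

#include <cstdint>

struct UnpackedHandle {
    std::uint32_t image;
    std::uint32_t sampler;
};

UnpackedHandle Unpack(std::uint32_t data, bool via_header_index) {
    const std::uint32_t tic_id = data & 0xfffff;        // bits [0, 20): image index
    const std::uint32_t tsc_id = (data >> 20) & 0xfff;  // bits [20, 32): sampler index
    // With via_header_index, image and sampler share one index, as above.
    return {tic_id, via_header_index ? tic_id : tsc_id};
}
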
@@ -290,7 +316,7 @@ public:
290private: 316private:
291 // Some of these fields are intentionally left uninitialized to avoid initializing them twice. 317 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
292 struct { 318 struct {
293 std::size_t num_buffers = 0; 319 size_t num_buffers = 0;
294 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; 320 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
295 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; 321 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
296 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; 322 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
@@ -303,7 +329,7 @@ private:
303 VkIndexType type; 329 VkIndexType type;
304 } index; 330 } index;
305 331
306 template <std::size_t N> 332 template <size_t N>
307 void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { 333 void BindStatic(const VKDevice& device, VKScheduler& scheduler) const {
308 if (device.IsExtExtendedDynamicStateSupported()) { 334 if (device.IsExtExtendedDynamicStateSupported()) {
309 if (index.buffer) { 335 if (index.buffer) {
@@ -320,7 +346,7 @@ private:
320 } 346 }
321 } 347 }
322 348
323 template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state> 349 template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
324 void BindStatic(VKScheduler& scheduler) const { 350 void BindStatic(VKScheduler& scheduler) const {
325 static_assert(N <= Maxwell::NumVertexArrays); 351 static_assert(N <= Maxwell::NumVertexArrays);
326 if constexpr (N == 0) { 352 if constexpr (N == 0) {
@@ -380,28 +406,31 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
380 } 406 }
381} 407}
382 408
383RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, 409RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
384 Tegra::MemoryManager& gpu_memory_, 410 Tegra::MemoryManager& gpu_memory_,
385 Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_, 411 Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_,
386 const VKDevice& device_, VKMemoryManager& memory_manager_, 412 const VKDevice& device_, VKMemoryManager& memory_manager_,
387 StateTracker& state_tracker_, VKScheduler& scheduler_) 413 StateTracker& state_tracker_, VKScheduler& scheduler_)
388 : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_), 414 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
389 maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), 415 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
390 device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), 416 screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_},
391 scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), 417 state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler),
392 descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), 418 staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler),
393 renderpass_cache(device), 419 update_descriptor_queue(device, scheduler),
420 blit_image(device, scheduler, state_tracker, descriptor_pool),
394 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 421 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
395 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 422 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
396 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 423 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
397 texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), 424 texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image},
425 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
398 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 426 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
399 descriptor_pool, update_descriptor_queue, renderpass_cache), 427 descriptor_pool, update_descriptor_queue),
400 buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool), 428 buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer,
401 sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), 429 staging_pool),
430 query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
402 fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, 431 fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
403 scheduler), 432 scheduler),
404 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window) { 433 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
405 scheduler.SetQueryCache(query_cache); 434 scheduler.SetQueryCache(query_cache);
406 if (device.UseAsynchronousShaders()) { 435 if (device.UseAsynchronousShaders()) {
407 async_shaders.AllocateWorkers(); 436 async_shaders.AllocateWorkers();
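
The reordered initializer list in this hunk works because C++ constructs members in declaration order, not in the order they appear after the colon; texture_cache(texture_cache_runtime, ...) is only safe because texture_cache_runtime is declared before texture_cache. A minimal illustration:

struct Runtime {
    int value = 42;
};

struct Holder {
    Runtime runtime;  // declared first, so constructed first
    int cache;

    // Safe: runtime is fully constructed before cache's initializer runs,
    // regardless of how the initializer list is ordered.
    Holder() : cache{runtime.value} {}
};
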
@@ -427,9 +456,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
427 const DrawParameters draw_params = 456 const DrawParameters draw_params =
428 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); 457 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
429 458
430 update_descriptor_queue.Acquire(); 459 auto lock = texture_cache.AcquireLock();
431 sampled_views.clear(); 460 texture_cache.SynchronizeGraphicsDescriptors();
432 image_views.clear(); 461
462 texture_cache.UpdateRenderTargets(false);
433 463
434 const auto shaders = pipeline_cache.GetShaders(); 464 const auto shaders = pipeline_cache.GetShaders();
435 key.shaders = GetShaderAddresses(shaders); 465 key.shaders = GetShaderAddresses(shaders);
@@ -437,30 +467,24 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
437 467
438 buffer_cache.Unmap(); 468 buffer_cache.Unmap();
439 469
440 const Texceptions texceptions = UpdateAttachments(false); 470 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
441 SetupImageTransitions(texceptions, color_attachments, zeta_attachment); 471 key.renderpass = framebuffer->RenderPass();
442
443 key.renderpass_params = GetRenderPassParams(texceptions);
444 key.padding = 0;
445 472
446 auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); 473 auto* const pipeline =
474 pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders);
447 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { 475 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
448 // Async graphics pipeline was not ready. 476 // Async graphics pipeline was not ready.
449 return; 477 return;
450 } 478 }
451 479
452 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
453
454 const auto renderpass = pipeline->GetRenderPass();
455 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
456 scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
457
458 UpdateDynamicStates();
459
460 buffer_bindings.Bind(device, scheduler); 480 buffer_bindings.Bind(device, scheduler);
461 481
462 BeginTransformFeedback(); 482 BeginTransformFeedback();
463 483
484 scheduler.RequestRenderpass(framebuffer);
485 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
486 UpdateDynamicStates();
487
464 const auto pipeline_layout = pipeline->GetLayout(); 488 const auto pipeline_layout = pipeline->GetLayout();
465 const auto descriptor_set = pipeline->CommitDescriptorSet(); 489 const auto descriptor_set = pipeline->CommitDescriptorSet();
466 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { 490 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
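[Editor's note] Vulkan graphics pipelines are baked against a specific VkRenderPass, so once the render pass comes from the texture cache's framebuffer, its handle has to participate in the pipeline-cache key (key.renderpass above). A hedged sketch of that keying; the layout is illustrative, not the exact GraphicsPipelineCacheKey:

    #include <cstddef>
    #include <functional>
    #include <unordered_map>

    struct PipelineKey {
        void* renderpass = nullptr;  // stands in for the VkRenderPass handle
        std::size_t shader_hash = 0; // plus fixed state in the real key
        bool operator==(const PipelineKey&) const = default;
    };

    struct PipelineKeyHash {
        std::size_t operator()(const PipelineKey& key) const noexcept {
            // Mix the handle into the hash so pipelines built for different
            // render passes never alias in the cache.
            return std::hash<void*>{}(key.renderpass) ^ (key.shader_hash << 1);
        }
    };

    using PipelineCache = std::unordered_map<PipelineKey, int, PipelineKeyHash>;
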
@@ -481,9 +505,6 @@ void RasterizerVulkan::Clear() {
481 return; 505 return;
482 } 506 }
483 507
484 sampled_views.clear();
485 image_views.clear();
486
487 query_cache.UpdateCounters(); 508 query_cache.UpdateCounters();
488 509
489 const auto& regs = maxwell3d.regs; 510 const auto& regs = maxwell3d.regs;
@@ -495,20 +516,24 @@ void RasterizerVulkan::Clear() {
495 return; 516 return;
496 } 517 }
497 518
498 [[maybe_unused]] const auto texceptions = UpdateAttachments(true); 519 auto lock = texture_cache.AcquireLock();
499 DEBUG_ASSERT(texceptions.none()); 520 texture_cache.UpdateRenderTargets(true);
500 SetupImageTransitions(0, color_attachments, zeta_attachment); 521 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
522 const VkExtent2D render_area = framebuffer->RenderArea();
523 scheduler.RequestRenderpass(framebuffer);
501 524
502 const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); 525 VkClearRect clear_rect{
503 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); 526 .rect = GetScissorState(regs, 0),
504 scheduler.RequestRenderpass(renderpass, framebuffer, render_area); 527 .baseArrayLayer = regs.clear_buffers.layer,
505 528 .layerCount = 1,
506 VkClearRect clear_rect; 529 };
507 clear_rect.baseArrayLayer = regs.clear_buffers.layer; 530 if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
508 clear_rect.layerCount = 1; 531 return;
509 clear_rect.rect = GetScissorState(regs, 0); 532 }
510 clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); 533 clear_rect.rect.extent = VkExtent2D{
511 clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); 534 .width = std::min(clear_rect.rect.extent.width, render_area.width),
535 .height = std::min(clear_rect.rect.extent.height, render_area.height),
536 };
512 537
513 if (use_color) { 538 if (use_color) {
514 VkClearValue clear_value; 539 VkClearValue clear_value;
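[Editor's note] The rewritten clear builds VkClearRect with C++20 designated initializers, rejects an empty scissor up front, and clamps the rect to the framebuffer's render area, since clear rects must lie inside the current render area. The clamp in isolation, with a stand-in extent type:

    #include <algorithm>
    #include <cstdint>

    struct Extent2D {
        uint32_t width;
        uint32_t height;
    };

    // Clamp a scissor extent against the render area; a zero-sized result
    // means there is nothing to clear, so callers return early.
    constexpr Extent2D ClampToRenderArea(Extent2D scissor, Extent2D render_area) {
        return Extent2D{
            .width = std::min(scissor.width, render_area.width),
            .height = std::min(scissor.height, render_area.height),
        };
    }
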
@@ -549,9 +574,6 @@ void RasterizerVulkan::Clear() {
549 574
550void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 575void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
551 MICROPROFILE_SCOPE(Vulkan_Compute); 576 MICROPROFILE_SCOPE(Vulkan_Compute);
552 update_descriptor_queue.Acquire();
553 sampled_views.clear();
554 image_views.clear();
555 577
556 query_cache.UpdateCounters(); 578 query_cache.UpdateCounters();
557 579
@@ -570,29 +592,43 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
570 // Compute dispatches can't be executed inside a renderpass 592 // Compute dispatches can't be executed inside a renderpass
571 scheduler.RequestOutsideRenderPassOperationContext(); 593 scheduler.RequestOutsideRenderPassOperationContext();
572 594
573 buffer_cache.Map(CalculateComputeStreamBufferSize()); 595 image_view_indices.clear();
596 sampler_handles.clear();
597
598 auto lock = texture_cache.AcquireLock();
599 texture_cache.SynchronizeComputeDescriptors();
574 600
575 const auto& entries = pipeline.GetEntries(); 601 const auto& entries = pipeline.GetEntries();
576 SetupComputeConstBuffers(entries);
577 SetupComputeGlobalBuffers(entries);
578 SetupComputeUniformTexels(entries); 602 SetupComputeUniformTexels(entries);
579 SetupComputeTextures(entries); 603 SetupComputeTextures(entries);
580 SetupComputeStorageTexels(entries); 604 SetupComputeStorageTexels(entries);
581 SetupComputeImages(entries); 605 SetupComputeImages(entries);
582 606
583 buffer_cache.Unmap(); 607 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
608 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
584 609
585 TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 610 buffer_cache.Map(CalculateComputeStreamBufferSize());
586 VK_ACCESS_SHADER_READ_BIT);
587 TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
588 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
589 611
612 update_descriptor_queue.Acquire();
613
614 SetupComputeConstBuffers(entries);
615 SetupComputeGlobalBuffers(entries);
616
617 ImageViewId* image_view_id_ptr = image_view_ids.data();
618 VkSampler* sampler_ptr = sampler_handles.data();
619 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
620 sampler_ptr);
621
622 buffer_cache.Unmap();
623
624 const VkPipeline pipeline_handle = pipeline.GetHandle();
625 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
626 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
590 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, 627 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
591 grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), 628 grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
592 layout = pipeline.GetLayout(), 629 descriptor_set](vk::CommandBuffer cmdbuf) {
593 descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) {
594 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); 630 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
595 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, 631 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET,
596 descriptor_set, {}); 632 descriptor_set, {});
597 cmdbuf.Dispatch(grid_x, grid_y, grid_z); 633 cmdbuf.Dispatch(grid_x, grid_y, grid_z);
598 }); 634 });
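[Editor's note] DispatchCompute now walks the shader entries once to collect texture indices and sampler handles, batch-resolves the image views in a single FillComputeImageViews call, and only then acquires the update queue and replays the same order through cursor pointers. The gather-then-replay shape, stripped of the Vulkan types:

    #include <cstddef>
    #include <vector>

    struct Entry { int index; };

    void GatherThenReplay(const std::vector<Entry>& entries) {
        // Pass 1: gather raw indices in a stable order.
        std::vector<int> indices;
        indices.reserve(entries.size());
        for (const Entry& entry : entries) {
            indices.push_back(entry.index);
        }
        // ...one batched resolve over 'indices' happens here...

        // Pass 2: replay the same order, consuming one resolved id per
        // entry, so no per-entry cache lookup is repeated.
        const int* cursor = indices.data();
        for (std::size_t i = 0; i < entries.size(); ++i) {
            [[maybe_unused]] const int resolved = *cursor++;
            // real code writes 'resolved' into the descriptor update queue
        }
    }
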
@@ -613,7 +649,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
613 if (addr == 0 || size == 0) { 649 if (addr == 0 || size == 0) {
614 return; 650 return;
615 } 651 }
616 texture_cache.FlushRegion(addr, size); 652 {
653 auto lock = texture_cache.AcquireLock();
654 texture_cache.DownloadMemory(addr, size);
655 }
617 buffer_cache.FlushRegion(addr, size); 656 buffer_cache.FlushRegion(addr, size);
618 query_cache.FlushRegion(addr, size); 657 query_cache.FlushRegion(addr, size);
619} 658}
@@ -622,14 +661,18 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
622 if (!Settings::IsGPULevelHigh()) { 661 if (!Settings::IsGPULevelHigh()) {
623 return buffer_cache.MustFlushRegion(addr, size); 662 return buffer_cache.MustFlushRegion(addr, size);
624 } 663 }
625 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); 664 return texture_cache.IsRegionGpuModified(addr, size) ||
665 buffer_cache.MustFlushRegion(addr, size);
626} 666}
627 667
628void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { 668void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
629 if (addr == 0 || size == 0) { 669 if (addr == 0 || size == 0) {
630 return; 670 return;
631 } 671 }
632 texture_cache.InvalidateRegion(addr, size); 672 {
673 auto lock = texture_cache.AcquireLock();
674 texture_cache.WriteMemory(addr, size);
675 }
633 pipeline_cache.InvalidateRegion(addr, size); 676 pipeline_cache.InvalidateRegion(addr, size);
634 buffer_cache.InvalidateRegion(addr, size); 677 buffer_cache.InvalidateRegion(addr, size);
635 query_cache.InvalidateRegion(addr, size); 678 query_cache.InvalidateRegion(addr, size);
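[Editor's note] Each memory hook now takes the texture-cache lock inside an explicit block, so the lock drops before the pipeline, buffer, and query caches run; holding one cache's mutex across the others would serialize them for no benefit. The pattern in miniature:

    #include <mutex>

    std::mutex texture_mutex;

    void InvalidateRegion() {
        {
            std::scoped_lock lock{texture_mutex};
            // texture-cache work only
        } // released here
        // pipeline/buffer/query cache invalidation runs without the lock
    }
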
@@ -639,17 +682,28 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
639 if (addr == 0 || size == 0) { 682 if (addr == 0 || size == 0) {
640 return; 683 return;
641 } 684 }
642 texture_cache.OnCPUWrite(addr, size); 685 {
686 auto lock = texture_cache.AcquireLock();
687 texture_cache.WriteMemory(addr, size);
688 }
643 pipeline_cache.OnCPUWrite(addr, size); 689 pipeline_cache.OnCPUWrite(addr, size);
644 buffer_cache.OnCPUWrite(addr, size); 690 buffer_cache.OnCPUWrite(addr, size);
645} 691}
646 692
647void RasterizerVulkan::SyncGuestHost() { 693void RasterizerVulkan::SyncGuestHost() {
648 texture_cache.SyncGuestHost();
649 buffer_cache.SyncGuestHost(); 694 buffer_cache.SyncGuestHost();
650 pipeline_cache.SyncGuestHost(); 695 pipeline_cache.SyncGuestHost();
651} 696}
652 697
698void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
699 {
700 auto lock = texture_cache.AcquireLock();
701 texture_cache.UnmapMemory(addr, size);
702 }
703 buffer_cache.OnCPUWrite(addr, size);
704 pipeline_cache.OnCPUWrite(addr, size);
705}
706
653void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { 707void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
654 if (!gpu.IsAsync()) { 708 if (!gpu.IsAsync()) {
655 gpu_memory.Write<u32>(addr, value); 709 gpu_memory.Write<u32>(addr, value);
@@ -700,6 +754,14 @@ void RasterizerVulkan::WaitForIdle() {
700 }); 754 });
701} 755}
702 756
757void RasterizerVulkan::FragmentBarrier() {
758 // We already put barriers when a render pass finishes
759}
760
761void RasterizerVulkan::TiledCacheBarrier() {
762 // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend
763}
764
703void RasterizerVulkan::FlushCommands() { 765void RasterizerVulkan::FlushCommands() {
704 if (draw_counter > 0) { 766 if (draw_counter > 0) {
705 draw_counter = 0; 767 draw_counter = 0;
@@ -710,14 +772,20 @@ void RasterizerVulkan::FlushCommands() {
710void RasterizerVulkan::TickFrame() { 772void RasterizerVulkan::TickFrame() {
711 draw_counter = 0; 773 draw_counter = 0;
712 update_descriptor_queue.TickFrame(); 774 update_descriptor_queue.TickFrame();
775 fence_manager.TickFrame();
713 buffer_cache.TickFrame(); 776 buffer_cache.TickFrame();
714 staging_pool.TickFrame(); 777 staging_pool.TickFrame();
778 {
779 auto lock = texture_cache.AcquireLock();
780 texture_cache.TickFrame();
781 }
715} 782}
716 783
717bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 784bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
718 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 785 const Tegra::Engines::Fermi2D::Surface& dst,
719 const Tegra::Engines::Fermi2D::Config& copy_config) { 786 const Tegra::Engines::Fermi2D::Config& copy_config) {
720 texture_cache.DoFermiCopy(src, dst, copy_config); 787 auto lock = texture_cache.AcquireLock();
788 texture_cache.BlitImage(dst, src, copy_config);
721 return true; 789 return true;
722} 790}
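[Editor's note] Mind the operand swap at this call site: the old DoFermiCopy took (src, dst, copy_config) while the new BlitImage takes the destination first. Assumed declaration, shown only to highlight the ordering; the real one lives in vk_texture_cache.h:

    // Illustrative signature, destination-first in memcpy style:
    // void TextureCache::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
    //                              const Tegra::Engines::Fermi2D::Surface& src,
    //                              const Tegra::Engines::Fermi2D::Config& copy_config);
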
723 791
@@ -727,20 +795,16 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
727 return false; 795 return false;
728 } 796 }
729 797
730 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; 798 auto lock = texture_cache.AcquireLock();
731 if (!surface) { 799 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
800 if (!image_view) {
732 return false; 801 return false;
733 } 802 }
734 803
735 // Verify that the cached surface is the same size and format as the requested framebuffer 804 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
736 const auto& params{surface->GetSurfaceParams()}; 805 screen_info.width = image_view->size.width;
737 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 806 screen_info.height = image_view->size.height;
738 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 807 screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
739
740 screen_info.image = &surface->GetImage();
741 screen_info.width = params.width;
742 screen_info.height = params.height;
743 screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion;
744 return true; 808 return true;
745} 809}
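[Editor's note] AccelerateDisplay no longer asserts that the cached surface matches the guest framebuffer's dimensions; it adopts the image view's own size and derives the sRGB flag from the view's pixel format instead of the old surface parameters. A sketch of such a predicate over an assumed two-entry format enum:

    enum class PixelFormat { A8B8G8R8_UNORM, A8B8G8R8_SRGB };

    // The real IsPixelFormatSRGB covers every sRGB format; this shows only
    // the shape of the check.
    constexpr bool IsPixelFormatSRGB(PixelFormat format) {
        return format == PixelFormat::A8B8G8R8_SRGB;
    }
    static_assert(IsPixelFormatSRGB(PixelFormat::A8B8G8R8_SRGB));
    static_assert(!IsPixelFormatSRGB(PixelFormat::A8B8G8R8_UNORM));
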
746 810
@@ -765,103 +829,6 @@ void RasterizerVulkan::FlushWork() {
765 draw_counter = 0; 829 draw_counter = 0;
766} 830}
767 831
768RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
769 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
770
771 const auto& regs = maxwell3d.regs;
772 auto& dirty = maxwell3d.dirty.flags;
773 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
774 dirty[VideoCommon::Dirty::RenderTargets] = false;
775
776 texture_cache.GuardRenderTargets(true);
777
778 Texceptions texceptions;
779 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
780 if (update_rendertargets) {
781 const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
782 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
783 }
784 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
785 texceptions[rt] = true;
786 }
787 }
788
789 if (update_rendertargets) {
790 const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
791 zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
792 }
793 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
794 texceptions[ZETA_TEXCEPTION_INDEX] = true;
795 }
796
797 texture_cache.GuardRenderTargets(false);
798
799 return texceptions;
800}
801
802bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) {
803 bool overlap = false;
804 for (auto& [view, layout] : sampled_views) {
805 if (!attachment.IsSameSurface(*view)) {
806 continue;
807 }
808 overlap = true;
809 *layout = VK_IMAGE_LAYOUT_GENERAL;
810 }
811 return overlap;
812}
813
814std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
815 VkRenderPass renderpass) {
816 FramebufferCacheKey key{
817 .renderpass = renderpass,
818 .width = std::numeric_limits<u32>::max(),
819 .height = std::numeric_limits<u32>::max(),
820 .layers = std::numeric_limits<u32>::max(),
821 .views = {},
822 };
823
824 const auto try_push = [&key](const View& view) {
825 if (!view) {
826 return false;
827 }
828 key.views.push_back(view->GetAttachment());
829 key.width = std::min(key.width, view->GetWidth());
830 key.height = std::min(key.height, view->GetHeight());
831 key.layers = std::min(key.layers, view->GetNumLayers());
832 return true;
833 };
834
835 const auto& regs = maxwell3d.regs;
836 const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
837 for (std::size_t index = 0; index < num_attachments; ++index) {
838 if (try_push(color_attachments[index])) {
839 texture_cache.MarkColorBufferInUse(index);
840 }
841 }
842 if (try_push(zeta_attachment)) {
843 texture_cache.MarkDepthBufferInUse();
844 }
845
846 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
847 auto& framebuffer = fbentry->second;
848 if (is_cache_miss) {
849 framebuffer = device.GetLogical().CreateFramebuffer({
850 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
851 .pNext = nullptr,
852 .flags = 0,
853 .renderPass = key.renderpass,
854 .attachmentCount = static_cast<u32>(key.views.size()),
855 .pAttachments = key.views.data(),
856 .width = key.width,
857 .height = key.height,
858 .layers = key.layers,
859 });
860 }
861
862 return {*framebuffer, VkExtent2D{key.width, key.height}};
863}
864
865RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, 832RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
866 BufferBindings& buffer_bindings, 833 BufferBindings& buffer_bindings,
867 bool is_indexed, 834 bool is_indexed,
@@ -885,51 +852,37 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
885 852
886void RasterizerVulkan::SetupShaderDescriptors( 853void RasterizerVulkan::SetupShaderDescriptors(
887 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 854 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
888 texture_cache.GuardSamplers(true); 855 image_view_indices.clear();
889 856 sampler_handles.clear();
890 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 857 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
891 // Skip VertexA stage
892 Shader* const shader = shaders[stage + 1]; 858 Shader* const shader = shaders[stage + 1];
893 if (!shader) { 859 if (!shader) {
894 continue; 860 continue;
895 } 861 }
896 const auto& entries = shader->GetEntries(); 862 const auto& entries = shader->GetEntries();
897 SetupGraphicsConstBuffers(entries, stage);
898 SetupGraphicsGlobalBuffers(entries, stage);
899 SetupGraphicsUniformTexels(entries, stage); 863 SetupGraphicsUniformTexels(entries, stage);
900 SetupGraphicsTextures(entries, stage); 864 SetupGraphicsTextures(entries, stage);
901 SetupGraphicsStorageTexels(entries, stage); 865 SetupGraphicsStorageTexels(entries, stage);
902 SetupGraphicsImages(entries, stage); 866 SetupGraphicsImages(entries, stage);
903 } 867 }
904 texture_cache.GuardSamplers(false); 868 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
905} 869 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
906 870
907void RasterizerVulkan::SetupImageTransitions( 871 update_descriptor_queue.Acquire();
908 Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
909 const View& zeta_attachment) {
910 TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT);
911 TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
912 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
913 872
914 for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { 873 ImageViewId* image_view_id_ptr = image_view_ids.data();
915 const auto color_attachment = color_attachments[rt]; 874 VkSampler* sampler_ptr = sampler_handles.data();
916 if (color_attachment == nullptr) { 875 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
876 // Skip VertexA stage
877 Shader* const shader = shaders[stage + 1];
878 if (!shader) {
917 continue; 879 continue;
918 } 880 }
919 const auto image_layout = 881 const auto& entries = shader->GetEntries();
920 texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 882 SetupGraphicsConstBuffers(entries, stage);
921 color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 883 SetupGraphicsGlobalBuffers(entries, stage);
922 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 884 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
923 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); 885 sampler_ptr);
924 }
925
926 if (zeta_attachment != nullptr) {
927 const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX]
928 ? VK_IMAGE_LAYOUT_GENERAL
929 : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
930 zeta_attachment->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
931 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
932 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
933 } 886 }
934} 887}
935 888
@@ -1001,7 +954,7 @@ void RasterizerVulkan::EndTransformFeedback() {
1001void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { 954void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
1002 const auto& regs = maxwell3d.regs; 955 const auto& regs = maxwell3d.regs;
1003 956
1004 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 957 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
1005 const auto& vertex_array = regs.vertex_array[index]; 958 const auto& vertex_array = regs.vertex_array[index];
1006 if (!vertex_array.IsEnabled()) { 959 if (!vertex_array.IsEnabled()) {
1007 continue; 960 continue;
@@ -1010,7 +963,7 @@ void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
1010 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; 963 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1011 964
1012 ASSERT(end >= start); 965 ASSERT(end >= start);
1013 const std::size_t size = end - start; 966 const size_t size = end - start;
1014 if (size == 0) { 967 if (size == 0) {
1015 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); 968 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
1016 continue; 969 continue;
@@ -1071,7 +1024,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
1071 } 1024 }
1072} 1025}
1073 1026
1074void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { 1027void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
1075 MICROPROFILE_SCOPE(Vulkan_ConstBuffers); 1028 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1076 const auto& shader_stage = maxwell3d.state.shader_stages[stage]; 1029 const auto& shader_stage = maxwell3d.state.shader_stages[stage];
1077 for (const auto& entry : entries.const_buffers) { 1030 for (const auto& entry : entries.const_buffers) {
@@ -1079,7 +1032,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s
1079 } 1032 }
1080} 1033}
1081 1034
1082void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { 1035void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
1083 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); 1036 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1084 const auto& cbufs{maxwell3d.state.shader_stages[stage]}; 1037 const auto& cbufs{maxwell3d.state.shader_stages[stage]};
1085 1038
@@ -1089,37 +1042,49 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
1089 } 1042 }
1090} 1043}
1091 1044
1092void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { 1045void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
1093 MICROPROFILE_SCOPE(Vulkan_Textures); 1046 MICROPROFILE_SCOPE(Vulkan_Textures);
1047 const auto& regs = maxwell3d.regs;
1048 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1094 for (const auto& entry : entries.uniform_texels) { 1049 for (const auto& entry : entries.uniform_texels) {
1095 const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; 1050 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
1096 SetupUniformTexels(image, entry); 1051 image_view_indices.push_back(handle.image);
1097 } 1052 }
1098} 1053}
1099 1054
1100void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { 1055void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
1101 MICROPROFILE_SCOPE(Vulkan_Textures); 1056 MICROPROFILE_SCOPE(Vulkan_Textures);
1057 const auto& regs = maxwell3d.regs;
1058 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1102 for (const auto& entry : entries.samplers) { 1059 for (const auto& entry : entries.samplers) {
1103 for (std::size_t i = 0; i < entry.size; ++i) { 1060 for (size_t index = 0; index < entry.size; ++index) {
1104 const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); 1061 const TextureHandle handle =
1105 SetupTexture(texture, entry); 1062 GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
1063 image_view_indices.push_back(handle.image);
1064
1065 Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
1066 sampler_handles.push_back(sampler->Handle());
1106 } 1067 }
1107 } 1068 }
1108} 1069}
1109 1070
1110void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { 1071void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
1111 MICROPROFILE_SCOPE(Vulkan_Textures); 1072 MICROPROFILE_SCOPE(Vulkan_Textures);
1073 const auto& regs = maxwell3d.regs;
1074 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1112 for (const auto& entry : entries.storage_texels) { 1075 for (const auto& entry : entries.storage_texels) {
1113 const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; 1076 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
1114 SetupStorageTexel(image, entry); 1077 image_view_indices.push_back(handle.image);
1115 } 1078 }
1116} 1079}
1117 1080
1118void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { 1081void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
1119 MICROPROFILE_SCOPE(Vulkan_Images); 1082 MICROPROFILE_SCOPE(Vulkan_Images);
1083 const auto& regs = maxwell3d.regs;
1084 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1120 for (const auto& entry : entries.images) { 1085 for (const auto& entry : entries.images) {
1121 const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; 1086 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
1122 SetupImage(tic, entry); 1087 image_view_indices.push_back(handle.image);
1123 } 1088 }
1124} 1089}
1125 1090
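[Editor's note] Every setup helper now records a TextureHandle instead of resolving a full TIC entry on the spot, and checks regs.sampler_index to decide whether the sampler index is packed alongside the image index or reuses it (ViaHeaderIndex / linked TSC). The bit split below is an assumption for illustration only (commonly 20 bits of TIC index plus 12 bits of TSC index), not a verified layout:

    #include <cstdint>

    struct TextureHandle {
        uint32_t image;   // TIC (texture header) index
        uint32_t sampler; // TSC (sampler) index
    };

    // Assumed packing: low 20 bits select the image, top 12 the sampler.
    // With ViaHeaderIndex, a single index addresses both tables.
    constexpr TextureHandle UnpackHandle(uint32_t raw, bool via_header_index) {
        if (via_header_index) {
            return TextureHandle{raw, raw};
        }
        return TextureHandle{raw & 0xFFFFFu, raw >> 20};
    }
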
@@ -1129,11 +1094,12 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
1129 for (const auto& entry : entries.const_buffers) { 1094 for (const auto& entry : entries.const_buffers) {
1130 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 1095 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
1131 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 1096 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
1132 Tegra::Engines::ConstBufferInfo buffer; 1097 const Tegra::Engines::ConstBufferInfo info{
1133 buffer.address = config.Address(); 1098 .address = config.Address(),
1134 buffer.size = config.size; 1099 .size = config.size,
1135 buffer.enabled = mask[entry.GetIndex()]; 1100 .enabled = mask[entry.GetIndex()],
1136 SetupConstBuffer(entry, buffer); 1101 };
1102 SetupConstBuffer(entry, info);
1137 } 1103 }
1138} 1104}
1139 1105
@@ -1148,35 +1114,46 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
1148 1114
1149void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { 1115void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1150 MICROPROFILE_SCOPE(Vulkan_Textures); 1116 MICROPROFILE_SCOPE(Vulkan_Textures);
1117 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1151 for (const auto& entry : entries.uniform_texels) { 1118 for (const auto& entry : entries.uniform_texels) {
1152 const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; 1119 const TextureHandle handle =
1153 SetupUniformTexels(image, entry); 1120 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
1121 image_view_indices.push_back(handle.image);
1154 } 1122 }
1155} 1123}
1156 1124
1157void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { 1125void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1158 MICROPROFILE_SCOPE(Vulkan_Textures); 1126 MICROPROFILE_SCOPE(Vulkan_Textures);
1127 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1159 for (const auto& entry : entries.samplers) { 1128 for (const auto& entry : entries.samplers) {
1160 for (std::size_t i = 0; i < entry.size; ++i) { 1129 for (size_t index = 0; index < entry.size; ++index) {
1161 const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); 1130 const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
1162 SetupTexture(texture, entry); 1131 COMPUTE_SHADER_INDEX, index);
1132 image_view_indices.push_back(handle.image);
1133
1134 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
1135 sampler_handles.push_back(sampler->Handle());
1163 } 1136 }
1164 } 1137 }
1165} 1138}
1166 1139
1167void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { 1140void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1168 MICROPROFILE_SCOPE(Vulkan_Textures); 1141 MICROPROFILE_SCOPE(Vulkan_Textures);
1142 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1169 for (const auto& entry : entries.storage_texels) { 1143 for (const auto& entry : entries.storage_texels) {
1170 const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; 1144 const TextureHandle handle =
1171 SetupStorageTexel(image, entry); 1145 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
1146 image_view_indices.push_back(handle.image);
1172 } 1147 }
1173} 1148}
1174 1149
1175void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { 1150void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1176 MICROPROFILE_SCOPE(Vulkan_Images); 1151 MICROPROFILE_SCOPE(Vulkan_Images);
1152 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1177 for (const auto& entry : entries.images) { 1153 for (const auto& entry : entries.images) {
1178 const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; 1154 const TextureHandle handle =
1179 SetupImage(tic, entry); 1155 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
1156 image_view_indices.push_back(handle.image);
1180 } 1157 }
1181} 1158}
1182 1159
@@ -1187,14 +1164,12 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1187 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); 1164 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
1188 return; 1165 return;
1189 } 1166 }
1190
1191 // Align the size to avoid bad std140 interactions 1167 // Align the size to avoid bad std140 interactions
1192 const std::size_t size = 1168 const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1193 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1194 ASSERT(size <= MaxConstbufferSize); 1169 ASSERT(size <= MaxConstbufferSize);
1195 1170
1196 const auto info = 1171 const u64 alignment = device.GetUniformBufferAlignment();
1197 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); 1172 const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
1198 update_descriptor_queue.AddBuffer(info.handle, info.offset, size); 1173 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1199} 1174}
1200 1175
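[Editor's note] The upload size above is rounded up to 16 bytes (4 * sizeof(float), one std140 vec4) because std140 layouts pad constant buffers to vec4 granularity; uploading an unaligned tail could leave the last vector partially backed. Common::AlignUp reduces to the usual power-of-two rounding:

    #include <cstddef>

    // Round value up to the next multiple of align (align must be a power
    // of two for the bitwise form; here align is 16).
    constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
        return (value + align - 1) & ~(align - 1);
    }
    static_assert(AlignUp(13, 16) == 16);
    static_assert(AlignUp(32, 16) == 32);
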
@@ -1207,7 +1182,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1207 // because Vulkan doesn't like empty buffers. 1182 // because Vulkan doesn't like empty buffers.
1208 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the 1183 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
1209 // default buffer. 1184 // default buffer.
1210 static constexpr std::size_t dummy_size = 4; 1185 static constexpr size_t dummy_size = 4;
1211 const auto info = buffer_cache.GetEmptyBuffer(dummy_size); 1186 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1212 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); 1187 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1213 return; 1188 return;
@@ -1218,55 +1193,6 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1218 update_descriptor_queue.AddBuffer(info.handle, info.offset, size); 1193 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1219} 1194}
1220 1195
1221void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
1222 const UniformTexelEntry& entry) {
1223 const auto view = texture_cache.GetTextureSurface(tic, entry);
1224 ASSERT(view->IsBufferView());
1225
1226 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
1227}
1228
1229void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture,
1230 const SamplerEntry& entry) {
1231 auto view = texture_cache.GetTextureSurface(texture.tic, entry);
1232 ASSERT(!view->IsBufferView());
1233
1234 const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
1235 texture.tic.z_source, texture.tic.w_source);
1236 const auto sampler = sampler_cache.GetSampler(texture.tsc);
1237 update_descriptor_queue.AddSampledImage(sampler, image_view);
1238
1239 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1240 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1241 sampled_views.push_back(ImageView{std::move(view), image_layout});
1242}
1243
1244void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
1245 const StorageTexelEntry& entry) {
1246 const auto view = texture_cache.GetImageSurface(tic, entry);
1247 ASSERT(view->IsBufferView());
1248
1249 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
1250}
1251
1252void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
1253 auto view = texture_cache.GetImageSurface(tic, entry);
1254
1255 if (entry.is_written) {
1256 view->MarkAsModified(texture_cache.Tick());
1257 }
1258
1259 UNIMPLEMENTED_IF(tic.IsBuffer());
1260
1261 const VkImageView image_view =
1262 view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1263 update_descriptor_queue.AddImage(image_view);
1264
1265 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1266 *image_layout = VK_IMAGE_LAYOUT_GENERAL;
1267 image_views.push_back(ImageView{std::move(view), image_layout});
1268}
1269
1270void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 1196void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
1271 if (!state_tracker.TouchViewports()) { 1197 if (!state_tracker.TouchViewports()) {
1272 return; 1198 return;
@@ -1458,8 +1384,8 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1458 }); 1384 });
1459} 1385}
1460 1386
1461std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { 1387size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1462 std::size_t size = CalculateVertexArraysSize(); 1388 size_t size = CalculateVertexArraysSize();
1463 if (is_indexed) { 1389 if (is_indexed) {
1464 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); 1390 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1465 } 1391 }
@@ -1467,15 +1393,15 @@ std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed)
1467 return size; 1393 return size;
1468} 1394}
1469 1395
1470std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { 1396size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1471 return Tegra::Engines::KeplerCompute::NumConstBuffers * 1397 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1472 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 1398 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1473} 1399}
1474 1400
1475std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { 1401size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1476 const auto& regs = maxwell3d.regs; 1402 const auto& regs = maxwell3d.regs;
1477 1403
1478 std::size_t size = 0; 1404 size_t size = 0;
1479 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 1405 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1480 // This implementation assumes that all attributes are used in the shader. 1406 // This implementation assumes that all attributes are used in the shader.
1481 const GPUVAddr start{regs.vertex_array[index].StartAddress()}; 1407 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
@@ -1487,12 +1413,12 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1487 return size; 1413 return size;
1488} 1414}
1489 1415
1490std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { 1416size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1491 return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * 1417 return static_cast<size_t>(maxwell3d.regs.index_array.count) *
1492 static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); 1418 static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
1493} 1419}
1494 1420
1495std::size_t RasterizerVulkan::CalculateConstBufferSize( 1421size_t RasterizerVulkan::CalculateConstBufferSize(
1496 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { 1422 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1497 if (entry.IsIndirect()) { 1423 if (entry.IsIndirect()) {
1498 // Buffer is accessed indirectly, so upload the entire thing 1424 // Buffer is accessed indirectly, so upload the entire thing
@@ -1503,37 +1429,10 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
1503 } 1429 }
1504} 1430}
1505 1431
1506RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
1507 const auto& regs = maxwell3d.regs;
1508 const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
1509
1510 RenderPassParams params;
1511 params.color_formats = {};
1512 std::size_t color_texceptions = 0;
1513
1514 std::size_t index = 0;
1515 for (std::size_t rt = 0; rt < num_attachments; ++rt) {
1516 const auto& rendertarget = regs.rt[rt];
1517 if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
1518 continue;
1519 }
1520 params.color_formats[index] = static_cast<u8>(rendertarget.format);
1521 color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index;
1522 ++index;
1523 }
1524 params.num_color_attachments = static_cast<u8>(index);
1525 params.texceptions = static_cast<u8>(color_texceptions);
1526
1527 params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0;
1528 params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
1529 return params;
1530}
1531
1532VkBuffer RasterizerVulkan::DefaultBuffer() { 1432VkBuffer RasterizerVulkan::DefaultBuffer() {
1533 if (default_buffer) { 1433 if (default_buffer) {
1534 return *default_buffer; 1434 return *default_buffer;
1535 } 1435 }
1536
1537 default_buffer = device.GetLogical().CreateBuffer({ 1436 default_buffer = device.GetLogical().CreateBuffer({
1538 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 1437 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1539 .pNext = nullptr, 1438 .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 237e51fa4..990f9e031 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -11,11 +11,11 @@
11#include <vector> 11#include <vector>
12 12
13#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
14#include <boost/functional/hash.hpp>
15 14
16#include "common/common_types.h" 15#include "common/common_types.h"
17#include "video_core/rasterizer_accelerated.h" 16#include "video_core/rasterizer_accelerated.h"
18#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
18#include "video_core/renderer_vulkan/blit_image.h"
19#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 19#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
20#include "video_core/renderer_vulkan/vk_buffer_cache.h" 20#include "video_core/renderer_vulkan/vk_buffer_cache.h"
21#include "video_core/renderer_vulkan/vk_compute_pass.h" 21#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -24,10 +24,9 @@
24#include "video_core/renderer_vulkan/vk_memory_manager.h" 24#include "video_core/renderer_vulkan/vk_memory_manager.h"
25#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 25#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
26#include "video_core/renderer_vulkan/vk_query_cache.h" 26#include "video_core/renderer_vulkan/vk_query_cache.h"
27#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
28#include "video_core/renderer_vulkan/vk_sampler_cache.h"
29#include "video_core/renderer_vulkan/vk_scheduler.h" 27#include "video_core/renderer_vulkan/vk_scheduler.h"
30#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
29#include "video_core/renderer_vulkan/vk_stream_buffer.h"
31#include "video_core/renderer_vulkan/vk_texture_cache.h" 30#include "video_core/renderer_vulkan/vk_texture_cache.h"
32#include "video_core/renderer_vulkan/vk_update_descriptor.h" 31#include "video_core/renderer_vulkan/vk_update_descriptor.h"
33#include "video_core/renderer_vulkan/wrapper.h" 32#include "video_core/renderer_vulkan/wrapper.h"
@@ -49,67 +48,16 @@ namespace Vulkan {
49 48
50struct VKScreenInfo; 49struct VKScreenInfo;
51 50
52using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>;
53
54struct FramebufferCacheKey {
55 VkRenderPass renderpass{};
56 u32 width = 0;
57 u32 height = 0;
58 u32 layers = 0;
59 ImageViewsPack views;
60
61 std::size_t Hash() const noexcept {
62 std::size_t hash = 0;
63 boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
64 for (const auto& view : views) {
65 boost::hash_combine(hash, static_cast<VkImageView>(view));
66 }
67 boost::hash_combine(hash, width);
68 boost::hash_combine(hash, height);
69 boost::hash_combine(hash, layers);
70 return hash;
71 }
72
73 bool operator==(const FramebufferCacheKey& rhs) const noexcept {
74 return std::tie(renderpass, views, width, height, layers) ==
75 std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers);
76 }
77
78 bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
79 return !operator==(rhs);
80 }
81};
82
83} // namespace Vulkan
84
85namespace std {
86
87template <>
88struct hash<Vulkan::FramebufferCacheKey> {
89 std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
90 return k.Hash();
91 }
92};
93
94} // namespace std
95
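[Editor's note] The removed FramebufferCacheKey shows the recipe this codebase uses for custom unordered_map keys: a Hash() member, an operator==, and a std::hash specialization forwarding to Hash(). The same recipe in minimal, compilable form:

    #include <cstddef>
    #include <functional>
    #include <unordered_map>

    struct Key {
        int a = 0;
        int b = 0;

        std::size_t Hash() const noexcept {
            std::size_t hash = std::hash<int>{}(a);
            // boost::hash_combine-style mixing
            hash ^= std::hash<int>{}(b) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
            return hash;
        }
        bool operator==(const Key&) const noexcept = default;
    };

    template <>
    struct std::hash<Key> {
        std::size_t operator()(const Key& key) const noexcept {
            return key.Hash();
        }
    };

    std::unordered_map<Key, int> cache; // now a valid key type
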
96namespace Vulkan {
97
98class StateTracker; 51class StateTracker;
99class BufferBindings; 52class BufferBindings;
100 53
101struct ImageView {
102 View view;
103 VkImageLayout* layout = nullptr;
104};
105
106class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 54class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
107public: 55public:
108 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, 56 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
109 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 57 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
110 VKScreenInfo& screen_info, const VKDevice& device, 58 VKScreenInfo& screen_info_, const VKDevice& device_,
111 VKMemoryManager& memory_manager, StateTracker& state_tracker, 59 VKMemoryManager& memory_manager_, StateTracker& state_tracker_,
112 VKScheduler& scheduler); 60 VKScheduler& scheduler_);
113 ~RasterizerVulkan() override; 61 ~RasterizerVulkan() override;
114 62
115 void Draw(bool is_indexed, bool is_instanced) override; 63 void Draw(bool is_indexed, bool is_instanced) override;
@@ -123,15 +71,18 @@ public:
123 void InvalidateRegion(VAddr addr, u64 size) override; 71 void InvalidateRegion(VAddr addr, u64 size) override;
124 void OnCPUWrite(VAddr addr, u64 size) override; 72 void OnCPUWrite(VAddr addr, u64 size) override;
125 void SyncGuestHost() override; 73 void SyncGuestHost() override;
74 void UnmapMemory(VAddr addr, u64 size) override;
126 void SignalSemaphore(GPUVAddr addr, u32 value) override; 75 void SignalSemaphore(GPUVAddr addr, u32 value) override;
127 void SignalSyncPoint(u32 value) override; 76 void SignalSyncPoint(u32 value) override;
128 void ReleaseFences() override; 77 void ReleaseFences() override;
129 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 78 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
130 void WaitForIdle() override; 79 void WaitForIdle() override;
80 void FragmentBarrier() override;
81 void TiledCacheBarrier() override;
131 void FlushCommands() override; 82 void FlushCommands() override;
132 void TickFrame() override; 83 void TickFrame() override;
133 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 84 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
134 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 85 const Tegra::Engines::Fermi2D::Surface& dst,
135 const Tegra::Engines::Fermi2D::Config& copy_config) override; 86 const Tegra::Engines::Fermi2D::Config& copy_config) override;
136 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 87 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
137 u32 pixel_stride) override; 88 u32 pixel_stride) override;
@@ -145,11 +96,17 @@ public:
145 } 96 }
146 97
147 /// Maximum supported size that a constbuffer can have in bytes. 98 /// Maximum supported size that a constbuffer can have in bytes.
148 static constexpr std::size_t MaxConstbufferSize = 0x10000; 99 static constexpr size_t MaxConstbufferSize = 0x10000;
149 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, 100 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
150 "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); 101 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
151 102
152private: 103private:
104 static constexpr size_t MAX_TEXTURES = 192;
105 static constexpr size_t MAX_IMAGES = 48;
106 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
107
108 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
109
153 struct DrawParameters { 110 struct DrawParameters {
154 void Draw(vk::CommandBuffer cmdbuf) const; 111 void Draw(vk::CommandBuffer cmdbuf) const;
155 112
@@ -160,20 +117,8 @@ private:
160 bool is_indexed = 0; 117 bool is_indexed = 0;
161 }; 118 };
162 119
163 using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
164
165 static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
166 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
167
168 void FlushWork(); 120 void FlushWork();
169 121
170 /// @brief Updates the currently bound attachments
171 /// @param is_clear True when the framebuffer is updated as a clear
172 /// @return Bitfield of attachments being used as sampled textures
173 Texceptions UpdateAttachments(bool is_clear);
174
175 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
176
177 /// Setups geometry buffers and state. 122 /// Setups geometry buffers and state.
178 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, 123 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
179 bool is_indexed, bool is_instanced); 124 bool is_indexed, bool is_instanced);
@@ -181,18 +126,12 @@ private:
181 /// Setup descriptors in the graphics pipeline. 126 /// Setup descriptors in the graphics pipeline.
182 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); 127 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
183 128
184 void SetupImageTransitions(Texceptions texceptions,
185 const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
186 const View& zeta_attachment);
187
188 void UpdateDynamicStates(); 129 void UpdateDynamicStates();
189 130
190 void BeginTransformFeedback(); 131 void BeginTransformFeedback();
191 132
192 void EndTransformFeedback(); 133 void EndTransformFeedback();
193 134
194 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
195
196 void SetupVertexArrays(BufferBindings& buffer_bindings); 135 void SetupVertexArrays(BufferBindings& buffer_bindings);
197 136
198 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); 137 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
@@ -238,14 +177,6 @@ private:
238 177
239 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); 178 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
240 179
241 void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
242
243 void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
244
245 void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
246
247 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
248
249 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 180 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
250 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 181 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
251 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 182 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -262,18 +193,16 @@ private:
262 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 193 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
263 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 194 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
264 195
265 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; 196 size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
266
267 std::size_t CalculateComputeStreamBufferSize() const;
268 197
269 std::size_t CalculateVertexArraysSize() const; 198 size_t CalculateComputeStreamBufferSize() const;
270 199
271 std::size_t CalculateIndexBufferSize() const; 200 size_t CalculateVertexArraysSize() const;
272 201
273 std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, 202 size_t CalculateIndexBufferSize() const;
274 const Tegra::Engines::ConstBufferInfo& buffer) const;
275 203
276 RenderPassParams GetRenderPassParams(Texceptions texceptions) const; 204 size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
205 const Tegra::Engines::ConstBufferInfo& buffer) const;
277 206
278 VkBuffer DefaultBuffer(); 207 VkBuffer DefaultBuffer();
279 208
@@ -288,18 +217,19 @@ private:
288 StateTracker& state_tracker; 217 StateTracker& state_tracker;
289 VKScheduler& scheduler; 218 VKScheduler& scheduler;
290 219
220 VKStreamBuffer stream_buffer;
291 VKStagingBufferPool staging_pool; 221 VKStagingBufferPool staging_pool;
292 VKDescriptorPool descriptor_pool; 222 VKDescriptorPool descriptor_pool;
293 VKUpdateDescriptorQueue update_descriptor_queue; 223 VKUpdateDescriptorQueue update_descriptor_queue;
294 VKRenderPassCache renderpass_cache; 224 BlitImageHelper blit_image;
295 QuadArrayPass quad_array_pass; 225 QuadArrayPass quad_array_pass;
296 QuadIndexedPass quad_indexed_pass; 226 QuadIndexedPass quad_indexed_pass;
297 Uint8Pass uint8_pass; 227 Uint8Pass uint8_pass;
298 228
299 VKTextureCache texture_cache; 229 TextureCacheRuntime texture_cache_runtime;
230 TextureCache texture_cache;
300 VKPipelineCache pipeline_cache; 231 VKPipelineCache pipeline_cache;
301 VKBufferCache buffer_cache; 232 VKBufferCache buffer_cache;
302 VKSamplerCache sampler_cache;
303 VKQueryCache query_cache; 233 VKQueryCache query_cache;
304 VKFenceManager fence_manager; 234 VKFenceManager fence_manager;
305 235
@@ -308,16 +238,11 @@ private:
308 vk::Event wfi_event; 238 vk::Event wfi_event;
309 VideoCommon::Shader::AsyncShaders async_shaders; 239 VideoCommon::Shader::AsyncShaders async_shaders;
310 240
311 std::array<View, Maxwell::NumRenderTargets> color_attachments; 241 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
312 View zeta_attachment; 242 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
313 243 boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
314 std::vector<ImageView> sampled_views;
315 std::vector<ImageView> image_views;
316 244
317 u32 draw_counter = 0; 245 u32 draw_counter = 0;
318
319 // TODO(Rodrigo): Invalidate on image destruction
320 std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache;
321}; 246};
322 247
323} // namespace Vulkan 248} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
deleted file mode 100644
index 80284cf92..000000000
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <vector>
8
9#include "common/cityhash.h"
10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
14#include "video_core/renderer_vulkan/wrapper.h"
15
16namespace Vulkan {
17
18std::size_t RenderPassParams::Hash() const noexcept {
19 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
20 return static_cast<std::size_t>(hash);
21}
22
23bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept {
24 return std::memcmp(&rhs, this, sizeof *this) == 0;
25}
26
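[Editor's note] Hash() feeds the raw bytes of *this to CityHash64 and operator== is a memcmp. That is only sound because RenderPassParams has no padding, which the header (further below) enforces with std::has_unique_object_representations_v. The invariant in isolation, over a stand-in struct mirroring the field layout:

    #include <cstring>
    #include <type_traits>

    struct Params {
        unsigned char color_formats[8];
        unsigned char num_color_attachments;
        unsigned char texceptions;
        unsigned char zeta_format;
        unsigned char zeta_texception;
    }; // 12 bytes, no padding

    static_assert(std::has_unique_object_representations_v<Params>);
    static_assert(std::is_trivially_copyable_v<Params>);

    // memcmp-as-equality (and byte hashing) is valid only when every bit
    // of the object participates in its value; padding would break both.
    bool Equal(const Params& lhs, const Params& rhs) {
        return std::memcmp(&lhs, &rhs, sizeof(Params)) == 0;
    }
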
27VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
28
29VKRenderPassCache::~VKRenderPassCache() = default;
30
31VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
32 const auto [pair, is_cache_miss] = cache.try_emplace(params);
33 auto& entry = pair->second;
34 if (is_cache_miss) {
35 entry = CreateRenderPass(params);
36 }
37 return *entry;
38}
39
40vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
41 using namespace VideoCore::Surface;
42 const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
43
44 std::vector<VkAttachmentDescription> descriptors;
45 descriptors.reserve(num_attachments);
46
47 std::vector<VkAttachmentReference> color_references;
48 color_references.reserve(num_attachments);
49
50 for (std::size_t rt = 0; rt < num_attachments; ++rt) {
51 const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
52 const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
53 const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
54 ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
55 static_cast<int>(pixel_format));
56
57 // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed.
58 const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
59 ? VK_IMAGE_LAYOUT_GENERAL
60 : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
61 descriptors.push_back({
62 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
63 .format = format.format,
64 .samples = VK_SAMPLE_COUNT_1_BIT,
65 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
66 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
67 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
68 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
69 .initialLayout = color_layout,
70 .finalLayout = color_layout,
71 });
72
73 color_references.push_back({
74 .attachment = static_cast<u32>(rt),
75 .layout = color_layout,
76 });
77 }
78
79 VkAttachmentReference zeta_attachment_ref;
80 const bool has_zeta = params.zeta_format != 0;
81 if (has_zeta) {
82 const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format);
83 const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format);
84 const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
85 ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
86 static_cast<int>(pixel_format));
87
88 const VkImageLayout zeta_layout = params.zeta_texception != 0
89 ? VK_IMAGE_LAYOUT_GENERAL
90 : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
91 descriptors.push_back({
92 .flags = 0,
93 .format = format.format,
94 .samples = VK_SAMPLE_COUNT_1_BIT,
95 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
96 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
97 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
98 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
99 .initialLayout = zeta_layout,
100 .finalLayout = zeta_layout,
101 });
102
103 zeta_attachment_ref = {
104 .attachment = static_cast<u32>(num_attachments),
105 .layout = zeta_layout,
106 };
107 }
108
109 const VkSubpassDescription subpass_description{
110 .flags = 0,
111 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
112 .inputAttachmentCount = 0,
113 .pInputAttachments = nullptr,
114 .colorAttachmentCount = static_cast<u32>(color_references.size()),
115 .pColorAttachments = color_references.data(),
116 .pResolveAttachments = nullptr,
117 .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr,
118 .preserveAttachmentCount = 0,
119 .pPreserveAttachments = nullptr,
120 };
121
122 VkAccessFlags access = 0;
123 VkPipelineStageFlags stage = 0;
124 if (!color_references.empty()) {
125 access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
126 stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
127 }
128
129 if (has_zeta) {
130 access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
131 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
132 stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
133 }
134
135 const VkSubpassDependency subpass_dependency{
136 .srcSubpass = VK_SUBPASS_EXTERNAL,
137 .dstSubpass = 0,
138 .srcStageMask = stage,
139 .dstStageMask = stage,
140 .srcAccessMask = 0,
141 .dstAccessMask = access,
142 .dependencyFlags = 0,
143 };
144
145 return device.GetLogical().CreateRenderPass({
146 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
147 .pNext = nullptr,
148 .flags = 0,
149 .attachmentCount = static_cast<u32>(descriptors.size()),
150 .pAttachments = descriptors.data(),
151 .subpassCount = 1,
152 .pSubpasses = &subpass_description,
153 .dependencyCount = 1,
154 .pDependencies = &subpass_dependency,
155 });
156}
157
158} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
deleted file mode 100644
index 8b0fec720..000000000
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h
+++ /dev/null
@@ -1,70 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <unordered_map>
9
10#include <boost/container/static_vector.hpp>
11#include <boost/functional/hash.hpp>
12
13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/renderer_vulkan/wrapper.h"
15#include "video_core/surface.h"
16
17namespace Vulkan {
18
19class VKDevice;
20
21struct RenderPassParams {
22 std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats;
23 u8 num_color_attachments;
24 u8 texceptions;
25
26 u8 zeta_format;
27 u8 zeta_texception;
28
29 std::size_t Hash() const noexcept;
30
31 bool operator==(const RenderPassParams& rhs) const noexcept;
32
33 bool operator!=(const RenderPassParams& rhs) const noexcept {
34 return !operator==(rhs);
35 }
36};
37static_assert(std::has_unique_object_representations_v<RenderPassParams>);
38static_assert(std::is_trivially_copyable_v<RenderPassParams>);
39static_assert(std::is_trivially_constructible_v<RenderPassParams>);
40
41} // namespace Vulkan
42
43namespace std {
44
45template <>
46struct hash<Vulkan::RenderPassParams> {
47 std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
48 return k.Hash();
49 }
50};
51
52} // namespace std
53
54namespace Vulkan {
55
56class VKRenderPassCache final {
57public:
58 explicit VKRenderPassCache(const VKDevice& device);
59 ~VKRenderPassCache();
60
61 VkRenderPass GetRenderPass(const RenderPassParams& params);
62
63private:
64 vk::RenderPass CreateRenderPass(const RenderPassParams& params) const;
65
66 const VKDevice& device;
67 std::unordered_map<RenderPassParams, vk::RenderPass> cache;
68};
69
70} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
deleted file mode 100644
index b068888f9..000000000
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <unordered_map>
6
7#include "video_core/renderer_vulkan/maxwell_to_vk.h"
8#include "video_core/renderer_vulkan/vk_sampler_cache.h"
9#include "video_core/renderer_vulkan/wrapper.h"
10#include "video_core/textures/texture.h"
11
12using Tegra::Texture::TextureMipmapFilter;
13
14namespace Vulkan {
15
16namespace {
17
18VkBorderColor ConvertBorderColor(std::array<float, 4> color) {
19 // TODO(Rodrigo): Manage integer border colors
20 if (color == std::array<float, 4>{0, 0, 0, 0}) {
21 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
22 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
23 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
24 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
25 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
26 }
27 if (color[0] + color[1] + color[2] > 1.35f) {
28 // If color elements are brighter than roughly 0.5 average, use white border
29 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
30 } else if (color[3] > 0.5f) {
31 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
32 } else {
33 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
34 }
35}
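
For border colors other than the three exact matches, the heuristic above picks the closest stock Vulkan border color. For example, {0.9f, 0.6f, 0.3f, 1.0f} sums RGB to 1.8 > 1.35, so it maps to opaque white; {0.1f, 0.1f, 0.1f, 0.9f} sums RGB to only 0.3 but has alpha 0.9 > 0.5, so it maps to opaque black; {0.1f, 0.1f, 0.1f, 0.2f} falls through to transparent black.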
36
37} // Anonymous namespace
38
39VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
40
41VKSamplerCache::~VKSamplerCache() = default;
42
43vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
44 const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
45 const std::array color = tsc.GetBorderColor();
46
47 VkSamplerCustomBorderColorCreateInfoEXT border{
48 .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
49 .pNext = nullptr,
50 .customBorderColor = {},
51 .format = VK_FORMAT_UNDEFINED,
52 };
53 std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
54
55 return device.GetLogical().CreateSampler({
56 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
57 .pNext = arbitrary_borders ? &border : nullptr,
58 .flags = 0,
59 .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
60 .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
61 .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
62 .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
63 .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
64 .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
65 .mipLodBias = tsc.GetLodBias(),
66 .anisotropyEnable =
67 static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE),
68 .maxAnisotropy = tsc.GetMaxAnisotropy(),
69 .compareEnable = tsc.depth_compare_enabled,
70 .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
71 .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(),
72 .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(),
73 .borderColor =
74 arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
75 .unnormalizedCoordinates = VK_FALSE,
76 });
77}
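
Worth noting: the minLod = 0.0f / maxLod = 0.25f pair used when mipmap_filter is TextureMipmapFilter::None follows the Vulkan specification's recipe for emulating non-mipmapped filtering. Assuming MaxwellToVK maps None to VK_SAMPLER_MIPMAP_MODE_NEAREST, clamping the LOD to [0, 0.25] guarantees that only mip level 0 is ever sampled.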
78
79VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
80 return *sampler;
81}
82
83} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
deleted file mode 100644
index a33d1c0ee..000000000
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ /dev/null
@@ -1,29 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/renderer_vulkan/wrapper.h"
8#include "video_core/sampler_cache.h"
9#include "video_core/textures/texture.h"
10
11namespace Vulkan {
12
13class VKDevice;
14
15class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> {
16public:
17 explicit VKSamplerCache(const VKDevice& device);
18 ~VKSamplerCache();
19
20protected:
21 vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
22
23 VkSampler ToSamplerType(const vk::Sampler& sampler) const override;
24
25private:
26 const VKDevice& device;
27};
28
29} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 1a483dc71..c104c6fe3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -16,6 +16,7 @@
16#include "video_core/renderer_vulkan/vk_query_cache.h" 16#include "video_core/renderer_vulkan/vk_query_cache.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h" 17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_state_tracker.h" 18#include "video_core/renderer_vulkan/vk_state_tracker.h"
19#include "video_core/renderer_vulkan/vk_texture_cache.h"
19#include "video_core/renderer_vulkan/wrapper.h" 20#include "video_core/renderer_vulkan/wrapper.h"
20 21
21namespace Vulkan { 22namespace Vulkan {
@@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() {
96 AcquireNewChunk(); 97 AcquireNewChunk();
97} 98}
98 99
99void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, 100void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) {
100 VkExtent2D render_area) { 101 const VkRenderPass renderpass = framebuffer->RenderPass();
101 if (renderpass == state.renderpass && framebuffer == state.framebuffer && 102 const VkFramebuffer framebuffer_handle = framebuffer->Handle();
103 const VkExtent2D render_area = framebuffer->RenderArea();
104 if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
102 render_area.width == state.render_area.width && 105 render_area.width == state.render_area.width &&
103 render_area.height == state.render_area.height) { 106 render_area.height == state.render_area.height) {
104 return; 107 return;
105 } 108 }
106 const bool end_renderpass = state.renderpass != nullptr; 109 EndRenderPass();
107 state.renderpass = renderpass; 110 state.renderpass = renderpass;
108 state.framebuffer = framebuffer; 111 state.framebuffer = framebuffer_handle;
109 state.render_area = render_area; 112 state.render_area = render_area;
110 113
111 const VkRenderPassBeginInfo renderpass_bi{ 114 Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
112 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 115 const VkRenderPassBeginInfo renderpass_bi{
113 .pNext = nullptr, 116 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
114 .renderPass = renderpass, 117 .pNext = nullptr,
115 .framebuffer = framebuffer, 118 .renderPass = renderpass,
116 .renderArea = 119 .framebuffer = framebuffer_handle,
117 { 120 .renderArea =
118 .offset = {.x = 0, .y = 0}, 121 {
119 .extent = render_area, 122 .offset = {.x = 0, .y = 0},
120 }, 123 .extent = render_area,
121 .clearValueCount = 0, 124 },
122 .pClearValues = nullptr, 125 .clearValueCount = 0,
123 }; 126 .pClearValues = nullptr,
124 127 };
125 Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) {
126 if (end_renderpass) {
127 cmdbuf.EndRenderPass();
128 }
129 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); 128 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
130 }); 129 });
130 num_renderpass_images = framebuffer->NumImages();
131 renderpass_images = framebuffer->Images();
132 renderpass_image_ranges = framebuffer->ImageRanges();
131} 133}
132 134
133void VKScheduler::RequestOutsideRenderPassOperationContext() { 135void VKScheduler::RequestOutsideRenderPassOperationContext() {
@@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() {
241 if (!state.renderpass) { 243 if (!state.renderpass) {
242 return; 244 return;
243 } 245 }
246 Record([num_images = num_renderpass_images, images = renderpass_images,
247 ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
248 std::array<VkImageMemoryBarrier, 9> barriers;
249 for (size_t i = 0; i < num_images; ++i) {
250 barriers[i] = VkImageMemoryBarrier{
251 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
252 .pNext = nullptr,
253 .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
254 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
255 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
256 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
257 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
258 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
259 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
260 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
261 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
262 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
263 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
264 .image = images[i],
265 .subresourceRange = ranges[i],
266 };
267 }
268 cmdbuf.EndRenderPass();
269 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
270 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
271 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
272 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
273 vk::Span(barriers.data(), num_images));
274 });
244 state.renderpass = nullptr; 275 state.renderpass = nullptr;
245 Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); 276 num_renderpass_images = 0;
246} 277}
247 278
248void VKScheduler::AcquireNewChunk() { 279void VKScheduler::AcquireNewChunk() {
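
The std::array<VkImageMemoryBarrier, 9> in the new EndRenderPass sizes for the worst case of 8 color render targets plus one depth/stencil attachment, matching the renderpass_images and renderpass_image_ranges arrays added to the scheduler header below. The barriers keep oldLayout == newLayout == VK_IMAGE_LAYOUT_GENERAL, so no layout transition occurs; their purpose is the execution and memory dependency that makes attachment writes visible to subsequent shader and attachment reads once the render pass ends.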
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 7be8a19f0..0a36c8fad 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -17,6 +17,7 @@
17namespace Vulkan { 17namespace Vulkan {
18 18
19class CommandPool; 19class CommandPool;
20class Framebuffer;
20class MasterSemaphore; 21class MasterSemaphore;
21class StateTracker; 22class StateTracker;
22class VKDevice; 23class VKDevice;
@@ -52,8 +53,7 @@ public:
52 void DispatchWork(); 53 void DispatchWork();
53 54
54 /// Requests to begin a renderpass. 55 /// Requests to begin a renderpass.
55 void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, 56 void RequestRenderpass(const Framebuffer* framebuffer);
56 VkExtent2D render_area);
57 57
58 /// Requests the current execution context to be able to execute operations only allowed outside 58 /// Requests the current execution context to be able to execute operations only allowed outside
59 /// of a renderpass. 59 /// of a renderpass.
@@ -62,6 +62,9 @@ public:
62 /// Binds a pipeline to the current execution context. 62 /// Binds a pipeline to the current execution context.
63 void BindGraphicsPipeline(VkPipeline pipeline); 63 void BindGraphicsPipeline(VkPipeline pipeline);
64 64
65 /// Invalidates current command buffer state except for render passes
66 void InvalidateState();
67
65 /// Assigns the query cache. 68 /// Assigns the query cache.
66 void SetQueryCache(VKQueryCache& query_cache_) { 69 void SetQueryCache(VKQueryCache& query_cache_) {
67 query_cache = &query_cache_; 70 query_cache = &query_cache_;
@@ -104,7 +107,7 @@ private:
104 template <typename T> 107 template <typename T>
105 class TypedCommand final : public Command { 108 class TypedCommand final : public Command {
106 public: 109 public:
107 explicit TypedCommand(T&& command) : command{std::move(command)} {} 110 explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
108 ~TypedCommand() override = default; 111 ~TypedCommand() override = default;
109 112
110 TypedCommand(TypedCommand&&) = delete; 113 TypedCommand(TypedCommand&&) = delete;
@@ -170,8 +173,6 @@ private:
170 173
171 void AllocateNewContext(); 174 void AllocateNewContext();
172 175
173 void InvalidateState();
174
175 void EndPendingOperations(); 176 void EndPendingOperations();
176 177
177 void EndRenderPass(); 178 void EndRenderPass();
@@ -192,6 +193,11 @@ private:
192 std::thread worker_thread; 193 std::thread worker_thread;
193 194
194 State state; 195 State state;
196
197 u32 num_renderpass_images = 0;
198 std::array<VkImage, 9> renderpass_images{};
199 std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
200
195 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; 201 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
196 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; 202 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
197 std::mutex mutex; 203 std::mutex mutex;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index fed9ebecd..09d6f9f35 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -55,8 +55,8 @@ enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
55 55
56class Expression final { 56class Expression final {
57public: 57public:
58 Expression(Id id, Type type) : id{id}, type{type} { 58 Expression(Id id_, Type type_) : id{id_}, type{type_} {
59 ASSERT(type != Type::Void); 59 ASSERT(type_ != Type::Void);
60 } 60 }
61 Expression() : type{Type::Void} {} 61 Expression() : type{Type::Void} {}
62 62
@@ -102,7 +102,7 @@ struct GenericVaryingDescription {
102 bool is_scalar = false; 102 bool is_scalar = false;
103}; 103};
104 104
105spv::Dim GetSamplerDim(const Sampler& sampler) { 105spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
106 ASSERT(!sampler.is_buffer); 106 ASSERT(!sampler.is_buffer);
107 switch (sampler.type) { 107 switch (sampler.type) {
108 case Tegra::Shader::TextureType::Texture1D: 108 case Tegra::Shader::TextureType::Texture1D:
@@ -114,12 +114,12 @@ spv::Dim GetSamplerDim(const Sampler& sampler) {
114 case Tegra::Shader::TextureType::TextureCube: 114 case Tegra::Shader::TextureType::TextureCube:
115 return spv::Dim::Cube; 115 return spv::Dim::Cube;
116 default: 116 default:
117 UNIMPLEMENTED_MSG("Unimplemented sampler type={}", static_cast<int>(sampler.type)); 117 UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type);
118 return spv::Dim::Dim2D; 118 return spv::Dim::Dim2D;
119 } 119 }
120} 120}
121 121
122std::pair<spv::Dim, bool> GetImageDim(const Image& image) { 122std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
123 switch (image.type) { 123 switch (image.type) {
124 case Tegra::Shader::ImageType::Texture1D: 124 case Tegra::Shader::ImageType::Texture1D:
125 return {spv::Dim::Dim1D, false}; 125 return {spv::Dim::Dim1D, false};
@@ -134,7 +134,7 @@ std::pair<spv::Dim, bool> GetImageDim(const Image& image) {
134 case Tegra::Shader::ImageType::Texture3D: 134 case Tegra::Shader::ImageType::Texture3D:
135 return {spv::Dim::Dim3D, false}; 135 return {spv::Dim::Dim3D, false};
136 default: 136 default:
137 UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(image.type)); 137 UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type);
138 return {spv::Dim::Dim2D, false}; 138 return {spv::Dim::Dim2D, false};
139 } 139 }
140} 140}
@@ -281,12 +281,12 @@ u32 ShaderVersion(const VKDevice& device) {
281 281
282class SPIRVDecompiler final : public Sirit::Module { 282class SPIRVDecompiler final : public Sirit::Module {
283public: 283public:
284 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, 284 explicit SPIRVDecompiler(const VKDevice& device_, const ShaderIR& ir_, ShaderType stage_,
285 const Registry& registry, const Specialization& specialization) 285 const Registry& registry_, const Specialization& specialization_)
286 : Module(ShaderVersion(device)), device{device}, ir{ir}, stage{stage}, 286 : Module(ShaderVersion(device_)), device{device_}, ir{ir_}, stage{stage_},
287 header{ir.GetHeader()}, registry{registry}, specialization{specialization} { 287 header{ir_.GetHeader()}, registry{registry_}, specialization{specialization_} {
288 if (stage != ShaderType::Compute) { 288 if (stage_ != ShaderType::Compute) {
289 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); 289 transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo());
290 } 290 }
291 291
292 AddCapability(spv::Capability::Shader); 292 AddCapability(spv::Capability::Shader);
@@ -330,7 +330,7 @@ public:
330 if (device.IsFloat16Supported()) { 330 if (device.IsFloat16Supported()) {
331 AddCapability(spv::Capability::Float16); 331 AddCapability(spv::Capability::Float16);
332 } 332 }
333 t_scalar_half = Name(TypeFloat(device.IsFloat16Supported() ? 16 : 32), "scalar_half"); 333 t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half");
334 t_half = Name(TypeVector(t_scalar_half, 2), "half"); 334 t_half = Name(TypeVector(t_scalar_half, 2), "half");
335 335
336 const Id main = Decompile(); 336 const Id main = Decompile();
@@ -980,7 +980,7 @@ private:
980 return binding; 980 return binding;
981 } 981 }
982 982
983 void DeclareImage(const Image& image, u32& binding) { 983 void DeclareImage(const ImageEntry& image, u32& binding) {
984 const auto [dim, arrayed] = GetImageDim(image); 984 const auto [dim, arrayed] = GetImageDim(image);
985 constexpr int depth = 0; 985 constexpr int depth = 0;
986 constexpr bool ms = false; 986 constexpr bool ms = false;
@@ -1088,9 +1088,9 @@ private:
1088 indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); 1088 indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size");
1089 } 1089 }
1090 1090
1091 const auto& output_attributes = ir.GetOutputAttributes(); 1091 const auto& ir_output_attributes = ir.GetOutputAttributes();
1092 const bool declare_clip_distances = 1092 const bool declare_clip_distances = std::any_of(
1093 std::any_of(output_attributes.begin(), output_attributes.end(), [](const auto& index) { 1093 ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) {
1094 return index == Attribute::Index::ClipDistances0123 || 1094 return index == Attribute::Index::ClipDistances0123 ||
1095 index == Attribute::Index::ClipDistances4567; 1095 index == Attribute::Index::ClipDistances4567;
1096 }); 1096 });
@@ -1254,7 +1254,7 @@ private:
1254 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); 1254 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1255 return {OpLoad(GetTypeDefinition(type), pointer), type}; 1255 return {OpLoad(GetTypeDefinition(type), pointer), type};
1256 } 1256 }
1257 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); 1257 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
1258 return {v_float_zero, Type::Float}; 1258 return {v_float_zero, Type::Float};
1259 } 1259 }
1260 1260
@@ -1890,7 +1890,7 @@ private:
1890 case Tegra::Shader::TextureType::Texture3D: 1890 case Tegra::Shader::TextureType::Texture3D:
1891 return 3; 1891 return 3;
1892 default: 1892 default:
1893 UNREACHABLE_MSG("Invalid texture type={}", static_cast<int>(type)); 1893 UNREACHABLE_MSG("Invalid texture type={}", type);
1894 return 2; 1894 return 2;
1895 } 1895 }
1896 }(); 1896 }();
@@ -2094,6 +2094,7 @@ private:
2094 return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); 2094 return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2);
2095 default: 2095 default:
2096 UNREACHABLE(); 2096 UNREACHABLE();
2097 return v_true;
2097 } 2098 }
2098 } 2099 }
2099 2100
@@ -2125,8 +2126,7 @@ private:
2125 OpStore(z_pointer, depth); 2126 OpStore(z_pointer, depth);
2126 } 2127 }
2127 if (stage == ShaderType::Fragment) { 2128 if (stage == ShaderType::Fragment) {
2128 const auto SafeGetRegister = [&](u32 reg) { 2129 const auto SafeGetRegister = [this](u32 reg) {
2129 // TODO(Rodrigo): Replace with contains once C++20 releases
2130 if (const auto it = registers.find(reg); it != registers.end()) { 2130 if (const auto it = registers.find(reg); it != registers.end()) {
2131 return OpLoad(t_float, it->second); 2131 return OpLoad(t_float, it->second);
2132 } 2132 }
@@ -2891,7 +2891,7 @@ private:
2891 2891
2892class ExprDecompiler { 2892class ExprDecompiler {
2893public: 2893public:
2894 explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} 2894 explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
2895 2895
2896 Id operator()(const ExprAnd& expr) { 2896 Id operator()(const ExprAnd& expr) {
2897 const Id type_def = decomp.GetTypeDefinition(Type::Bool); 2897 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
@@ -2947,7 +2947,7 @@ private:
2947 2947
2948class ASTDecompiler { 2948class ASTDecompiler {
2949public: 2949public:
2950 explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} 2950 explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
2951 2951
2952 void operator()(const ASTProgram& ast) { 2952 void operator()(const ASTProgram& ast) {
2953 ASTNode current = ast.nodes.GetFirst(); 2953 ASTNode current = ast.nodes.GetFirst();
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 110848922..ad91ad5de 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -21,17 +21,17 @@ class VKDevice;
21namespace Vulkan { 21namespace Vulkan {
22 22
23using Maxwell = Tegra::Engines::Maxwell3D::Regs; 23using Maxwell = Tegra::Engines::Maxwell3D::Regs;
24using UniformTexelEntry = VideoCommon::Shader::Sampler; 24using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
25using SamplerEntry = VideoCommon::Shader::Sampler; 25using SamplerEntry = VideoCommon::Shader::SamplerEntry;
26using StorageTexelEntry = VideoCommon::Shader::Image; 26using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
27using ImageEntry = VideoCommon::Shader::Image; 27using ImageEntry = VideoCommon::Shader::ImageEntry;
28 28
29constexpr u32 DESCRIPTOR_SET = 0; 29constexpr u32 DESCRIPTOR_SET = 0;
30 30
31class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { 31class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
32public: 32public:
33 explicit constexpr ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, u32 index) 33 explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_)
34 : VideoCommon::Shader::ConstBuffer{entry}, index{index} {} 34 : ConstBuffer{entry_}, index{index_} {}
35 35
36 constexpr u32 GetIndex() const { 36 constexpr u32 GetIndex() const {
37 return index; 37 return index;
@@ -43,8 +43,8 @@ private:
43 43
44class GlobalBufferEntry { 44class GlobalBufferEntry {
45public: 45public:
46 constexpr explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written) 46 constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
47 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {} 47 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
48 48
49 constexpr u32 GetCbufIndex() const { 49 constexpr u32 GetCbufIndex() const {
50 return cbuf_index; 50 return cbuf_index;
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index c1a218d76..38a0be7f2 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -13,18 +13,13 @@
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
16vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { 16vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code) {
17 // Avoid undefined behavior by copying to a staging allocation
18 ASSERT(code_size % sizeof(u32) == 0);
19 const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
20 std::memcpy(data.get(), code_data, code_size);
21
22 return device.GetLogical().CreateShaderModule({ 17 return device.GetLogical().CreateShaderModule({
23 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 18 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
24 .pNext = nullptr, 19 .pNext = nullptr,
25 .flags = 0, 20 .flags = 0,
26 .codeSize = code_size, 21 .codeSize = static_cast<u32>(code.size_bytes()),
27 .pCode = data.get(), 22 .pCode = code.data(),
28 }); 23 });
29} 24}
30 25
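
The std::span<const u32> signature makes the old staging copy unnecessary: the SPIR-V words now arrive already typed and aligned as u32 at the call site, so there is no raw-byte buffer left to reinterpret. A hypothetical call site, where GetSpirv() stands in for the decompiler output:

// Hypothetical usage, assuming `device` is a constructed VKDevice.
const std::vector<u32> spirv = GetSpirv();          // hypothetical helper
vk::ShaderModule module = BuildShader(device, spirv);
// std::vector<u32> converts implicitly to std::span<const u32>, so no
// intermediate allocation or std::memcpy is required anymore.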
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h
index d1d3f3cae..dce34a140 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.h
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -4,6 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <span>
8
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "video_core/renderer_vulkan/wrapper.h" 10#include "video_core/renderer_vulkan/wrapper.h"
9 11
@@ -11,6 +13,6 @@ namespace Vulkan {
11 13
12class VKDevice; 14class VKDevice;
13 15
14vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); 16vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code);
15 17
16} // namespace Vulkan 18} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 5d2c4a796..1779a2e30 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cstddef> 7#include <cstddef>
7#include <iterator> 8#include <iterator>
8 9
@@ -14,7 +15,7 @@
14#include "video_core/renderer_vulkan/vk_state_tracker.h" 15#include "video_core/renderer_vulkan/vk_state_tracker.h"
15 16
16#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) 17#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
17#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) 18#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
18 19
19namespace Vulkan { 20namespace Vulkan {
20 21
@@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table;
29using Flags = Maxwell3D::DirtyState::Flags; 30using Flags = Maxwell3D::DirtyState::Flags;
30 31
31Flags MakeInvalidationFlags() { 32Flags MakeInvalidationFlags() {
33 static constexpr std::array INVALIDATION_FLAGS{
34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
37 };
32 Flags flags{}; 38 Flags flags{};
33 flags[Viewports] = true; 39 for (const int flag : INVALIDATION_FLAGS) {
34 flags[Scissors] = true; 40 flags[flag] = true;
35 flags[DepthBias] = true; 41 }
36 flags[BlendConstants] = true;
37 flags[DepthBounds] = true;
38 flags[StencilProperties] = true;
39 flags[CullMode] = true;
40 flags[DepthBoundsEnable] = true;
41 flags[DepthTestEnable] = true;
42 flags[DepthWriteEnable] = true;
43 flags[DepthCompareOp] = true;
44 flags[FrontFace] = true;
45 flags[StencilOp] = true;
46 flags[StencilTestEnable] = true;
47 return flags; 42 return flags;
48} 43}
49 44
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 1de789e57..c335d2bdf 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -52,6 +52,14 @@ public:
52 current_topology = INVALID_TOPOLOGY; 52 current_topology = INVALID_TOPOLOGY;
53 } 53 }
54 54
55 void InvalidateViewports() {
56 flags[Dirty::Viewports] = true;
57 }
58
59 void InvalidateScissors() {
60 flags[Dirty::Scissors] = true;
61 }
62
55 bool TouchViewports() { 63 bool TouchViewports() {
56 return Exchange(Dirty::Viewports, false); 64 return Exchange(Dirty::Viewports, false);
57 } 65 }
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 1b59612b9..419cb154d 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -19,6 +19,10 @@ namespace Vulkan {
19 19
20namespace { 20namespace {
21 21
22constexpr VkBufferUsageFlags BUFFER_USAGE =
23 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
24 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
25
22constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; 26constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
23constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; 27constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
24 28
@@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
56 60
57} // Anonymous namespace 61} // Anonymous namespace
58 62
59VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, 63VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_)
60 VkBufferUsageFlags usage)
61 : device{device_}, scheduler{scheduler_} { 64 : device{device_}, scheduler{scheduler_} {
62 CreateBuffers(usage); 65 CreateBuffers();
63 ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); 66 ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
64 ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); 67 ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
65} 68}
66 69
67VKStreamBuffer::~VKStreamBuffer() = default; 70VKStreamBuffer::~VKStreamBuffer() = default;
68 71
69std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { 72std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {
70 ASSERT(size <= stream_buffer_size); 73 ASSERT(size <= stream_buffer_size);
71 mapped_size = size; 74 mapped_size = size;
72 75
@@ -76,7 +79,6 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
76 79
77 WaitPendingOperations(offset); 80 WaitPendingOperations(offset);
78 81
79 bool invalidated = false;
80 if (offset + size > stream_buffer_size) { 82 if (offset + size > stream_buffer_size) {
81 // The buffer would overflow, save the amount of used watches and reset the state. 83 // The buffer would overflow, save the amount of used watches and reset the state.
82 invalidation_mark = current_watch_cursor; 84 invalidation_mark = current_watch_cursor;
@@ -90,11 +92,9 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
90 92
91 // Ensure that we don't wait for uncommitted fences. 93 // Ensure that we don't wait for uncommitted fences.
92 scheduler.Flush(); 94 scheduler.Flush();
93
94 invalidated = true;
95 } 95 }
96 96
97 return {memory.Map(offset, size), offset, invalidated}; 97 return std::make_pair(memory.Map(offset, size), offset);
98} 98}
99 99
100void VKStreamBuffer::Unmap(u64 size) { 100void VKStreamBuffer::Unmap(u64 size) {
@@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) {
113 watch.tick = scheduler.CurrentTick(); 113 watch.tick = scheduler.CurrentTick();
114} 114}
115 115
116void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { 116void VKStreamBuffer::CreateBuffers() {
117 const auto memory_properties = device.GetPhysical().GetMemoryProperties(); 117 const auto memory_properties = device.GetPhysical().GetMemoryProperties();
118 const u32 preferred_type = GetMemoryType(memory_properties); 118 const u32 preferred_type = GetMemoryType(memory_properties);
119 const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; 119 const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
@@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
127 .pNext = nullptr, 127 .pNext = nullptr,
128 .flags = 0, 128 .flags = 0,
129 .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), 129 .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
130 .usage = usage, 130 .usage = BUFFER_USAGE,
131 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 131 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
132 .queueFamilyIndexCount = 0, 132 .queueFamilyIndexCount = 0,
133 .pQueueFamilyIndices = nullptr, 133 .pQueueFamilyIndices = nullptr,
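
With the invalidation boolean dropped from Map's result, a caller only receives the write pointer and the buffer offset; overflow handling and fence waits stay inside the stream buffer. A hypothetical usage sketch, assuming `stream_buffer`, `data`, and `size` are in scope:

const auto [pointer, offset] = stream_buffer.Map(size, /*alignment=*/4);
std::memcpy(pointer, data, size); // stage the data through the host-visible mapping
stream_buffer.Unmap(size);        // publish `size` bytes to the GPU
// `offset` is what later gets bound, e.g. as a vertex or uniform buffer offset.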
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 5e15ad78f..1428f77bf 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -5,7 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <optional> 7#include <optional>
8#include <tuple> 8#include <utility>
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
@@ -19,17 +19,15 @@ class VKScheduler;
19 19
20class VKStreamBuffer final { 20class VKStreamBuffer final {
21public: 21public:
22 explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, 22 explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler);
23 VkBufferUsageFlags usage);
24 ~VKStreamBuffer(); 23 ~VKStreamBuffer();
25 24
26 /** 25 /**
27 * Reserves a region of memory from the stream buffer. 26 * Reserves a region of memory from the stream buffer.
28 * @param size Size to reserve. 27 * @param size Size to reserve.
29 * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer 28 * @returns A pair of a raw memory pointer (with offset added), and the buffer offset
30 * offset and a boolean that's true when buffer has been invalidated.
31 */ 29 */
32 std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); 30 std::pair<u8*, u64> Map(u64 size, u64 alignment);
33 31
34 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 32 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
35 void Unmap(u64 size); 33 void Unmap(u64 size);
@@ -49,7 +47,7 @@ private:
49 }; 47 };
50 48
51 /// Creates Vulkan buffer handles committing the required memory. 49 /// Creates Vulkan buffer handles committing the required memory.
52 void CreateBuffers(VkBufferUsageFlags usage); 50 void CreateBuffers();
53 51
54 /// Increases the amount of watches available. 52 /// Increases the amount of watches available.
55 void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); 53 void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index f2c8f2ae1..261808391 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -4,613 +4,1103 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <cstddef> 7#include <span>
8#include <cstring>
9#include <memory>
10#include <variant>
11#include <vector> 8#include <vector>
12 9
13#include "common/assert.h" 10#include "video_core/engines/fermi_2d.h"
14#include "common/common_types.h" 11#include "video_core/renderer_vulkan/blit_image.h"
15#include "core/core.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/morton.h"
18#include "video_core/renderer_vulkan/maxwell_to_vk.h" 12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
19#include "video_core/renderer_vulkan/vk_device.h" 13#include "video_core/renderer_vulkan/vk_device.h"
20#include "video_core/renderer_vulkan/vk_memory_manager.h"
21#include "video_core/renderer_vulkan/vk_rasterizer.h"
22#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
23#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 15#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
24#include "video_core/renderer_vulkan/vk_texture_cache.h" 16#include "video_core/renderer_vulkan/vk_texture_cache.h"
25#include "video_core/renderer_vulkan/wrapper.h" 17#include "video_core/renderer_vulkan/wrapper.h"
26#include "video_core/surface.h"
27 18
28namespace Vulkan { 19namespace Vulkan {
29 20
30using VideoCore::MortonSwizzle; 21using Tegra::Engines::Fermi2D;
31using VideoCore::MortonSwizzleMode;
32
33using Tegra::Texture::SwizzleSource; 22using Tegra::Texture::SwizzleSource;
34using VideoCore::Surface::PixelFormat; 23using Tegra::Texture::TextureMipmapFilter;
35using VideoCore::Surface::SurfaceTarget; 24using VideoCommon::BufferImageCopy;
25using VideoCommon::ImageInfo;
26using VideoCommon::ImageType;
27using VideoCommon::SubresourceRange;
28using VideoCore::Surface::IsPixelFormatASTC;
36 29
37namespace { 30namespace {
38 31
39VkImageType SurfaceTargetToImage(SurfaceTarget target) { 32constexpr std::array ATTACHMENT_REFERENCES{
40 switch (target) { 33 VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
41 case SurfaceTarget::Texture1D: 34 VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
42 case SurfaceTarget::Texture1DArray: 35 VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
36 VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
37 VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
38 VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
39 VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
40 VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
41 VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
42};
43
44constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
45 if (color == std::array<float, 4>{0, 0, 0, 0}) {
46 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
47 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
48 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
49 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
50 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
51 }
52 if (color[0] + color[1] + color[2] > 1.35f) {
53 // If color elements are brighter than roughly 0.5 average, use white border
54 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
55 } else if (color[3] > 0.5f) {
56 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
57 } else {
58 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
59 }
60}
61
62[[nodiscard]] VkImageType ConvertImageType(const ImageType type) {
63 switch (type) {
64 case ImageType::e1D:
43 return VK_IMAGE_TYPE_1D; 65 return VK_IMAGE_TYPE_1D;
44 case SurfaceTarget::Texture2D: 66 case ImageType::e2D:
45 case SurfaceTarget::Texture2DArray: 67 case ImageType::Linear:
46 case SurfaceTarget::TextureCubemap:
47 case SurfaceTarget::TextureCubeArray:
48 return VK_IMAGE_TYPE_2D; 68 return VK_IMAGE_TYPE_2D;
49 case SurfaceTarget::Texture3D: 69 case ImageType::e3D:
50 return VK_IMAGE_TYPE_3D; 70 return VK_IMAGE_TYPE_3D;
51 case SurfaceTarget::TextureBuffer: 71 case ImageType::Buffer:
52 UNREACHABLE(); 72 break;
53 return {};
54 } 73 }
55 UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); 74 UNREACHABLE_MSG("Invalid image type={}", type);
56 return {}; 75 return {};
57} 76}
58 77
59VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { 78[[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) {
60 if (pixel_format < PixelFormat::MaxColorFormat) { 79 switch (num_samples) {
61 return VK_IMAGE_ASPECT_COLOR_BIT; 80 case 1:
62 } else if (pixel_format < PixelFormat::MaxDepthFormat) { 81 return VK_SAMPLE_COUNT_1_BIT;
63 return VK_IMAGE_ASPECT_DEPTH_BIT; 82 case 2:
64 } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { 83 return VK_SAMPLE_COUNT_2_BIT;
65 return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; 84 case 4:
66 } else { 85 return VK_SAMPLE_COUNT_4_BIT;
67 UNREACHABLE_MSG("Invalid pixel format={}", static_cast<int>(pixel_format)); 86 case 8:
68 return VK_IMAGE_ASPECT_COLOR_BIT; 87 return VK_SAMPLE_COUNT_8_BIT;
88 case 16:
89 return VK_SAMPLE_COUNT_16_BIT;
90 default:
91 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
92 return VK_SAMPLE_COUNT_1_BIT;
69 } 93 }
70} 94}
71 95
72VkImageViewType GetImageViewType(SurfaceTarget target) { 96[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) {
73 switch (target) { 97 const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format);
74 case SurfaceTarget::Texture1D: 98 VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
75 return VK_IMAGE_VIEW_TYPE_1D; 99 if (info.type == ImageType::e2D && info.resources.layers >= 6 &&
76 case SurfaceTarget::Texture2D: 100 info.size.width == info.size.height) {
77 return VK_IMAGE_VIEW_TYPE_2D; 101 flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
78 case SurfaceTarget::Texture3D:
79 return VK_IMAGE_VIEW_TYPE_3D;
80 case SurfaceTarget::Texture1DArray:
81 return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
82 case SurfaceTarget::Texture2DArray:
83 return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
84 case SurfaceTarget::TextureCubemap:
85 return VK_IMAGE_VIEW_TYPE_CUBE;
86 case SurfaceTarget::TextureCubeArray:
87 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
88 case SurfaceTarget::TextureBuffer:
89 break;
90 } 102 }
91 UNREACHABLE(); 103 if (info.type == ImageType::e3D) {
92 return {}; 104 flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
93} 105 }
94 106 VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
95vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, 107 VK_IMAGE_USAGE_SAMPLED_BIT;
96 std::size_t host_memory_size) { 108 if (format_info.attachable) {
97 // TODO(Rodrigo): Move texture buffer creation to the buffer cache 109 switch (VideoCore::Surface::GetFormatType(info.format)) {
98 return device.GetLogical().CreateBuffer({ 110 case VideoCore::Surface::SurfaceType::ColorTexture:
99 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 111 usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
112 break;
113 case VideoCore::Surface::SurfaceType::Depth:
114 case VideoCore::Surface::SurfaceType::DepthStencil:
115 usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
116 break;
117 default:
118 UNREACHABLE_MSG("Invalid surface type");
119 }
120 }
121 if (format_info.storage) {
122 usage |= VK_IMAGE_USAGE_STORAGE_BIT;
123 }
124 const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples);
125 return VkImageCreateInfo{
126 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
100 .pNext = nullptr, 127 .pNext = nullptr,
101 .flags = 0, 128 .flags = flags,
102 .size = static_cast<VkDeviceSize>(host_memory_size), 129 .imageType = ConvertImageType(info.type),
103 .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | 130 .format = format_info.format,
104 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | 131 .extent =
105 VK_BUFFER_USAGE_TRANSFER_DST_BIT, 132 {
133 .width = info.size.width >> samples_x,
134 .height = info.size.height >> samples_y,
135 .depth = info.size.depth,
136 },
137 .mipLevels = static_cast<u32>(info.resources.levels),
138 .arrayLayers = static_cast<u32>(info.resources.layers),
139 .samples = ConvertSampleCount(info.num_samples),
140 .tiling = VK_IMAGE_TILING_OPTIMAL,
141 .usage = usage,
106 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 142 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
107 .queueFamilyIndexCount = 0, 143 .queueFamilyIndexCount = 0,
108 .pQueueFamilyIndices = nullptr, 144 .pQueueFamilyIndices = nullptr,
109 }); 145 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
110}
111
112VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
113 const SurfaceParams& params, VkBuffer buffer,
114 std::size_t host_memory_size) {
115 ASSERT(params.IsBuffer());
116
117 return {
118 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
119 .pNext = nullptr,
120 .flags = 0,
121 .buffer = buffer,
122 .format =
123 MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format,
124 .offset = 0,
125 .range = static_cast<VkDeviceSize>(host_memory_size),
126 }; 146 };
127} 147}
128 148
129VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { 149[[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) {
130 ASSERT(!params.IsBuffer()); 150 if (info.type == ImageType::Buffer) {
131 151 return vk::Image{};
132 const auto [format, attachable, storage] = 152 }
133 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); 153 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
154}
134 155
135 VkImageCreateInfo ci{ 156[[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) {
136 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 157 if (info.type != ImageType::Buffer) {
158 return vk::Buffer{};
159 }
160 const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format);
161 return device.GetLogical().CreateBuffer(VkBufferCreateInfo{
162 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
137 .pNext = nullptr, 163 .pNext = nullptr,
138 .flags = 0, 164 .flags = 0,
139 .imageType = SurfaceTargetToImage(params.target), 165 .size = info.size.width * bytes_per_block,
140 .format = format, 166 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
141 .extent = {}, 167 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
142 .mipLevels = params.num_levels, 168 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
143 .arrayLayers = static_cast<u32>(params.GetNumLayers()),
144 .samples = VK_SAMPLE_COUNT_1_BIT,
145 .tiling = VK_IMAGE_TILING_OPTIMAL,
146 .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
147 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
148 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 169 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
149 .queueFamilyIndexCount = 0, 170 .queueFamilyIndexCount = 0,
150 .pQueueFamilyIndices = nullptr, 171 .pQueueFamilyIndices = nullptr,
151 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 172 });
152 };
153 if (attachable) {
154 ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
155 : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
156 }
157 if (storage) {
158 ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
159 }
160
161 switch (params.target) {
162 case SurfaceTarget::TextureCubemap:
163 case SurfaceTarget::TextureCubeArray:
164 ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
165 [[fallthrough]];
166 case SurfaceTarget::Texture1D:
167 case SurfaceTarget::Texture1DArray:
168 case SurfaceTarget::Texture2D:
169 case SurfaceTarget::Texture2DArray:
170 ci.extent = {params.width, params.height, 1};
171 break;
172 case SurfaceTarget::Texture3D:
173 ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
174 ci.extent = {params.width, params.height, params.depth};
175 break;
176 case SurfaceTarget::TextureBuffer:
177 UNREACHABLE();
178 }
179
180 return ci;
181} 173}
182 174
183u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, 175[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
184 Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) { 176 switch (VideoCore::Surface::GetFormatType(format)) {
185 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | 177 case VideoCore::Surface::SurfaceType::ColorTexture:
186 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); 178 return VK_IMAGE_ASPECT_COLOR_BIT;
179 case VideoCore::Surface::SurfaceType::Depth:
180 return VK_IMAGE_ASPECT_DEPTH_BIT;
181 case VideoCore::Surface::SurfaceType::DepthStencil:
182 return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
183 default:
184 UNREACHABLE_MSG("Invalid surface type");
185 return VkImageAspectFlags{};
186 }
187} 187}
188 188
189} // Anonymous namespace 189[[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) {
190 190 if (info.IsRenderTarget()) {
191CachedSurface::CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager, 191 return ImageAspectMask(info.format);
192 VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
193 GPUVAddr gpu_addr, const SurfaceParams& params)
194 : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device},
195 memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
196 if (params.IsBuffer()) {
197 buffer = CreateBuffer(device, params, host_memory_size);
198 commit = memory_manager.Commit(buffer, false);
199
200 const auto buffer_view_ci =
201 GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size);
202 format = buffer_view_ci.format;
203
204 buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci);
205 } else {
206 const auto image_ci = GenerateImageCreateInfo(device, params);
207 format = image_ci.format;
208
209 image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format));
210 commit = memory_manager.Commit(image->GetHandle(), false);
211 } 192 }
212 193 const bool is_first = info.Swizzle()[0] == SwizzleSource::R;
213 // TODO(Rodrigo): Move this to a virtual function. 194 switch (info.format) {
214 u32 num_layers = 1; 195 case PixelFormat::D24_UNORM_S8_UINT:
215 if (params.is_layered || params.target == SurfaceTarget::Texture3D) { 196 case PixelFormat::D32_FLOAT_S8_UINT:
216 num_layers = params.depth; 197 return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
198 case PixelFormat::S8_UINT_D24_UNORM:
199 return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
200 case PixelFormat::D16_UNORM:
201 case PixelFormat::D32_FLOAT:
202 return VK_IMAGE_ASPECT_DEPTH_BIT;
203 default:
204 return VK_IMAGE_ASPECT_COLOR_BIT;
217 } 205 }
218 main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
219} 206}
220 207
221CachedSurface::~CachedSurface() = default; 208[[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device,
222 209 const ImageView* image_view) {
223void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { 210 const auto pixel_format = image_view->format;
224 // To upload data we have to be outside of a renderpass 211 return VkAttachmentDescription{
225 scheduler.RequestOutsideRenderPassOperationContext(); 212 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
213 .format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format).format,
214 .samples = image_view->Samples(),
215 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
216 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
217 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
218 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
219 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
220 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
221 };
222}
226 223
227 if (params.IsBuffer()) { 224[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
228 UploadBuffer(staging_buffer); 225 switch (swizzle) {
229 } else { 226 case SwizzleSource::Zero:
230 UploadImage(staging_buffer); 227 return VK_COMPONENT_SWIZZLE_ZERO;
228 case SwizzleSource::R:
229 return VK_COMPONENT_SWIZZLE_R;
230 case SwizzleSource::G:
231 return VK_COMPONENT_SWIZZLE_G;
232 case SwizzleSource::B:
233 return VK_COMPONENT_SWIZZLE_B;
234 case SwizzleSource::A:
235 return VK_COMPONENT_SWIZZLE_A;
236 case SwizzleSource::OneFloat:
237 case SwizzleSource::OneInt:
238 return VK_COMPONENT_SWIZZLE_ONE;
231 } 239 }
240 UNREACHABLE_MSG("Invalid swizzle={}", swizzle);
241 return VK_COMPONENT_SWIZZLE_ZERO;
232} 242}
233 243
234void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { 244[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
235 UNIMPLEMENTED_IF(params.IsBuffer()); 245 switch (type) {
236 246 case VideoCommon::ImageViewType::e1D:
237 if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { 247 return VK_IMAGE_VIEW_TYPE_1D;
238 LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); 248 case VideoCommon::ImageViewType::e2D:
249 return VK_IMAGE_VIEW_TYPE_2D;
250 case VideoCommon::ImageViewType::Cube:
251 return VK_IMAGE_VIEW_TYPE_CUBE;
252 case VideoCommon::ImageViewType::e3D:
253 return VK_IMAGE_VIEW_TYPE_3D;
254 case VideoCommon::ImageViewType::e1DArray:
255 return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
256 case VideoCommon::ImageViewType::e2DArray:
257 return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
258 case VideoCommon::ImageViewType::CubeArray:
259 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
260 case VideoCommon::ImageViewType::Rect:
261 LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported");
262 return VK_IMAGE_VIEW_TYPE_2D;
263 case VideoCommon::ImageViewType::Buffer:
264 UNREACHABLE_MSG("Texture buffers can't be image views");
265 return VK_IMAGE_VIEW_TYPE_1D;
239 } 266 }
267 UNREACHABLE_MSG("Invalid image view type={}", type);
268 return VK_IMAGE_VIEW_TYPE_2D;
269}
240 270
241 // We can't copy images to buffers inside a renderpass 271[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers(
242 scheduler.RequestOutsideRenderPassOperationContext(); 272 VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) {
273 return VkImageSubresourceLayers{
274 .aspectMask = aspect_mask,
275 .mipLevel = static_cast<u32>(subresource.base_level),
276 .baseArrayLayer = static_cast<u32>(subresource.base_layer),
277 .layerCount = static_cast<u32>(subresource.num_layers),
278 };
279}
280
-244     FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT,
-245                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
281 [[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) {
282     return VkOffset3D{
283 .x = offset3d.x,
284 .y = offset3d.y,
285 .z = offset3d.z,
286 };
287}
288
-247     const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
-248     // TODO(Rodrigo): Do this in a single copy
-249     for (u32 level = 0; level < params.num_levels; ++level) {
-250         scheduler.Record([image = *image->GetHandle(), buffer = *buffer.handle,
-251                           copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) {
-252             cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy);
-253         });
289 [[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) {
290     return VkExtent3D{
291         .width = static_cast<u32>(extent3d.width),
292         .height = static_cast<u32>(extent3d.height),
293         .depth = static_cast<u32>(extent3d.depth),
294     };
295 }
-254     }
-255     scheduler.Finish();
296
-257     // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy.
-258     std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size);
297 [[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy,
298                                         VkImageAspectFlags aspect_mask) noexcept {
299 return VkImageCopy{
300 .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask),
301 .srcOffset = MakeOffset3D(copy.src_offset),
302 .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask),
303 .dstOffset = MakeOffset3D(copy.dst_offset),
304 .extent = MakeExtent3D(copy.extent),
305 };
306 }
307
-261 void CachedSurface::DecorateSurfaceName() {
-262     // TODO(Rodrigo): Add name decorations
308 [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
309     std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
310 std::vector<VkBufferCopy> result(copies.size());
311 std::ranges::transform(
312 copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
313 return VkBufferCopy{
314 .srcOffset = static_cast<VkDeviceSize>(copy.src_offset + buffer_offset),
315 .dstOffset = static_cast<VkDeviceSize>(copy.dst_offset),
316 .size = static_cast<VkDeviceSize>(copy.size),
317 };
318 });
319 return result;
320 }
321
-265 View CachedSurface::CreateView(const ViewParams& params) {
-266     // TODO(Rodrigo): Add name decorations
-267     return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
322 [[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies(
323     std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
324     struct Maker {
325 VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
326 return VkBufferImageCopy{
327 .bufferOffset = copy.buffer_offset + buffer_offset,
328 .bufferRowLength = copy.buffer_row_length,
329 .bufferImageHeight = copy.buffer_image_height,
330 .imageSubresource =
331 {
332 .aspectMask = aspect_mask,
333 .mipLevel = static_cast<u32>(copy.image_subresource.base_level),
334 .baseArrayLayer = static_cast<u32>(copy.image_subresource.base_layer),
335 .layerCount = static_cast<u32>(copy.image_subresource.num_layers),
336 },
337 .imageOffset =
338 {
339 .x = copy.image_offset.x,
340 .y = copy.image_offset.y,
341 .z = copy.image_offset.z,
342 },
343 .imageExtent =
344 {
345 .width = copy.image_extent.width,
346 .height = copy.image_extent.height,
347 .depth = copy.image_extent.depth,
348 },
349 };
350 }
351 size_t buffer_offset;
352 VkImageAspectFlags aspect_mask;
353 };
354 if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
355 std::vector<VkBufferImageCopy> result(copies.size() * 2);
356 std::ranges::transform(copies, result.begin(),
357 Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
358 std::ranges::transform(copies, result.begin() + copies.size(),
359 Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
360 return result;
361 } else {
362 std::vector<VkBufferImageCopy> result(copies.size());
363 std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
364 return result;
365 }
366 }
367
-270 void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
-271     const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
-272     std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
368 [[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask,
369                                                            const SubresourceRange& range) {
370     return VkImageSubresourceRange{
371 .aspectMask = aspect_mask,
372 .baseMipLevel = static_cast<u32>(range.base.level),
373 .levelCount = static_cast<u32>(range.extent.levels),
374 .baseArrayLayer = static_cast<u32>(range.base.layer),
375 .layerCount = static_cast<u32>(range.extent.layers),
376 };
377}
378
-274     scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
-275                       size = host_memory_size](vk::CommandBuffer cmdbuf) {
-276         VkBufferCopy copy;
-277         copy.srcOffset = 0;
-278         copy.dstOffset = 0;
-279         copy.size = size;
-280         cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
379 [[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) {
380     SubresourceRange range = image_view->range;
381     if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
382         // Slice image views always affect a single layer, but their subresource range corresponds
383         // to the slice. Override the value to affect a single layer.
384         range.base.layer = 0;
385         range.extent.layers = 1;
386 }
387 return MakeSubresourceRange(ImageAspectMask(image_view->format), range);
388}
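// [Editor's illustration, not part of the diff] Example of the slice override above,
// with hypothetical values: a view of slice 5, mip 2 of a 3D image yields
//   {aspect, .baseMipLevel = 2, .levelCount = 1, .baseArrayLayer = 0, .layerCount = 1}
// because Vulkan 3D images have no array layers; the slice is selected elsewhere.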
389
-282         VkBufferMemoryBarrier barrier;
-283         barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-284         barrier.pNext = nullptr;
-285         barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-286         barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
-287         barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway
-288         barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-289         barrier.buffer = dst_buffer;
-290         barrier.offset = 0;
-291         barrier.size = size;
-292         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
-293                                0, {}, barrier, {});
-294     });
-295 }
390 [[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) {
391     return VkImageSubresourceLayers{
392         .aspectMask = ImageAspectMask(image_view->format),
393         .mipLevel = static_cast<u32>(image_view->range.base.level),
394         .baseArrayLayer = static_cast<u32>(image_view->range.base.layer),
395         .layerCount = static_cast<u32>(image_view->range.extent.layers),
396     };
397 }
398
-297 void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
-298     const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
-299     std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
-300
-301     FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
-302                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
-303
-304     for (u32 level = 0; level < params.num_levels; ++level) {
-305         const VkBufferImageCopy copy = GetBufferImageCopy(level);
-306         if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
-307             scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(),
-308                               copy](vk::CommandBuffer cmdbuf) {
-309                 std::array<VkBufferImageCopy, 2> copies = {copy, copy};
-310                 copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
-311                 copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
-312                 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-313                                          copies);
-314             });
-315         } else {
-316             scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(),
-317                               copy](vk::CommandBuffer cmdbuf) {
-318                 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
-319             });
-320         }
-321     }
-322 }
399 [[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
400     switch (value) {
401     case SwizzleSource::G:
402         return SwizzleSource::R;
403     default:
404         return value;
405     }
406 }
407
-324 VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
-325     return {
-326         .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted),
-327         .bufferRowLength = 0,
-328         .bufferImageHeight = 0,
-329         .imageSubresource =
408 void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
409                        VkImageAspectFlags aspect_mask, bool is_initialized,
410                        std::span<const VkBufferImageCopy> copies) {
411     static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
412                                                   VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
413                                                   VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
414 const VkImageMemoryBarrier read_barrier{
415 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
416 .pNext = nullptr,
417 .srcAccessMask = ACCESS_FLAGS,
418 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
419 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
420 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
421 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
422 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
423 .image = image,
424 .subresourceRange =
425             {
-331                 .aspectMask = image->GetAspectMask(),
-332                 .mipLevel = level,
-333                 .baseArrayLayer = 0,
-334                 .layerCount = static_cast<u32>(params.GetNumLayers()),
-335             },
-336         .imageOffset = {.x = 0, .y = 0, .z = 0},
-337         .imageExtent =
426                 .aspectMask = aspect_mask,
427                 .baseMipLevel = 0,
428                 .levelCount = VK_REMAINING_MIP_LEVELS,
429                 .baseArrayLayer = 0,
430                 .layerCount = VK_REMAINING_ARRAY_LAYERS,
431             },
432     };
433     const VkImageMemoryBarrier write_barrier{
434 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
435 .pNext = nullptr,
436 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
437 .dstAccessMask = ACCESS_FLAGS,
438 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
439 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
440 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
441 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
442 .image = image,
443 .subresourceRange =
444             {
-339                 .width = params.GetMipWidth(level),
-340                 .height = params.GetMipHeight(level),
-341                 .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U,
-342             },
-343     };
445                 .aspectMask = aspect_mask,
446                 .baseMipLevel = 0,
447                 .levelCount = VK_REMAINING_MIP_LEVELS,
448                 .baseArrayLayer = 0,
449                 .layerCount = VK_REMAINING_ARRAY_LAYERS,
450             },
451     };
452 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
453 read_barrier);
454 cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies);
455 // TODO: Move this to another API
456 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
457 write_barrier);
-344 }
458 }
459
-346 VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
-347     return {image->GetAspectMask(), 0, params.num_levels, 0,
-348             static_cast<u32>(params.GetNumLayers())};
460 [[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region,
461                                         const std::array<Offset2D, 2>& src_region,
462                                         const VkImageSubresourceLayers& dst_layers,
463 const VkImageSubresourceLayers& src_layers) {
464 return VkImageBlit{
465 .srcSubresource = src_layers,
466 .srcOffsets =
467 {
468 {
469 .x = src_region[0].x,
470 .y = src_region[0].y,
471 .z = 0,
472 },
473 {
474 .x = src_region[1].x,
475 .y = src_region[1].y,
476 .z = 1,
477 },
478 },
479 .dstSubresource = dst_layers,
480 .dstOffsets =
481 {
482 {
483 .x = dst_region[0].x,
484 .y = dst_region[0].y,
485 .z = 0,
486 },
487 {
488 .x = dst_region[1].x,
489 .y = dst_region[1].y,
490 .z = 1,
491 },
492 },
493 };
349} 494}
350 495
351CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, 496[[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region,
352 const ViewParams& params) 497 const std::array<Offset2D, 2>& src_region,
353 : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, 498 const VkImageSubresourceLayers& dst_layers,
354 image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, 499 const VkImageSubresourceLayers& src_layers) {
355 aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, 500 return VkImageResolve{
356 base_level{params.base_level}, num_levels{params.num_levels}, 501 .srcSubresource = src_layers,
357 image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} { 502 .srcOffset =
358 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { 503 {
359 base_layer = 0; 504 .x = src_region[0].x,
360 num_layers = 1; 505 .y = src_region[0].y,
361 base_slice = params.base_layer; 506 .z = 0,
362 num_slices = params.num_layers; 507 },
363 } else { 508 .dstSubresource = dst_layers,
364 base_layer = params.base_layer; 509 .dstOffset =
365 num_layers = params.num_layers; 510 {
366 } 511 .x = dst_region[0].x,
512 .y = dst_region[0].y,
513 .z = 0,
514 },
515 .extent =
516 {
517 .width = static_cast<u32>(dst_region[1].x - dst_region[0].x),
518 .height = static_cast<u32>(dst_region[1].y - dst_region[0].y),
519 .depth = 1,
520 },
521 };
-367 }
522 }
523
-369 CachedSurfaceView::~CachedSurfaceView() = default;
-370
-371 VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
-372                                             SwizzleSource z_source, SwizzleSource w_source) {
-373     const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
-374     if (last_image_view && last_swizzle == new_swizzle) {
-375         return last_image_view;
-376     }
-377     last_swizzle = new_swizzle;
-378
-379     const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
-380     auto& image_view = entry->second;
-381     if (!is_cache_miss) {
-382         return last_image_view = *image_view;
-383     }
524 struct RangedBarrierRange {
525     u32 min_mip = std::numeric_limits<u32>::max();
526     u32 max_mip = std::numeric_limits<u32>::min();
527     u32 min_layer = std::numeric_limits<u32>::max();
528     u32 max_layer = std::numeric_limits<u32>::min();
529
530     void AddLayers(const VkImageSubresourceLayers& layers) {
531         min_mip = std::min(min_mip, layers.mipLevel);
532         max_mip = std::max(max_mip, layers.mipLevel + 1);
533         min_layer = std::min(min_layer, layers.baseArrayLayer);
534         max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
535     }
536
537     VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
538         return VkImageSubresourceRange{
539             .aspectMask = aspect_mask,
540             .baseMipLevel = min_mip,
541             .levelCount = max_mip - min_mip,
542             .baseArrayLayer = min_layer,
543             .layerCount = max_layer - min_layer,
544         };
545     }
546 };
547
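// [Editor's illustration, not part of the diff] Usage sketch for the accumulator
// above, with hypothetical copy extents: the result is the tightest range covering
// every touched subresource.
[[maybe_unused]] VkImageSubresourceRange ExampleRangedBarrier() {
    RangedBarrierRange range;
    range.AddLayers({.aspectMask = 0, .mipLevel = 1, .baseArrayLayer = 0, .layerCount = 6});
    range.AddLayers({.aspectMask = 0, .mipLevel = 3, .baseArrayLayer = 2, .layerCount = 2});
    // Yields mips [1, 4) and layers [0, 6):
    return range.SubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT);
}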
-385     std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
-386                        MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
-387     if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) {
-388         // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
-389         std::swap(swizzle[0], swizzle[2]);
-390     }
-391
-392     // Games can sample depth or stencil values on textures. This is decided by the swizzle value on
-393     // hardware. To emulate this on Vulkan we specify it in the aspect.
-394     VkImageAspectFlags aspect = aspect_mask;
-395     if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
-396         UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
-397         const bool is_first = x_source == SwizzleSource::R;
-398         switch (params.pixel_format) {
-399         case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT:
-400         case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT:
-401             aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
-402             break;
-403         case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM:
-404             aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
-405             break;
-406         default:
-407             aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
-408             UNIMPLEMENTED();
-409         }
-410
-411         // Make sure we sample the first component
-412         std::transform(
-413             swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) {
-414                 return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component;
-415             });
-416     }
-417
548 } // Anonymous namespace
549
550 void TextureCacheRuntime::Finish() {
551     scheduler.Finish();
552 }
553
554 ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
555     const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true);
556     return ImageBufferMap{
557         .handle = *buffer.handle,
558         .map = buffer.commit->Map(size),
559     };
560 }
561
-418     if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
-419         ASSERT(base_slice == 0);
-420         ASSERT(num_slices == params.depth);
562 void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
563                                     const std::array<Offset2D, 2>& dst_region,
564                                     const std::array<Offset2D, 2>& src_region,
565 Tegra::Engines::Fermi2D::Filter filter,
566 Tegra::Engines::Fermi2D::Operation operation) {
567 const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format);
568 const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT;
569 const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT;
570 ASSERT(aspect_mask == ImageAspectMask(dst.format));
571 if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
572 blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
573 operation);
574 return;
-421     }
-422
-423     image_view = device.GetLogical().CreateImageView({
-424         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-425         .pNext = nullptr,
-426         .flags = 0,
-427         .image = surface.GetImageHandle(),
-428         .viewType = image_view_type,
-429         .format = surface.GetImage().GetFormat(),
-430         .components =
-431             {
-432                 .r = swizzle[0],
-433                 .g = swizzle[1],
-434                 .b = swizzle[2],
-435                 .a = swizzle[3],
575     }
576     if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
577         if (!device.IsBlitDepthStencilSupported()) {
578             UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa);
579             blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(),
580                                                dst_region, src_region, filter, operation);
581             return;
582         }
583     }
584     ASSERT(src.ImageFormat() == dst.ImageFormat());
585     ASSERT(!(is_dst_msaa && !is_src_msaa));
586     ASSERT(operation == Fermi2D::Operation::SrcCopy);
587
588     const VkImage dst_image = dst.ImageHandle();
589     const VkImage src_image = src.ImageHandle();
590 const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst);
591 const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src);
592 const bool is_resolve = is_src_msaa && !is_dst_msaa;
593 scheduler.RequestOutsideRenderPassOperationContext();
594 scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers,
595 aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) {
596 const std::array read_barriers{
597 VkImageMemoryBarrier{
598 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
599 .pNext = nullptr,
600 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
601 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
602 VK_ACCESS_TRANSFER_WRITE_BIT,
603 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
604 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
605 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
606 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
607 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
608 .image = src_image,
609 .subresourceRange{
610 .aspectMask = aspect_mask,
611 .baseMipLevel = 0,
612 .levelCount = VK_REMAINING_MIP_LEVELS,
613 .baseArrayLayer = 0,
614 .layerCount = VK_REMAINING_ARRAY_LAYERS,
615 },
-436             },
-437         .subresourceRange =
-438             {
-439                 .aspectMask = aspect,
-440                 .baseMipLevel = base_level,
-441                 .levelCount = num_levels,
-442                 .baseArrayLayer = base_layer,
-443                 .layerCount = num_layers,
616             },
617             VkImageMemoryBarrier{
618                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
619                 .pNext = nullptr,
620                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
621                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
622                                  VK_ACCESS_TRANSFER_WRITE_BIT,
623                 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
624 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
625 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
626 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
627 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
628 .image = dst_image,
629 .subresourceRange{
630 .aspectMask = aspect_mask,
631 .baseMipLevel = 0,
632 .levelCount = VK_REMAINING_MIP_LEVELS,
633 .baseArrayLayer = 0,
634 .layerCount = VK_REMAINING_ARRAY_LAYERS,
635 },
636 },
637 };
638 VkImageMemoryBarrier write_barrier{
639 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
640 .pNext = nullptr,
641 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
642 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
643 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
644 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
645 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
646 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
647 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
648 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
649 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
650 .image = dst_image,
651 .subresourceRange{
652 .aspectMask = aspect_mask,
653 .baseMipLevel = 0,
654 .levelCount = VK_REMAINING_MIP_LEVELS,
655 .baseArrayLayer = 0,
656 .layerCount = VK_REMAINING_ARRAY_LAYERS,
-444             },
657             },
658 };
659 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
660 0, nullptr, nullptr, read_barriers);
661 if (is_resolve) {
662 cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
663 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
664 MakeImageResolve(dst_region, src_region, dst_layers, src_layers));
665 } else {
666 const bool is_linear = filter == Fermi2D::Filter::Bilinear;
667 const VkFilter vk_filter = is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
668 cmdbuf.BlitImage(
669 src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
670 MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter);
671 }
672 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
673 0, write_barrier);
-445     });
-446
-447     return last_image_view = *image_view;
-448 }
674     });
675 }
676
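// [Editor's illustration, not part of the diff] The dispatch above, condensed: a
// multisampled source blitted into a single-sampled destination must go through
// vkCmdResolveImage; every other path handled here uses vkCmdBlitImage.
[[maybe_unused]] constexpr bool IsResolve(VkSampleCountFlagBits src_samples,
                                          VkSampleCountFlagBits dst_samples) {
    return src_samples != VK_SAMPLE_COUNT_1_BIT && dst_samples == VK_SAMPLE_COUNT_1_BIT;
}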
-450 VkImageView CachedSurfaceView::GetAttachment() {
-451     if (render_target) {
-452         return *render_target;
677 void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
678     switch (dst_view.format) {
679     case PixelFormat::R16_UNORM:
680 if (src_view.format == PixelFormat::D16_UNORM) {
681 return blit_image_helper.ConvertD16ToR16(dst, src_view);
682 }
683 break;
684 case PixelFormat::R32_FLOAT:
685 if (src_view.format == PixelFormat::D32_FLOAT) {
686 return blit_image_helper.ConvertD32ToR32(dst, src_view);
687 }
688 break;
689 case PixelFormat::D16_UNORM:
690 if (src_view.format == PixelFormat::R16_UNORM) {
691 return blit_image_helper.ConvertR16ToD16(dst, src_view);
692 }
693 break;
694 case PixelFormat::D32_FLOAT:
695 if (src_view.format == PixelFormat::R32_FLOAT) {
696 return blit_image_helper.ConvertR32ToD32(dst, src_view);
697 }
698 break;
699 default:
700 break;
-453     }
701     }
702 UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format);
703}
704
-455     VkImageViewCreateInfo ci{
-456         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-457         .pNext = nullptr,
-458         .flags = 0,
-459         .image = surface.GetImageHandle(),
-460         .viewType = VK_IMAGE_VIEW_TYPE_1D,
-461         .format = surface.GetImage().GetFormat(),
-462         .components =
-463             {
-464                 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
-465                 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
-466                 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
-467                 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
705 void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
706                                     std::span<const VideoCommon::ImageCopy> copies) {
707     std::vector<VkImageCopy> vk_copies(copies.size());
708     const VkImageAspectFlags aspect_mask = dst.AspectMask();
709     ASSERT(aspect_mask == src.AspectMask());
710
711     std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) {
712         return MakeImageCopy(copy, aspect_mask);
713     });
714     const VkImage dst_image = dst.Handle();
715     const VkImage src_image = src.Handle();
716     scheduler.RequestOutsideRenderPassOperationContext();
717     scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
718 RangedBarrierRange dst_range;
719 RangedBarrierRange src_range;
720 for (const VkImageCopy& copy : vk_copies) {
721 dst_range.AddLayers(copy.dstSubresource);
722 src_range.AddLayers(copy.srcSubresource);
723 }
724 const std::array read_barriers{
725 VkImageMemoryBarrier{
726 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
727 .pNext = nullptr,
728 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
729 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
730 VK_ACCESS_TRANSFER_WRITE_BIT,
731 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
732 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
733 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
734 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
735 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
736 .image = src_image,
737 .subresourceRange = src_range.SubresourceRange(aspect_mask),
-468             },
-469         .subresourceRange =
-470             {
-471                 .aspectMask = aspect_mask,
-472                 .baseMipLevel = base_level,
-473                 .levelCount = num_levels,
-474                 .baseArrayLayer = 0,
-475                 .layerCount = 0,
738             VkImageMemoryBarrier{
739                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
740                 .pNext = nullptr,
741                 .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
742                                  VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
743                                  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
744                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
745                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
746 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
747 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
748 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
749 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
750 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
751 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
752 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
753 .image = dst_image,
754 .subresourceRange = dst_range.SubresourceRange(aspect_mask),
-476             },
-477     };
-478     if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
-479         ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
-480         ci.subresourceRange.baseArrayLayer = base_slice;
-481         ci.subresourceRange.layerCount = num_slices;
755             },
756         };
757         const VkImageMemoryBarrier write_barrier{
758             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
759             .pNext = nullptr,
760             .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
761 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
762 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
763 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
764 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
765 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
766 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
767 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
768 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
769 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
770 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
771 .image = dst_image,
772 .subresourceRange = dst_range.SubresourceRange(aspect_mask),
773 };
774 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
775 0, {}, {}, read_barriers);
776 cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
777 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
778 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
779 0, write_barrier);
780 });
781}
782
783Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
784 VAddr cpu_addr_)
785 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
786 image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)),
787 aspect_mask(ImageAspectMask(info.format)) {
788 if (image) {
789 commit = runtime.memory_manager.Commit(image, false);
-482     } else {
-483         ci.viewType = image_view_type;
-484         ci.subresourceRange.baseArrayLayer = base_layer;
-485         ci.subresourceRange.layerCount = num_layers;
790     } else {
791         commit = runtime.memory_manager.Commit(buffer, false);
792     }
793     if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
794 flags |= VideoCommon::ImageFlagBits::Converted;
795 }
796 if (runtime.device.HasDebuggingToolAttached()) {
797 if (image) {
798 image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
799 } else {
800 buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
801 }
-486     }
-487     render_target = device.GetLogical().CreateImageView(ci);
-488     return *render_target;
-489 }
802     }
803 }
804
-491 VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
-492                                Tegra::Engines::Maxwell3D& maxwell3d,
-493                                Tegra::MemoryManager& gpu_memory, const VKDevice& device_,
-494                                VKMemoryManager& memory_manager_, VKScheduler& scheduler_,
-495                                VKStagingBufferPool& staging_pool_)
-496     : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()),
-497       device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
-498                                                                                   staging_pool_} {}
-499
-500 VKTextureCache::~VKTextureCache() = default;
-501
-502 Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
-503     return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool,
-504                                            gpu_addr, params);
-505 }
-506
-507 void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
-508                                const VideoCommon::CopyParams& copy_params) {
-509     const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
-510     const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
-511     UNIMPLEMENTED_IF(src_3d);
805 void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
806                          std::span<const BufferImageCopy> copies) {
807     // TODO: Move this to another API
808     scheduler->RequestOutsideRenderPassOperationContext();
809     std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
810     const VkBuffer src_buffer = map.handle;
811     const VkImage vk_image = *image;
812     const VkImageAspectFlags vk_aspect_mask = aspect_mask;
813     const bool is_initialized = std::exchange(initialized, true);
814     scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
815                        vk_copies](vk::CommandBuffer cmdbuf) {
816         CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
817     });
818 }
819
820 void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
821                          std::span<const VideoCommon::BufferCopy> copies) {
822     // TODO: Move this to another API
823     scheduler->RequestOutsideRenderPassOperationContext();
824     std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
825 const VkBuffer src_buffer = map.handle;
826 const VkBuffer dst_buffer = *buffer;
827 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
828 // TODO: Barriers
829 cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
830 });
831}
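// [Editor's illustration, not part of the diff] Hypothetical caller combining
// MapUploadBuffer with UploadMemory; ExampleUpload and its parameters are assumed,
// and <cstring> is needed for std::memcpy.
[[maybe_unused]] void ExampleUpload(TextureCacheRuntime& runtime, Image& image,
                                    std::span<const u8> data,
                                    std::span<const BufferImageCopy> copies) {
    const ImageBufferMap map = runtime.MapUploadBuffer(data.size_bytes());
    std::memcpy(map.Span().data(), data.data(), data.size_bytes());
    image.UploadMemory(map, 0, copies); // offset 0: the map starts at the staging data
}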
832
-513     // The texture cache handles depth in OpenGL terms, we have to handle it as subresource and
-514     // dimension respectively.
-515     const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z;
-516     const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0;
833 void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
834                            std::span<const BufferImageCopy> copies) {
835     std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
836     scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
837 vk_copies](vk::CommandBuffer cmdbuf) {
838 // TODO: Barriers
839 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
840 });
841}
842
-518     const u32 extent_z = dst_3d ? copy_params.depth : 1;
-519     const u32 num_layers = dst_3d ? 1 : copy_params.depth;
843 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
844                      ImageId image_id_, Image& image)
845 : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
846 image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount(
847 image.info.num_samples)} {
848 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
849 std::array<SwizzleSource, 4> swizzle{
850 SwizzleSource::R,
851 SwizzleSource::G,
852 SwizzleSource::B,
853 SwizzleSource::A,
854 };
855 if (!info.IsRenderTarget()) {
856 swizzle = info.Swizzle();
857 if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) {
858 std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
859 }
860 }
861 const VkFormat vk_format =
862 MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format;
863 const VkImageViewCreateInfo create_info{
864 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
865 .pNext = nullptr,
866 .flags = 0,
867 .image = image.Handle(),
868 .viewType = VkImageViewType{},
869 .format = vk_format,
870 .components{
871 .r = ComponentSwizzle(swizzle[0]),
872 .g = ComponentSwizzle(swizzle[1]),
873 .b = ComponentSwizzle(swizzle[2]),
874 .a = ComponentSwizzle(swizzle[3]),
875 },
876 .subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
877 };
878 const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) {
879 VkImageViewCreateInfo ci{create_info};
880 ci.viewType = ImageViewType(view_type);
881 if (num_layers) {
882 ci.subresourceRange.layerCount = *num_layers;
883 }
884 vk::ImageView handle = device->GetLogical().CreateImageView(ci);
885 if (device->HasDebuggingToolAttached()) {
886 handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str());
887 }
888 image_views[static_cast<size_t>(view_type)] = std::move(handle);
889 };
890 switch (info.type) {
891 case VideoCommon::ImageViewType::e1D:
892 case VideoCommon::ImageViewType::e1DArray:
893 create(VideoCommon::ImageViewType::e1D, 1);
894 create(VideoCommon::ImageViewType::e1DArray, std::nullopt);
895 render_target = Handle(VideoCommon::ImageViewType::e1DArray);
896 break;
897 case VideoCommon::ImageViewType::e2D:
898 case VideoCommon::ImageViewType::e2DArray:
899 create(VideoCommon::ImageViewType::e2D, 1);
900 create(VideoCommon::ImageViewType::e2DArray, std::nullopt);
901 render_target = Handle(VideoCommon::ImageViewType::e2DArray);
902 break;
903 case VideoCommon::ImageViewType::e3D:
904 create(VideoCommon::ImageViewType::e3D, std::nullopt);
905 render_target = Handle(VideoCommon::ImageViewType::e3D);
906 break;
907 case VideoCommon::ImageViewType::Cube:
908 case VideoCommon::ImageViewType::CubeArray:
909 create(VideoCommon::ImageViewType::Cube, 6);
910 create(VideoCommon::ImageViewType::CubeArray, std::nullopt);
911 break;
912 case VideoCommon::ImageViewType::Rect:
913 UNIMPLEMENTED();
914 break;
915 case VideoCommon::ImageViewType::Buffer:
916 buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{
917 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
918 .pNext = nullptr,
919 .flags = 0,
920 .buffer = image.Buffer(),
921 .format = vk_format,
922 .offset = 0, // TODO: Redesign buffer cache to support this
923 .range = image.guest_size_bytes,
924 });
925 break;
926 }
927}
928
-521     // We can't copy inside a renderpass
-522     scheduler.RequestOutsideRenderPassOperationContext();
-523
-524     src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1,
-525                             VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT,
-526                             VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
-527     dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1,
-528                             VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
-529                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
929 ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
930     : VideoCommon::ImageViewBase{params} {}
931
932 VkImageView ImageView::DepthView() {
933     if (depth_view) {
934         return *depth_view;
935     }
936     depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT);
937     return *depth_view;
938}
939
-531     const VkImageCopy copy{
-532         .srcSubresource =
-533             {
-534                 .aspectMask = src_surface->GetAspectMask(),
-535                 .mipLevel = copy_params.source_level,
-536                 .baseArrayLayer = copy_params.source_z,
-537                 .layerCount = num_layers,
-538             },
-539         .srcOffset =
-540             {
-541                 .x = static_cast<s32>(copy_params.source_x),
-542                 .y = static_cast<s32>(copy_params.source_y),
-543                 .z = 0,
-544             },
-545         .dstSubresource =
-546             {
-547                 .aspectMask = dst_surface->GetAspectMask(),
-548                 .mipLevel = copy_params.dest_level,
-549                 .baseArrayLayer = dst_base_layer,
-550                 .layerCount = num_layers,
-551             },
-552         .dstOffset =
-553             {
-554                 .x = static_cast<s32>(copy_params.dest_x),
-555                 .y = static_cast<s32>(copy_params.dest_y),
-556                 .z = static_cast<s32>(dst_offset_z),
-557             },
-558         .extent =
-559             {
-560                 .width = copy_params.width,
-561                 .height = copy_params.height,
-562                 .depth = extent_z,
-563             },
-564     };
940 VkImageView ImageView::StencilView() {
941     if (stencil_view) {
942         return *stencil_view;
943     }
944     stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT);
945     return *stencil_view;
946 }
947
-566     const VkImage src_image = src_surface->GetImageHandle();
-567     const VkImage dst_image = dst_surface->GetImageHandle();
-568     scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) {
-569         cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
-570                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
948 vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) {
949     return device->GetLogical().CreateImageView({
950         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
951         .pNext = nullptr,
952         .flags = 0,
953 .image = image_handle,
954 .viewType = ImageViewType(type),
955 .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format,
956 .components{
957 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
958 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
959 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
960 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
961 },
962 .subresourceRange = MakeSubresourceRange(aspect_mask, range),
-571     });
-572 }
-573
-574 void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
-575                                const Tegra::Engines::Fermi2D::Config& copy_config) {
-576     // We can't blit inside a renderpass
-577     scheduler.RequestOutsideRenderPassOperationContext();
-578
-579     src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
-580                          VK_ACCESS_TRANSFER_READ_BIT);
-581     dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
-582                          VK_ACCESS_TRANSFER_WRITE_BIT);
-583
-584     VkImageBlit blit;
-585     blit.srcSubresource = src_view->GetImageSubresourceLayers();
-586     blit.srcOffsets[0].x = copy_config.src_rect.left;
-587     blit.srcOffsets[0].y = copy_config.src_rect.top;
-588     blit.srcOffsets[0].z = 0;
-589     blit.srcOffsets[1].x = copy_config.src_rect.right;
-590     blit.srcOffsets[1].y = copy_config.src_rect.bottom;
-591     blit.srcOffsets[1].z = 1;
-592     blit.dstSubresource = dst_view->GetImageSubresourceLayers();
-593     blit.dstOffsets[0].x = copy_config.dst_rect.left;
-594     blit.dstOffsets[0].y = copy_config.dst_rect.top;
-595     blit.dstOffsets[0].z = 0;
-596     blit.dstOffsets[1].x = copy_config.dst_rect.right;
-597     blit.dstOffsets[1].y = copy_config.dst_rect.bottom;
-598     blit.dstOffsets[1].z = 1;
-599
-600     const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
-601
-602     scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
-603                       is_linear](vk::CommandBuffer cmdbuf) {
-604         cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
-605                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit,
-606                          is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
963     });
964 }
965
966 Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) {
967     const auto& device = runtime.device;
968     const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported();
969     const std::array<float, 4> color = tsc.BorderColor();
970     // C++20 bit_cast
971     VkClearColorValue border_color;
972     std::memcpy(&border_color, &color, sizeof(color));
973     const VkSamplerCustomBorderColorCreateInfoEXT border_ci{
974         .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
975         .pNext = nullptr,
976         .customBorderColor = border_color,
977         .format = VK_FORMAT_UNDEFINED,
978     };
979     const void* pnext = nullptr;
980     if (arbitrary_borders) {
981         pnext = &border_ci;
982     }
983     const VkSamplerReductionModeCreateInfoEXT reduction_ci{
984         .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT,
985         .pNext = pnext,
986         .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter),
987     };
988     if (runtime.device.IsExtSamplerFilterMinmaxSupported()) {
989         pnext = &reduction_ci;
990     } else if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) {
991         LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required");
992     }
993     // Some games have samplers with garbage. Sanitize them here.
994     const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
995     sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{
996         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
997         .pNext = pnext,
998         .flags = 0,
999 .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
1000 .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
1001 .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
1002 .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
1003 .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
1004 .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
1005 .mipLodBias = tsc.LodBias(),
1006 .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
1007 .maxAnisotropy = max_anisotropy,
1008 .compareEnable = tsc.depth_compare_enabled,
1009 .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
1010 .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
1011 .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
1012 .borderColor =
1013 arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
1014 .unnormalizedCoordinates = VK_FALSE,
-607     });
-608 }
-609
1015     });
1016 }
1017
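// [Editor's note, not part of the diff] The "C++20 bit_cast" comment in the sampler
// constructor refers to the intended replacement for the memcpy once std::bit_cast
// is available, e.g.:
//     const auto border_color = std::bit_cast<VkClearColorValue>(tsc.BorderColor());
// which is well-defined here since both types are 16 bytes (four floats).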
-610 void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) {
-611     // Currently unimplemented. PBO copies should be dropped and we should use a render pass to
-612     // convert from color to depth and viceversa.
-613     LOG_WARNING(Render_Vulkan, "Unimplemented");
1018 Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
1019                          ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
1020     std::vector<VkAttachmentDescription> descriptions;
1021     std::vector<VkImageView> attachments;
1022 RenderPassKey renderpass_key{};
1023 s32 num_layers = 1;
1024
1025 for (size_t index = 0; index < NUM_RT; ++index) {
1026 const ImageView* const color_buffer = color_buffers[index];
1027 if (!color_buffer) {
1028 renderpass_key.color_formats[index] = PixelFormat::Invalid;
1029 continue;
1030 }
1031 descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
1032 attachments.push_back(color_buffer->RenderTarget());
1033 renderpass_key.color_formats[index] = color_buffer->format;
1034 num_layers = std::max(num_layers, color_buffer->range.extent.layers);
1035 images[num_images] = color_buffer->ImageHandle();
1036 image_ranges[num_images] = MakeSubresourceRange(color_buffer);
1037 samples = color_buffer->Samples();
1038 ++num_images;
1039 }
1040 const size_t num_colors = attachments.size();
1041 const VkAttachmentReference* depth_attachment =
1042 depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
1043 if (depth_buffer) {
1044 descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
1045 attachments.push_back(depth_buffer->RenderTarget());
1046 renderpass_key.depth_format = depth_buffer->format;
1047 num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
1048 images[num_images] = depth_buffer->ImageHandle();
1049 image_ranges[num_images] = MakeSubresourceRange(depth_buffer);
1050 samples = depth_buffer->Samples();
1051 ++num_images;
1052 } else {
1053 renderpass_key.depth_format = PixelFormat::Invalid;
1054 }
1055 renderpass_key.samples = samples;
1056
1057 const auto& device = runtime.device.GetLogical();
1058 const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
1059 if (is_new) {
1060 const VkSubpassDescription subpass{
1061 .flags = 0,
1062 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
1063 .inputAttachmentCount = 0,
1064 .pInputAttachments = nullptr,
1065 .colorAttachmentCount = static_cast<u32>(num_colors),
1066 .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
1067 .pResolveAttachments = nullptr,
1068 .pDepthStencilAttachment = depth_attachment,
1069 .preserveAttachmentCount = 0,
1070 .pPreserveAttachments = nullptr,
1071 };
1072 cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
1073 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
1074 .pNext = nullptr,
1075 .flags = 0,
1076 .attachmentCount = static_cast<u32>(descriptions.size()),
1077 .pAttachments = descriptions.data(),
1078 .subpassCount = 1,
1079 .pSubpasses = &subpass,
1080 .dependencyCount = 0,
1081 .pDependencies = nullptr,
1082 });
1083 }
1084 renderpass = *cache_pair->second;
1085 render_area = VkExtent2D{
1086 .width = key.size.width,
1087 .height = key.size.height,
1088 };
1089 num_color_buffers = static_cast<u32>(num_colors);
1090 framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
1091 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
1092 .pNext = nullptr,
1093 .flags = 0,
1094 .renderPass = renderpass,
1095 .attachmentCount = static_cast<u32>(attachments.size()),
1096 .pAttachments = attachments.data(),
1097 .width = key.size.width,
1098 .height = key.size.height,
1099 .layers = static_cast<u32>(num_layers),
1100 });
1101 if (runtime.device.HasDebuggingToolAttached()) {
1102 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
1103 }
614} 1104}
615 1105
616} // namespace Vulkan 1106} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 39202feba..edc3d80c0 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -4,216 +4,265 @@
4
5 #pragma once
6
-7 #include <memory>
-8 #include <unordered_map>
7 #include <compare>
8 #include <span>
9
-10 #include "common/common_types.h"
-11 #include "video_core/renderer_vulkan/vk_image.h"
10 #include "video_core/renderer_vulkan/vk_memory_manager.h"
-13 #include "video_core/renderer_vulkan/vk_scheduler.h"
11 #include "video_core/renderer_vulkan/wrapper.h"
-15 #include "video_core/texture_cache/surface_base.h"
12 #include "video_core/texture_cache/texture_cache.h"
13
-18 namespace VideoCore {
-19 class RasterizerInterface;
-20 }
-21
14 namespace Vulkan {
15
-24 class RasterizerVulkan;
16 using VideoCommon::ImageId;
17 using VideoCommon::NUM_RT;
18 using VideoCommon::Offset2D;
19 using VideoCommon::RenderTargets;
20 using VideoCore::Surface::PixelFormat;
21
22 class VKDevice;
23 class VKScheduler;
24 class VKStagingBufferPool;
25
-29 class CachedSurfaceView;
-30 class CachedSurface;
26 class BlitImageHelper;
27 class Image;
28 class ImageView;
29 class Framebuffer;
30
-32 using Surface = std::shared_ptr<CachedSurface>;
-33 using View = std::shared_ptr<CachedSurfaceView>;
-34 using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
-35
-36 using VideoCommon::SurfaceParams;
-37 using VideoCommon::ViewParams;
31 struct RenderPassKey {
32     constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
33
34     std::array<PixelFormat, NUM_RT> color_formats;
35     PixelFormat depth_format;
36     VkSampleCountFlagBits samples;
37 };
38
-39 class CachedSurface final : public VideoCommon::SurfaceBase<View> {
-40     friend CachedSurfaceView;
-41
-42 public:
-43     explicit CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager,
-44                            VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
-45                            GPUVAddr gpu_addr, const SurfaceParams& params);
-46     ~CachedSurface();
39 } // namespace Vulkan
40
41 namespace std {
42 template <>
43 struct hash<Vulkan::RenderPassKey> {
44     [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
45         size_t value = static_cast<size_t>(key.depth_format) << 48;
46 value ^= static_cast<size_t>(key.samples) << 52;
47 for (size_t i = 0; i < key.color_formats.size(); ++i) {
48 value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
49 }
50 return value;
51 }
52};
53} // namespace std
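// [Editor's illustration, not part of the diff] The defaulted operator<=> (which also
// provides operator==) and this hash specialization are what let RenderPassKey serve
// as an unordered_map key; a sketch of the lookup pattern used by Framebuffer:
//     std::unordered_map<Vulkan::RenderPassKey, vk::RenderPass> cache;
//     const auto [it, is_new] = cache.try_emplace(key);
//     if (is_new) { it->second = BuildRenderPass(key); } // BuildRenderPass: hypothetical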
54
-48     void UploadTexture(const std::vector<u8>& staging_buffer) override;
-49     void DownloadTexture(std::vector<u8>& staging_buffer) override;
-50
-51     void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
-52                         VkImageLayout new_layout) {
-53         image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels,
-54                           new_stage_mask, new_access, new_layout);
-55     }
-56
-57     void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
-58                     VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
-59                     VkImageLayout new_layout) {
-60         image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
-61                           new_access, new_layout);
-62     }
-63
-64     VKImage& GetImage() {
-65         return *image;
-66     }
-67
-68     const VKImage& GetImage() const {
-69         return *image;
-70     }
55 namespace Vulkan {
56
57 struct ImageBufferMap {
58     [[nodiscard]] VkBuffer Handle() const noexcept {
59         return handle;
60     }
61
62     [[nodiscard]] std::span<u8> Span() const noexcept {
63         return map.Span();
64     }
65
66     VkBuffer handle;
67     MemoryMap map;
68 };
69
70 struct TextureCacheRuntime {
71     const VKDevice& device;
72     VKScheduler& scheduler;
73 VKMemoryManager& memory_manager;
74 VKStagingBufferPool& staging_buffer_pool;
75 BlitImageHelper& blit_image_helper;
76 std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache;
77
78 void Finish();
79
-72     VkImage GetImageHandle() const {
-73         return *image->GetHandle();
-74     }
-75
-76     VkImageAspectFlags GetAspectMask() const {
-77         return image->GetAspectMask();
80     [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
81
82     [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) {
83         // TODO: Have a special function for this
84         return MapUploadBuffer(size);
85     }
86
87     void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
88                    const std::array<Offset2D, 2>& dst_region,
89 const std::array<Offset2D, 2>& src_region,
90 Tegra::Engines::Fermi2D::Filter filter,
91 Tegra::Engines::Fermi2D::Operation operation);
92
93 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
94
95 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
96
97 [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept {
98 return false;
-78     }
-79
-80     VkBufferView GetBufferViewHandle() const {
-81         return *buffer_view;
-82     }
-83
-84 protected:
-85     void DecorateSurfaceName();
-86
-87     View CreateView(const ViewParams& params) override;
99     }
100
101     void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
102                                std::span<const VideoCommon::SwizzleParameters>) {
103         UNREACHABLE();
104     }
105
106     void InsertUploadMemoryBarrier() {}
107 };
108
109 class Image : public VideoCommon::ImageBase {
110public:
111 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
112 VAddr cpu_addr);
113
-89 private:
-90     void UploadBuffer(const std::vector<u8>& staging_buffer);
-91
-92     void UploadImage(const std::vector<u8>& staging_buffer);
-93
-94     VkBufferImageCopy GetBufferImageCopy(u32 level) const;
-95
-96     VkImageSubresourceRange GetImageSubresourceRange() const;
-97
-98     const VKDevice& device;
-99     VKMemoryManager& memory_manager;
-100     VKScheduler& scheduler;
-101     VKStagingBufferPool& staging_pool;
-102
-103     std::optional<VKImage> image;
-105     vk::BufferView buffer_view;
-107
-108     VkFormat format = VK_FORMAT_UNDEFINED;
114     void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
115                       std::span<const VideoCommon::BufferImageCopy> copies);
116
117     void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
118                       std::span<const VideoCommon::BufferCopy> copies);
119
120     void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
121                         std::span<const VideoCommon::BufferImageCopy> copies);
122
123     [[nodiscard]] VkImage Handle() const noexcept {
124         return *image;
125     }
126
127     [[nodiscard]] VkBuffer Buffer() const noexcept {
128         return *buffer;
129     }
130
131     [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept {
132         return aspect_mask;
133     }
134
135 private:
136     VKScheduler* scheduler;
137     vk::Image image;
138     vk::Buffer buffer;
139     VKMemoryCommit commit;
140     VkImageAspectFlags aspect_mask = 0;
141     bool initialized = false;
142 };
143
-111 class CachedSurfaceView final : public VideoCommon::ViewBase {
-112 public:
-113     explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
-114                                const ViewParams& params);
-115     ~CachedSurfaceView();
-116
-117     VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
-118                              Tegra::Texture::SwizzleSource y_source,
-119                              Tegra::Texture::SwizzleSource z_source,
-120                              Tegra::Texture::SwizzleSource w_source);
-121
-122     VkImageView GetAttachment();
-123
-124     bool IsSameSurface(const CachedSurfaceView& rhs) const {
-125         return &surface == &rhs.surface;
-126     }
-127
-128     u32 GetWidth() const {
-129         return params.GetMipWidth(base_level);
-130     }
-131
-132     u32 GetHeight() const {
-133         return params.GetMipHeight(base_level);
-134     }
-135
-136     u32 GetNumLayers() const {
-137         return num_layers;
-138     }
-139
-140     bool IsBufferView() const {
-141         return buffer_view;
-142     }
-143
-144     VkImage GetImage() const {
-145         return image;
-146     }
-147
-148     VkBufferView GetBufferView() const {
-149         return buffer_view;
-150     }
-151
-152     VkImageSubresourceRange GetImageSubresourceRange() const {
-153         return {aspect_mask, base_level, num_levels, base_layer, num_layers};
-154     }
144 class ImageView : public VideoCommon::ImageViewBase {
145 public:
146     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
147     explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
148
149     [[nodiscard]] VkImageView DepthView();
150
151     [[nodiscard]] VkImageView StencilView();
152
153     [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept {
154         return *image_views[static_cast<size_t>(query_type)];
155     }
156
157     [[nodiscard]] VkBufferView BufferView() const noexcept {
158         return *buffer_view;
159     }
160
161     [[nodiscard]] VkImage ImageHandle() const noexcept {
162         return image_handle;
163     }
164
165     [[nodiscard]] VkImageView RenderTarget() const noexcept {
166         return render_target;
167     }
168
169     [[nodiscard]] PixelFormat ImageFormat() const noexcept {
170         return image_format;
171     }
172
173     [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
174         return samples;
175     }
176
177 private:
178     [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);
179
180     const VKDevice* device = nullptr;
181     std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
182     vk::ImageView depth_view;
183     vk::ImageView stencil_view;
184     vk::BufferView buffer_view;
185     VkImage image_handle = VK_NULL_HANDLE;
186     VkImageView render_target = VK_NULL_HANDLE;
187     PixelFormat image_format = PixelFormat::Invalid;
188     VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
189 };
190
-156     VkImageSubresourceLayers GetImageSubresourceLayers() const {
-157         return {surface.GetAspectMask(), base_level, base_layer, num_layers};
-158     }
-159
-160     void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask,
-161                     VkAccessFlags new_access) const {
-162         surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
-163                            new_access, new_layout);
-164     }
-165
-166     void MarkAsModified(u64 tick) {
-167         surface.MarkAsModified(true, tick);
-168     }
-169
-170 private:
-171     // Store a copy of these values to avoid double dereference when reading them
-172     const SurfaceParams params;
-173     const VkImage image;
-174     const VkBufferView buffer_view;
-175     const VkImageAspectFlags aspect_mask;
-176
-177     const VKDevice& device;
-178     CachedSurface& surface;
-179     const u32 base_level;
-180     const u32 num_levels;
-181     const VkImageViewType image_view_type;
-182     u32 base_layer = 0;
-183     u32 num_layers = 0;
-184     u32 base_slice = 0;
-185     u32 num_slices = 0;
-186
-187     VkImageView last_image_view = nullptr;
-188     u32 last_swizzle = 0;
-189
-190     vk::ImageView render_target;
-191     std::unordered_map<u32, vk::ImageView> view_cache;
-192 };
191 class ImageAlloc : public VideoCommon::ImageAllocBase {};
192
193 class Sampler {
194 public:
195     explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
196
197     [[nodiscard]] VkSampler Handle() const noexcept {
198         return *sampler;
199     }
200
201 private:
202     vk::Sampler sampler;
203 };
193 204
194class VKTextureCache final : public TextureCacheBase { 205class Framebuffer {
195public: 206public:
196 explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer, 207 explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
197 Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, 208 ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
198 const VKDevice& device, VKMemoryManager& memory_manager,
199 VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
200 ~VKTextureCache();
201 209
202private: 210 [[nodiscard]] VkFramebuffer Handle() const noexcept {
203 Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; 211 return *framebuffer;
212 }
204 213
205 void ImageCopy(Surface& src_surface, Surface& dst_surface, 214 [[nodiscard]] VkRenderPass RenderPass() const noexcept {
206 const VideoCommon::CopyParams& copy_params) override; 215 return renderpass;
216 }
207 217
208 void ImageBlit(View& src_view, View& dst_view, 218 [[nodiscard]] VkExtent2D RenderArea() const noexcept {
209 const Tegra::Engines::Fermi2D::Config& copy_config) override; 219 return render_area;
220 }
210 221
211 void BufferCopy(Surface& src_surface, Surface& dst_surface) override; 222 [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
223 return samples;
224 }
212 225
213 const VKDevice& device; 226 [[nodiscard]] u32 NumColorBuffers() const noexcept {
214 VKMemoryManager& memory_manager; 227 return num_color_buffers;
215 VKScheduler& scheduler; 228 }
216 VKStagingBufferPool& staging_pool; 229
230 [[nodiscard]] u32 NumImages() const noexcept {
231 return num_images;
232 }
233
234 [[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
235 return images;
236 }
237
238 [[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
239 return image_ranges;
240 }
241
242private:
243 vk::Framebuffer framebuffer;
244 VkRenderPass renderpass{};
245 VkExtent2D render_area{};
246 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
247 u32 num_color_buffers = 0;
248 u32 num_images = 0;
249 std::array<VkImage, 9> images{};
250 std::array<VkImageSubresourceRange, 9> image_ranges{};
251};
252
253struct TextureCacheParams {
254 static constexpr bool ENABLE_VALIDATION = true;
255 static constexpr bool FRAMEBUFFER_BLITS = false;
256 static constexpr bool HAS_EMULATED_COPIES = false;
257
258 using Runtime = Vulkan::TextureCacheRuntime;
259 using Image = Vulkan::Image;
260 using ImageAlloc = Vulkan::ImageAlloc;
261 using ImageView = Vulkan::ImageView;
262 using Sampler = Vulkan::Sampler;
263 using Framebuffer = Vulkan::Framebuffer;
217}; 264};
218 265
266using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
267
219} // namespace Vulkan 268} // namespace Vulkan
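
The rewritten Framebuffer caches up to 9 images (presumably the 8 color render targets plus a depth/stencil attachment) together with their subresource ranges, so barriers can be emitted without walking the views again. A minimal sketch of a consumer filling a VkRenderPassBeginInfo from the cached object, using only the accessors declared above (the framebuffer variable is hypothetical):

    // Hypothetical call site: begin a render pass from a cached Framebuffer.
    const VkRenderPassBeginInfo begin_info{
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
        .pNext = nullptr,
        .renderPass = framebuffer.RenderPass(),
        .framebuffer = framebuffer.Handle(),
        .renderArea = {.offset = {0, 0}, .extent = framebuffer.RenderArea()},
        .clearValueCount = 0,
        .pClearValues = nullptr,
    };
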
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 351c048d2..8826da325 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -14,8 +14,8 @@
14 14
15namespace Vulkan { 15namespace Vulkan {
16 16
17VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler) 17VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_)
18 : device{device}, scheduler{scheduler} {} 18 : device{device_}, scheduler{scheduler_} {}
19 19
20VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; 20VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
21 21
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 945320c72..f098a8540 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -31,7 +31,7 @@ struct DescriptorUpdateEntry {
31 31
32class VKUpdateDescriptorQueue final { 32class VKUpdateDescriptorQueue final {
33public: 33public:
34 explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler); 34 explicit VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_);
35 ~VKUpdateDescriptorQueue(); 35 ~VKUpdateDescriptorQueue();
36 36
37 void TickFrame(); 37 void TickFrame();
@@ -40,30 +40,34 @@ public:
40 40
41 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); 41 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
42 42
43 void AddSampledImage(VkSampler sampler, VkImageView image_view) { 43 void AddSampledImage(VkImageView image_view, VkSampler sampler) {
44 payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); 44 payload.emplace_back(VkDescriptorImageInfo{
45 .sampler = sampler,
46 .imageView = image_view,
47 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
48 });
45 } 49 }
46 50
47 void AddImage(VkImageView image_view) { 51 void AddImage(VkImageView image_view) {
48 payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); 52 payload.emplace_back(VkDescriptorImageInfo{
53 .sampler = VK_NULL_HANDLE,
54 .imageView = image_view,
55 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
56 });
49 } 57 }
50 58
51 void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { 59 void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
52 payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); 60 payload.emplace_back(VkDescriptorBufferInfo{
61 .buffer = buffer,
62 .offset = offset,
63 .range = size,
64 });
53 } 65 }
54 66
55 void AddTexelBuffer(VkBufferView texel_buffer) { 67 void AddTexelBuffer(VkBufferView texel_buffer) {
56 payload.emplace_back(texel_buffer); 68 payload.emplace_back(texel_buffer);
57 } 69 }
58 70
59 VkImageLayout* LastImageLayout() {
60 return &payload.back().image.imageLayout;
61 }
62
63 const VkImageLayout* LastImageLayout() const {
64 return &payload.back().image.imageLayout;
65 }
66
67private: 71private:
68 const VKDevice& device; 72 const VKDevice& device;
69 VKScheduler& scheduler; 73 VKScheduler& scheduler;
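
Note that AddSampledImage swaps its argument order to (image_view, sampler), so call sites must be updated by hand rather than merely recompiled. A minimal usage sketch, assuming all handles (view, sampler, buffer, update_template, set) and uniform_size already exist:

    // Queue one sampled image and one uniform buffer, then emit the template update.
    update_descriptor_queue.AddSampledImage(view, sampler); // order is now (image_view, sampler)
    update_descriptor_queue.AddBuffer(buffer, 0, uniform_size);
    update_descriptor_queue.Send(update_template, set);
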
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 4e83303d8..2a21e850d 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -81,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
81 X(vkCmdBeginQuery); 81 X(vkCmdBeginQuery);
82 X(vkCmdBeginRenderPass); 82 X(vkCmdBeginRenderPass);
83 X(vkCmdBeginTransformFeedbackEXT); 83 X(vkCmdBeginTransformFeedbackEXT);
84 X(vkCmdBeginDebugUtilsLabelEXT);
84 X(vkCmdBindDescriptorSets); 85 X(vkCmdBindDescriptorSets);
85 X(vkCmdBindIndexBuffer); 86 X(vkCmdBindIndexBuffer);
86 X(vkCmdBindPipeline); 87 X(vkCmdBindPipeline);
@@ -98,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
98 X(vkCmdEndQuery); 99 X(vkCmdEndQuery);
99 X(vkCmdEndRenderPass); 100 X(vkCmdEndRenderPass);
100 X(vkCmdEndTransformFeedbackEXT); 101 X(vkCmdEndTransformFeedbackEXT);
102 X(vkCmdEndDebugUtilsLabelEXT);
101 X(vkCmdFillBuffer); 103 X(vkCmdFillBuffer);
102 X(vkCmdPipelineBarrier); 104 X(vkCmdPipelineBarrier);
103 X(vkCmdPushConstants); 105 X(vkCmdPushConstants);
@@ -121,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
121 X(vkCmdSetPrimitiveTopologyEXT); 123 X(vkCmdSetPrimitiveTopologyEXT);
122 X(vkCmdSetStencilOpEXT); 124 X(vkCmdSetStencilOpEXT);
123 X(vkCmdSetStencilTestEnableEXT); 125 X(vkCmdSetStencilTestEnableEXT);
126 X(vkCmdResolveImage);
124 X(vkCreateBuffer); 127 X(vkCreateBuffer);
125 X(vkCreateBufferView); 128 X(vkCreateBufferView);
126 X(vkCreateCommandPool); 129 X(vkCreateCommandPool);
@@ -176,6 +179,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
176 X(vkQueueSubmit); 179 X(vkQueueSubmit);
177 X(vkResetFences); 180 X(vkResetFences);
178 X(vkResetQueryPoolEXT); 181 X(vkResetQueryPoolEXT);
182 X(vkSetDebugUtilsObjectNameEXT);
183 X(vkSetDebugUtilsObjectTagEXT);
179 X(vkUnmapMemory); 184 X(vkUnmapMemory);
180 X(vkUpdateDescriptorSetWithTemplateKHR); 185 X(vkUpdateDescriptorSetWithTemplateKHR);
181 X(vkUpdateDescriptorSets); 186 X(vkUpdateDescriptorSets);
@@ -184,6 +189,19 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
184#undef X 189#undef X
185} 190}
186 191
192template <typename T>
193void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type,
194 const char* name) {
195 const VkDebugUtilsObjectNameInfoEXT name_info{
196 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
197 .pNext = nullptr,
 198        .objectType = type,
199 .objectHandle = reinterpret_cast<u64>(handle),
200 .pObjectName = name,
201 };
202 Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info));
203}
204
187} // Anonymous namespace 205} // Anonymous namespace
188 206
189bool Load(InstanceDispatch& dld) noexcept { 207bool Load(InstanceDispatch& dld) noexcept {
@@ -417,7 +435,7 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe
417} 435}
418 436
419Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions, 437Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
420 InstanceDispatch& dld) noexcept { 438 InstanceDispatch& dispatch) noexcept {
421 const VkApplicationInfo application_info{ 439 const VkApplicationInfo application_info{
422 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, 440 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
423 .pNext = nullptr, 441 .pNext = nullptr,
@@ -439,17 +457,17 @@ Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char
439 }; 457 };
440 458
441 VkInstance instance; 459 VkInstance instance;
442 if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { 460 if (dispatch.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) {
443 // Failed to create the instance. 461 // Failed to create the instance.
444 return {}; 462 return {};
445 } 463 }
446 if (!Proc(dld.vkDestroyInstance, dld, "vkDestroyInstance", instance)) { 464 if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) {
447 // We successfully created an instance but the destroy function couldn't be loaded. 465 // We successfully created an instance but the destroy function couldn't be loaded.
448 // This is a good moment to panic. 466 // This is a good moment to panic.
449 return {}; 467 return {};
450 } 468 }
451 469
452 return Instance(instance, dld); 470 return Instance(instance, dispatch);
453} 471}
454 472
455std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() { 473std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() {
@@ -476,8 +494,7 @@ DebugCallback Instance::TryCreateDebugCallback(
476 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | 494 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
477 VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, 495 VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
478 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | 496 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
479 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | 497 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
480 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
481 .pfnUserCallback = callback, 498 .pfnUserCallback = callback,
482 .pUserData = nullptr, 499 .pUserData = nullptr,
483 }; 500 };
@@ -493,10 +510,38 @@ void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
493 Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); 510 Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
494} 511}
495 512
513void Buffer::SetObjectNameEXT(const char* name) const {
514 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name);
515}
516
517void BufferView::SetObjectNameEXT(const char* name) const {
518 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
519}
520
496void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { 521void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
497 Check(dld->vkBindImageMemory(owner, handle, memory, offset)); 522 Check(dld->vkBindImageMemory(owner, handle, memory, offset));
498} 523}
499 524
525void Image::SetObjectNameEXT(const char* name) const {
526 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
527}
528
529void ImageView::SetObjectNameEXT(const char* name) const {
530 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
531}
532
533void DeviceMemory::SetObjectNameEXT(const char* name) const {
534 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
535}
536
537void Fence::SetObjectNameEXT(const char* name) const {
538 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name);
539}
540
541void Framebuffer::SetObjectNameEXT(const char* name) const {
542 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name);
543}
544
500DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { 545DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const {
501 const std::size_t num = ai.descriptorSetCount; 546 const std::size_t num = ai.descriptorSetCount;
502 std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num); 547 std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num);
@@ -510,6 +555,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c
510 } 555 }
511} 556}
512 557
558void DescriptorPool::SetObjectNameEXT(const char* name) const {
559 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name);
560}
561
513CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { 562CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
514 const VkCommandBufferAllocateInfo ai{ 563 const VkCommandBufferAllocateInfo ai{
515 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, 564 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
@@ -530,6 +579,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev
530 } 579 }
531} 580}
532 581
582void CommandPool::SetObjectNameEXT(const char* name) const {
583 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name);
584}
585
533std::vector<VkImage> SwapchainKHR::GetImages() const { 586std::vector<VkImage> SwapchainKHR::GetImages() const {
534 u32 num; 587 u32 num;
535 Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); 588 Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr));
@@ -538,9 +591,21 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
538 return images; 591 return images;
539} 592}
540 593
594void Event::SetObjectNameEXT(const char* name) const {
595 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name);
596}
597
598void ShaderModule::SetObjectNameEXT(const char* name) const {
599 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name);
600}
601
602void Semaphore::SetObjectNameEXT(const char* name) const {
603 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name);
604}
605
541Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, 606Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
542 Span<const char*> enabled_extensions, const void* next, 607 Span<const char*> enabled_extensions, const void* next,
543 DeviceDispatch& dld) noexcept { 608 DeviceDispatch& dispatch) noexcept {
544 const VkDeviceCreateInfo ci{ 609 const VkDeviceCreateInfo ci{
545 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, 610 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
546 .pNext = next, 611 .pNext = next,
@@ -555,11 +620,11 @@ Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreate
555 }; 620 };
556 621
557 VkDevice device; 622 VkDevice device;
558 if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { 623 if (dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) {
559 return {}; 624 return {};
560 } 625 }
561 Load(device, dld); 626 Load(device, dispatch);
562 return Device(device, dld); 627 return Device(device, dispatch);
563} 628}
564 629
565Queue Device::GetQueue(u32 family_index) const noexcept { 630Queue Device::GetQueue(u32 family_index) const noexcept {
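
The SetObjectNameEXT family is only safe when the device was created with VK_EXT_debug_utils enabled; otherwise vkSetDebugUtilsObjectNameEXT is never loaded and the call would go through a null function pointer. A hypothetical call site that labels resources for validation layers and frame debuggers:

    // Assumes VK_EXT_debug_utils is enabled on the device owning these handles.
    buffer.SetObjectNameEXT("staging buffer");
    image.SetObjectNameEXT("color attachment 0");
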
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index f64919623..f9a184e00 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -9,6 +9,7 @@
9#include <limits> 9#include <limits>
10#include <memory> 10#include <memory>
11#include <optional> 11#include <optional>
12#include <span>
12#include <type_traits> 13#include <type_traits>
13#include <utility> 14#include <utility>
14#include <vector> 15#include <vector>
@@ -18,6 +19,10 @@
18 19
19#include "common/common_types.h" 20#include "common/common_types.h"
20 21
22#ifdef _MSC_VER
23#pragma warning(disable : 26812) // Disable prefer enum class over enum
24#endif
25
21namespace Vulkan::vk { 26namespace Vulkan::vk {
22 27
23/** 28/**
@@ -41,6 +46,9 @@ public:
41 /// Construct an empty span. 46 /// Construct an empty span.
42 constexpr Span() noexcept = default; 47 constexpr Span() noexcept = default;
43 48
 49 /// Construct an empty span.
50 constexpr Span(std::nullptr_t) noexcept {}
51
44 /// Construct a span from a single element. 52 /// Construct a span from a single element.
45 constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} 53 constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {}
46 54
@@ -52,7 +60,7 @@ public:
52 60
53 /// Construct a span from a pointer and a size. 61 /// Construct a span from a pointer and a size.
 54 /// This is intended for subranges. 62 /// This is intended for subranges.
55 constexpr Span(const T* ptr, std::size_t num) noexcept : ptr{ptr}, num{num} {} 63 constexpr Span(const T* ptr_, std::size_t num_) noexcept : ptr{ptr_}, num{num_} {}
56 64
57 /// Returns the data pointer by the span. 65 /// Returns the data pointer by the span.
58 constexpr const T* data() const noexcept { 66 constexpr const T* data() const noexcept {
@@ -177,6 +185,7 @@ struct DeviceDispatch : public InstanceDispatch {
177 PFN_vkCmdBeginQuery vkCmdBeginQuery; 185 PFN_vkCmdBeginQuery vkCmdBeginQuery;
178 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; 186 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass;
179 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; 187 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT;
188 PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT;
180 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; 189 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets;
181 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; 190 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer;
182 PFN_vkCmdBindPipeline vkCmdBindPipeline; 191 PFN_vkCmdBindPipeline vkCmdBindPipeline;
@@ -194,6 +203,7 @@ struct DeviceDispatch : public InstanceDispatch {
194 PFN_vkCmdEndQuery vkCmdEndQuery; 203 PFN_vkCmdEndQuery vkCmdEndQuery;
195 PFN_vkCmdEndRenderPass vkCmdEndRenderPass; 204 PFN_vkCmdEndRenderPass vkCmdEndRenderPass;
196 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; 205 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT;
206 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT;
197 PFN_vkCmdFillBuffer vkCmdFillBuffer; 207 PFN_vkCmdFillBuffer vkCmdFillBuffer;
198 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; 208 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
199 PFN_vkCmdPushConstants vkCmdPushConstants; 209 PFN_vkCmdPushConstants vkCmdPushConstants;
@@ -217,6 +227,7 @@ struct DeviceDispatch : public InstanceDispatch {
217 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; 227 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT;
218 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; 228 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT;
219 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; 229 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT;
230 PFN_vkCmdResolveImage vkCmdResolveImage;
220 PFN_vkCreateBuffer vkCreateBuffer; 231 PFN_vkCreateBuffer vkCreateBuffer;
221 PFN_vkCreateBufferView vkCreateBufferView; 232 PFN_vkCreateBufferView vkCreateBufferView;
222 PFN_vkCreateCommandPool vkCreateCommandPool; 233 PFN_vkCreateCommandPool vkCreateCommandPool;
@@ -272,6 +283,8 @@ struct DeviceDispatch : public InstanceDispatch {
272 PFN_vkQueueSubmit vkQueueSubmit; 283 PFN_vkQueueSubmit vkQueueSubmit;
273 PFN_vkResetFences vkResetFences; 284 PFN_vkResetFences vkResetFences;
274 PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; 285 PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT;
286 PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT;
287 PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT;
275 PFN_vkUnmapMemory vkUnmapMemory; 288 PFN_vkUnmapMemory vkUnmapMemory;
276 PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; 289 PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
277 PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; 290 PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
@@ -469,9 +482,10 @@ public:
469 PoolAllocations() = default; 482 PoolAllocations() = default;
470 483
471 /// Construct an allocation. Errors are reported through IsOutOfPoolMemory(). 484 /// Construct an allocation. Errors are reported through IsOutOfPoolMemory().
472 explicit PoolAllocations(std::unique_ptr<AllocationType[]> allocations, std::size_t num, 485 explicit PoolAllocations(std::unique_ptr<AllocationType[]> allocations_, std::size_t num_,
473 VkDevice device, PoolType pool, const DeviceDispatch& dld) noexcept 486 VkDevice device_, PoolType pool_, const DeviceDispatch& dld_) noexcept
474 : allocations{std::move(allocations)}, num{num}, device{device}, pool{pool}, dld{&dld} {} 487 : allocations{std::move(allocations_)}, num{num_}, device{device_}, pool{pool_},
488 dld{&dld_} {}
475 489
476 /// Copying Vulkan allocations is not supported and will never be. 490 /// Copying Vulkan allocations is not supported and will never be.
477 PoolAllocations(const PoolAllocations&) = delete; 491 PoolAllocations(const PoolAllocations&) = delete;
@@ -541,18 +555,14 @@ private:
541 const DeviceDispatch* dld = nullptr; 555 const DeviceDispatch* dld = nullptr;
542}; 556};
543 557
544using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>;
545using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; 558using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
546using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; 559using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>;
547using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; 560using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>;
548using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>;
549using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>;
550using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; 561using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>;
551using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; 562using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;
552using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; 563using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;
553using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; 564using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>;
554using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; 565using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>;
555using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>;
556using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; 566using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;
557 567
558using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; 568using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>;
@@ -565,7 +575,7 @@ class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> {
565public: 575public:
566 /// Creates a Vulkan instance. Use "operator bool" for error handling. 576 /// Creates a Vulkan instance. Use "operator bool" for error handling.
567 static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions, 577 static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
568 InstanceDispatch& dld) noexcept; 578 InstanceDispatch& dispatch) noexcept;
569 579
570 /// Enumerates physical devices. 580 /// Enumerates physical devices.
571 /// @return Physical devices and an empty handle on failure. 581 /// @return Physical devices and an empty handle on failure.
@@ -581,7 +591,8 @@ public:
581 constexpr Queue() noexcept = default; 591 constexpr Queue() noexcept = default;
582 592
583 /// Construct a queue handle. 593 /// Construct a queue handle.
584 constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} 594 constexpr Queue(VkQueue queue_, const DeviceDispatch& dld_) noexcept
595 : queue{queue_}, dld{&dld_} {}
585 596
586 VkResult Submit(Span<VkSubmitInfo> submit_infos, 597 VkResult Submit(Span<VkSubmitInfo> submit_infos,
587 VkFence fence = VK_NULL_HANDLE) const noexcept { 598 VkFence fence = VK_NULL_HANDLE) const noexcept {
@@ -603,6 +614,17 @@ class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> {
603public: 614public:
604 /// Attaches a memory allocation. 615 /// Attaches a memory allocation.
605 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; 616 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
617
618 /// Set object name.
619 void SetObjectNameEXT(const char* name) const;
620};
621
622class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> {
623 using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle;
624
625public:
626 /// Set object name.
627 void SetObjectNameEXT(const char* name) const;
606}; 628};
607 629
608class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { 630class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
@@ -611,12 +633,26 @@ class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
611public: 633public:
612 /// Attaches a memory allocation. 634 /// Attaches a memory allocation.
613 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; 635 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
636
637 /// Set object name.
638 void SetObjectNameEXT(const char* name) const;
639};
640
641class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> {
642 using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle;
643
644public:
645 /// Set object name.
646 void SetObjectNameEXT(const char* name) const;
614}; 647};
615 648
616class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { 649class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
617 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; 650 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
618 651
619public: 652public:
653 /// Set object name.
654 void SetObjectNameEXT(const char* name) const;
655
620 u8* Map(VkDeviceSize offset, VkDeviceSize size) const { 656 u8* Map(VkDeviceSize offset, VkDeviceSize size) const {
621 void* data; 657 void* data;
622 Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); 658 Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data));
@@ -632,6 +668,9 @@ class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> {
632 using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; 668 using Handle<VkFence, VkDevice, DeviceDispatch>::Handle;
633 669
634public: 670public:
671 /// Set object name.
672 void SetObjectNameEXT(const char* name) const;
673
635 VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { 674 VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept {
636 return dld->vkWaitForFences(owner, 1, &handle, true, timeout); 675 return dld->vkWaitForFences(owner, 1, &handle, true, timeout);
637 } 676 }
@@ -645,11 +684,22 @@ public:
645 } 684 }
646}; 685};
647 686
687class Framebuffer : public Handle<VkFramebuffer, VkDevice, DeviceDispatch> {
688 using Handle<VkFramebuffer, VkDevice, DeviceDispatch>::Handle;
689
690public:
691 /// Set object name.
692 void SetObjectNameEXT(const char* name) const;
693};
694
648class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { 695class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> {
649 using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; 696 using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle;
650 697
651public: 698public:
652 DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; 699 DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const;
700
701 /// Set object name.
702 void SetObjectNameEXT(const char* name) const;
653}; 703};
654 704
655class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { 705class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
@@ -658,6 +708,9 @@ class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
658public: 708public:
659 CommandBuffers Allocate(std::size_t num_buffers, 709 CommandBuffers Allocate(std::size_t num_buffers,
660 VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; 710 VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const;
711
712 /// Set object name.
713 void SetObjectNameEXT(const char* name) const;
661}; 714};
662 715
663class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { 716class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> {
@@ -671,15 +724,29 @@ class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
671 using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; 724 using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
672 725
673public: 726public:
727 /// Set object name.
728 void SetObjectNameEXT(const char* name) const;
729
674 VkResult GetStatus() const noexcept { 730 VkResult GetStatus() const noexcept {
675 return dld->vkGetEventStatus(owner, handle); 731 return dld->vkGetEventStatus(owner, handle);
676 } 732 }
677}; 733};
678 734
735class ShaderModule : public Handle<VkShaderModule, VkDevice, DeviceDispatch> {
736 using Handle<VkShaderModule, VkDevice, DeviceDispatch>::Handle;
737
738public:
739 /// Set object name.
740 void SetObjectNameEXT(const char* name) const;
741};
742
679class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { 743class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
680 using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; 744 using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
681 745
682public: 746public:
747 /// Set object name.
748 void SetObjectNameEXT(const char* name) const;
749
683 [[nodiscard]] u64 GetCounter() const { 750 [[nodiscard]] u64 GetCounter() const {
684 u64 value; 751 u64 value;
685 Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); 752 Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value));
@@ -720,7 +787,7 @@ class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
720public: 787public:
721 static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, 788 static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
722 Span<const char*> enabled_extensions, const void* next, 789 Span<const char*> enabled_extensions, const void* next,
723 DeviceDispatch& dld) noexcept; 790 DeviceDispatch& dispatch) noexcept;
724 791
725 Queue GetQueue(u32 family_index) const noexcept; 792 Queue GetQueue(u32 family_index) const noexcept;
726 793
@@ -809,8 +876,9 @@ class PhysicalDevice {
809public: 876public:
810 constexpr PhysicalDevice() noexcept = default; 877 constexpr PhysicalDevice() noexcept = default;
811 878
812 constexpr PhysicalDevice(VkPhysicalDevice physical_device, const InstanceDispatch& dld) noexcept 879 constexpr PhysicalDevice(VkPhysicalDevice physical_device_,
813 : physical_device{physical_device}, dld{&dld} {} 880 const InstanceDispatch& dld_) noexcept
881 : physical_device{physical_device_}, dld{&dld_} {}
814 882
815 constexpr operator VkPhysicalDevice() const noexcept { 883 constexpr operator VkPhysicalDevice() const noexcept {
816 return physical_device; 884 return physical_device;
@@ -849,8 +917,8 @@ class CommandBuffer {
849public: 917public:
850 CommandBuffer() noexcept = default; 918 CommandBuffer() noexcept = default;
851 919
852 explicit CommandBuffer(VkCommandBuffer handle, const DeviceDispatch& dld) noexcept 920 explicit CommandBuffer(VkCommandBuffer handle_, const DeviceDispatch& dld_) noexcept
853 : handle{handle}, dld{&dld} {} 921 : handle{handle_}, dld{&dld_} {}
854 922
855 const VkCommandBuffer* address() const noexcept { 923 const VkCommandBuffer* address() const noexcept {
856 return &handle; 924 return &handle;
@@ -929,6 +997,12 @@ public:
929 regions.data(), filter); 997 regions.data(), filter);
930 } 998 }
931 999
1000 void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
 1001                      VkImageLayout dst_layout, Span<VkImageResolve> regions) const noexcept {
1002 dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
1003 regions.data());
1004 }
1005
932 void Dispatch(u32 x, u32 y, u32 z) const noexcept { 1006 void Dispatch(u32 x, u32 y, u32 z) const noexcept {
933 dld->vkCmdDispatch(handle, x, y, z); 1007 dld->vkCmdDispatch(handle, x, y, z);
934 } 1008 }
@@ -943,6 +1017,23 @@ public:
943 image_barriers.size(), image_barriers.data()); 1017 image_barriers.size(), image_barriers.data());
944 } 1018 }
945 1019
1020 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1021 VkDependencyFlags dependency_flags = 0) const noexcept {
1022 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {});
1023 }
1024
1025 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1026 VkDependencyFlags dependency_flags,
1027 const VkBufferMemoryBarrier& buffer_barrier) const noexcept {
1028 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {});
1029 }
1030
1031 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1032 VkDependencyFlags dependency_flags,
1033 const VkImageMemoryBarrier& image_barrier) const noexcept {
1034 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier);
1035 }
1036
946 void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, 1037 void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout,
947 Span<VkBufferImageCopy> regions) const noexcept { 1038 Span<VkBufferImageCopy> regions) const noexcept {
948 dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), 1039 dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(),
@@ -976,6 +1067,13 @@ public:
976 dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); 1067 dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);
977 } 1068 }
978 1069
1070 template <typename T>
1071 void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags,
1072 const T& data) const noexcept {
1073 static_assert(std::is_trivially_copyable_v<T>, "<data> is not trivially copyable");
1074 dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast<u32>(sizeof(T)), &data);
1075 }
1076
979 void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { 1077 void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {
980 dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); 1078 dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());
981 } 1079 }
@@ -1085,6 +1183,20 @@ public:
1085 counter_buffers, counter_buffer_offsets); 1183 counter_buffers, counter_buffer_offsets);
1086 } 1184 }
1087 1185
1186 void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept {
1187 const VkDebugUtilsLabelEXT label_info{
1188 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
1189 .pNext = nullptr,
1190 .pLabelName = label,
1191 .color{color[0], color[1], color[2], color[3]},
1192 };
1193 dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info);
1194 }
1195
1196 void EndDebugUtilsLabelEXT() const noexcept {
1197 dld->vkCmdEndDebugUtilsLabelEXT(handle);
1198 }
1199
1088private: 1200private:
1089 VkCommandBuffer handle; 1201 VkCommandBuffer handle;
1090 const DeviceDispatch* dld; 1202 const DeviceDispatch* dld;
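
Taken together, the CommandBuffer additions shorten the most common recordings: a single-struct push constant, a one-barrier pipeline barrier, and a debug label scope. A hypothetical recording sequence (cmdbuf, layout, and image_barrier are assumed to be valid):

    struct PushBlock {
        std::array<float, 4> tint; // trivially copyable, as PushConstants<T> requires
    };
    cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, PushBlock{{1.f, 0.f, 0.f, 1.f}});
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                           0, image_barrier); // single-barrier overload, no Span boilerplate
    std::array<float, 4> color{0.0f, 1.0f, 0.0f, 1.0f};
    cmdbuf.BeginDebugUtilsLabelEXT("texture upload", color);
    // ... record the labeled work here ...
    cmdbuf.EndDebugUtilsLabelEXT();
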
diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp
deleted file mode 100644
index 53c7ef12d..000000000
--- a/src/video_core/sampler_cache.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/cityhash.h"
6#include "common/common_types.h"
7#include "video_core/sampler_cache.h"
8
9namespace VideoCommon {
10
11std::size_t SamplerCacheKey::Hash() const {
12 static_assert(sizeof(raw) % sizeof(u64) == 0);
13 return static_cast<std::size_t>(
14 Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
15}
16
17bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
18 return raw == rhs.raw;
19}
20
21} // namespace VideoCommon
diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h
deleted file mode 100644
index cbe3ad071..000000000
--- a/src/video_core/sampler_cache.h
+++ /dev/null
@@ -1,60 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <unordered_map>
9
10#include "video_core/textures/texture.h"
11
12namespace VideoCommon {
13
14struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
15 std::size_t Hash() const;
16
17 bool operator==(const SamplerCacheKey& rhs) const;
18
19 bool operator!=(const SamplerCacheKey& rhs) const {
20 return !operator==(rhs);
21 }
22};
23
24} // namespace VideoCommon
25
26namespace std {
27
28template <>
29struct hash<VideoCommon::SamplerCacheKey> {
30 std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept {
31 return k.Hash();
32 }
33};
34
35} // namespace std
36
37namespace VideoCommon {
38
39template <typename SamplerType, typename SamplerStorageType>
40class SamplerCache {
41public:
42 SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) {
43 const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
44 auto& sampler = entry->second;
45 if (is_cache_miss) {
46 sampler = CreateSampler(tsc);
47 }
48 return ToSamplerType(sampler);
49 }
50
51protected:
52 virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0;
53
54 virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0;
55
56private:
57 std::unordered_map<SamplerCacheKey, SamplerStorageType> cache;
58};
59
60} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
index 3f96d9076..db11144c7 100644
--- a/src/video_core/shader/ast.cpp
+++ b/src/video_core/shader/ast.cpp
@@ -212,16 +212,15 @@ public:
212 } 212 }
213 213
214 void operator()(const ExprPredicate& expr) { 214 void operator()(const ExprPredicate& expr) {
215 inner += "P" + std::to_string(expr.predicate); 215 inner += fmt::format("P{}", expr.predicate);
216 } 216 }
217 217
218 void operator()(const ExprCondCode& expr) { 218 void operator()(const ExprCondCode& expr) {
219 u32 cc = static_cast<u32>(expr.cc); 219 inner += fmt::format("CC{}", expr.cc);
220 inner += "CC" + std::to_string(cc);
221 } 220 }
222 221
223 void operator()(const ExprVar& expr) { 222 void operator()(const ExprVar& expr) {
224 inner += "V" + std::to_string(expr.var_index); 223 inner += fmt::format("V{}", expr.var_index);
225 } 224 }
226 225
227 void operator()(const ExprBoolean& expr) { 226 void operator()(const ExprBoolean& expr) {
@@ -229,7 +228,7 @@ public:
229 } 228 }
230 229
231 void operator()(const ExprGprEqual& expr) { 230 void operator()(const ExprGprEqual& expr) {
232 inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; 231 inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value);
233 } 232 }
234 233
235 const std::string& GetResult() const { 234 const std::string& GetResult() const {
@@ -374,8 +373,8 @@ std::string ASTManager::Print() const {
374 return printer.GetResult(); 373 return printer.GetResult();
375} 374}
376 375
377ASTManager::ASTManager(bool full_decompile, bool disable_else_derivation) 376ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
378 : full_decompile{full_decompile}, disable_else_derivation{disable_else_derivation} {}; 377 : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {}
379 378
380ASTManager::~ASTManager() { 379ASTManager::~ASTManager() {
381 Clear(); 380 Clear();
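
Besides being shorter, the fmt::format conversion drops the manual static_cast that std::to_string forced for the condition-code enum; as the hunk above shows, this fmt version interpolates the enum directly. Side by side:

    inner += "CC" + std::to_string(static_cast<u32>(expr.cc)); // before: cast + concatenation
    inner += fmt::format("CC{}", expr.cc);                     // after: direct interpolation
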
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index 8e5a22ab3..dc49b369e 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -76,7 +76,7 @@ public:
76 76
77class ASTIfThen { 77class ASTIfThen {
78public: 78public:
79 explicit ASTIfThen(Expr condition) : condition{std::move(condition)} {} 79 explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
80 Expr condition; 80 Expr condition;
81 ASTZipper nodes{}; 81 ASTZipper nodes{};
82}; 82};
@@ -88,63 +88,68 @@ public:
88 88
89class ASTBlockEncoded { 89class ASTBlockEncoded {
90public: 90public:
 91 explicit ASTBlockEncoded(u32 start, u32 end) : start{start}, end{end} {} 91 explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {}
92 u32 start; 92 u32 start;
93 u32 end; 93 u32 end;
94}; 94};
95 95
96class ASTBlockDecoded { 96class ASTBlockDecoded {
97public: 97public:
98 explicit ASTBlockDecoded(NodeBlock&& new_nodes) : nodes(std::move(new_nodes)) {} 98 explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
99 NodeBlock nodes; 99 NodeBlock nodes;
100}; 100};
101 101
102class ASTVarSet { 102class ASTVarSet {
103public: 103public:
104 explicit ASTVarSet(u32 index, Expr condition) : index{index}, condition{std::move(condition)} {} 104 explicit ASTVarSet(u32 index_, Expr condition_)
105 : index{index_}, condition{std::move(condition_)} {}
106
105 u32 index; 107 u32 index;
106 Expr condition; 108 Expr condition;
107}; 109};
108 110
109class ASTLabel { 111class ASTLabel {
110public: 112public:
111 explicit ASTLabel(u32 index) : index{index} {} 113 explicit ASTLabel(u32 index_) : index{index_} {}
112 u32 index; 114 u32 index;
113 bool unused{}; 115 bool unused{};
114}; 116};
115 117
116class ASTGoto { 118class ASTGoto {
117public: 119public:
118 explicit ASTGoto(Expr condition, u32 label) : condition{std::move(condition)}, label{label} {} 120 explicit ASTGoto(Expr condition_, u32 label_)
121 : condition{std::move(condition_)}, label{label_} {}
122
119 Expr condition; 123 Expr condition;
120 u32 label; 124 u32 label;
121}; 125};
122 126
123class ASTDoWhile { 127class ASTDoWhile {
124public: 128public:
125 explicit ASTDoWhile(Expr condition) : condition{std::move(condition)} {} 129 explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
126 Expr condition; 130 Expr condition;
127 ASTZipper nodes{}; 131 ASTZipper nodes{};
128}; 132};
129 133
130class ASTReturn { 134class ASTReturn {
131public: 135public:
132 explicit ASTReturn(Expr condition, bool kills) 136 explicit ASTReturn(Expr condition_, bool kills_)
133 : condition{std::move(condition)}, kills{kills} {} 137 : condition{std::move(condition_)}, kills{kills_} {}
138
134 Expr condition; 139 Expr condition;
135 bool kills; 140 bool kills;
136}; 141};
137 142
138class ASTBreak { 143class ASTBreak {
139public: 144public:
140 explicit ASTBreak(Expr condition) : condition{std::move(condition)} {} 145 explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
141 Expr condition; 146 Expr condition;
142}; 147};
143 148
144class ASTBase { 149class ASTBase {
145public: 150public:
146 explicit ASTBase(ASTNode parent, ASTData data) 151 explicit ASTBase(ASTNode parent_, ASTData data_)
147 : data{std::move(data)}, parent{std::move(parent)} {} 152 : data{std::move(data_)}, parent{std::move(parent_)} {}
148 153
149 template <class U, class... Args> 154 template <class U, class... Args>
150 static ASTNode Make(ASTNode parent, Args&&... args) { 155 static ASTNode Make(ASTNode parent, Args&&... args) {
@@ -300,7 +305,7 @@ private:
300 305
301class ASTManager final { 306class ASTManager final {
302public: 307public:
303 ASTManager(bool full_decompile, bool disable_else_derivation); 308 explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
304 ~ASTManager(); 309 ~ASTManager();
305 310
306 ASTManager(const ASTManager& o) = delete; 311 ASTManager(const ASTManager& o) = delete;
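
The trailing-underscore renames across these constructors are not purely cosmetic: a parameter named exactly like the member it initializes shadows that member inside the constructor body, which trips shadowing warnings and makes later statements touch the wrong variable. A minimal illustration, not taken from this codebase:

    class Example {
    public:
        explicit Example(int value) : value{value} {} // parameter shadows the member
        // In the body, `value = 0;` would write to the argument copy, not the member.
    private:
        int value;
    };
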
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 6920afdf2..09f93463b 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -13,7 +13,7 @@
13 13
14namespace VideoCommon::Shader { 14namespace VideoCommon::Shader {
15 15
16AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} 16AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}
17 17
18AsyncShaders::~AsyncShaders() { 18AsyncShaders::~AsyncShaders() {
19 KillWorkers(); 19 KillWorkers();
@@ -137,10 +137,9 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
137 const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, 137 const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
138 Vulkan::VKDescriptorPool& descriptor_pool, 138 Vulkan::VKDescriptorPool& descriptor_pool,
139 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, 139 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
140 Vulkan::VKRenderPassCache& renderpass_cache,
141 std::vector<VkDescriptorSetLayoutBinding> bindings, 140 std::vector<VkDescriptorSetLayoutBinding> bindings,
142 Vulkan::SPIRVProgram program, 141 Vulkan::SPIRVProgram program,
143 Vulkan::GraphicsPipelineCacheKey key) { 142 Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
144 std::unique_lock lock(queue_mutex); 143 std::unique_lock lock(queue_mutex);
145 pending_queue.push({ 144 pending_queue.push({
146 .backend = Backend::Vulkan, 145 .backend = Backend::Vulkan,
@@ -149,10 +148,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
149 .scheduler = &scheduler, 148 .scheduler = &scheduler,
150 .descriptor_pool = &descriptor_pool, 149 .descriptor_pool = &descriptor_pool,
151 .update_descriptor_queue = &update_descriptor_queue, 150 .update_descriptor_queue = &update_descriptor_queue,
152 .renderpass_cache = &renderpass_cache,
153 .bindings = std::move(bindings), 151 .bindings = std::move(bindings),
154 .program = std::move(program), 152 .program = std::move(program),
155 .key = key, 153 .key = key,
154 .num_color_buffers = num_color_buffers,
156 }); 155 });
157 cv.notify_one(); 156 cv.notify_one();
158} 157}
@@ -205,8 +204,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
205 } else if (work.backend == Backend::Vulkan) { 204 } else if (work.backend == Backend::Vulkan) {
206 auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( 205 auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
207 *work.vk_device, *work.scheduler, *work.descriptor_pool, 206 *work.vk_device, *work.scheduler, *work.descriptor_pool,
208 *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, 207 *work.update_descriptor_queue, work.key, work.bindings, work.program,
209 work.program); 208 work.num_color_buffers);
210 209
211 work.pp_cache->EmplacePipeline(std::move(pipeline)); 210 work.pp_cache->EmplacePipeline(std::move(pipeline));
212 } 211 }
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 7a99e1dc5..004e214a8 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -66,7 +66,7 @@ public:
66 Tegra::Engines::ShaderType shader_type; 66 Tegra::Engines::ShaderType shader_type;
67 }; 67 };
68 68
69 explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); 69 explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
70 ~AsyncShaders(); 70 ~AsyncShaders();
71 71
72 /// Start up shader worker threads 72 /// Start up shader worker threads
@@ -98,9 +98,9 @@ public:
98 Vulkan::VKScheduler& scheduler, 98 Vulkan::VKScheduler& scheduler,
99 Vulkan::VKDescriptorPool& descriptor_pool, 99 Vulkan::VKDescriptorPool& descriptor_pool,
100 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, 100 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
101 Vulkan::VKRenderPassCache& renderpass_cache,
102 std::vector<VkDescriptorSetLayoutBinding> bindings, 101 std::vector<VkDescriptorSetLayoutBinding> bindings,
103 Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); 102 Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
103 u32 num_color_buffers);
104 104
105private: 105private:
106 void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); 106 void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
@@ -127,10 +127,10 @@ private:
127 Vulkan::VKScheduler* scheduler; 127 Vulkan::VKScheduler* scheduler;
128 Vulkan::VKDescriptorPool* descriptor_pool; 128 Vulkan::VKDescriptorPool* descriptor_pool;
129 Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; 129 Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
130 Vulkan::VKRenderPassCache* renderpass_cache;
131 std::vector<VkDescriptorSetLayoutBinding> bindings; 130 std::vector<VkDescriptorSetLayoutBinding> bindings;
132 Vulkan::SPIRVProgram program; 131 Vulkan::SPIRVProgram program;
133 Vulkan::GraphicsPipelineCacheKey key; 132 Vulkan::GraphicsPipelineCacheKey key;
133 u32 num_color_buffers;
134 }; 134 };
135 135
136 std::condition_variable cv; 136 std::condition_variable cv;
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 4c8971615..43d965f2f 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -66,8 +66,8 @@ struct BlockInfo {
66}; 66};
67 67
68struct CFGRebuildState { 68struct CFGRebuildState {
69 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry) 69 explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
70 : program_code{program_code}, registry{registry}, start{start} {} 70 : program_code{program_code_}, registry{registry_}, start{start_} {}
71 71
72 const ProgramCode& program_code; 72 const ProgramCode& program_code;
73 Registry& registry; 73 Registry& registry;
@@ -241,10 +241,10 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
241 ParseInfo parse_info{}; 241 ParseInfo parse_info{};
242 SingleBranch single_branch{}; 242 SingleBranch single_branch{};
243 243
244 const auto insert_label = [](CFGRebuildState& state, u32 address) { 244 const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) {
245 const auto pair = state.labels.emplace(address); 245 const auto pair = rebuild_state.labels.emplace(label_address);
246 if (pair.second) { 246 if (pair.second) {
247 state.inspect_queries.push_back(address); 247 rebuild_state.inspect_queries.push_back(label_address);
248 } 248 }
249 }; 249 };
250 250
@@ -257,7 +257,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
257 single_branch.ignore = false; 257 single_branch.ignore = false;
258 break; 258 break;
259 } 259 }
260 if (state.registered.count(offset) != 0) { 260 if (state.registered.contains(offset)) {
261 single_branch.address = offset; 261 single_branch.address = offset;
262 single_branch.ignore = true; 262 single_branch.ignore = true;
263 break; 263 break;
@@ -632,12 +632,12 @@ void DecompileShader(CFGRebuildState& state) {
632 for (auto label : state.labels) { 632 for (auto label : state.labels) {
633 state.manager->DeclareLabel(label); 633 state.manager->DeclareLabel(label);
634 } 634 }
635 for (auto& block : state.block_info) { 635 for (const auto& block : state.block_info) {
636 if (state.labels.count(block.start) != 0) { 636 if (state.labels.contains(block.start)) {
637 state.manager->InsertLabel(block.start); 637 state.manager->InsertLabel(block.start);
638 } 638 }
639 const bool ignore = BlockBranchIsIgnored(block.branch); 639 const bool ignore = BlockBranchIsIgnored(block.branch);
640 u32 end = ignore ? block.end + 1 : block.end; 640 const u32 end = ignore ? block.end + 1 : block.end;
641 state.manager->InsertBlock(block.start, end); 641 state.manager->InsertBlock(block.start, end);
642 if (!ignore) { 642 if (!ignore) {
643 InsertBranch(*state.manager, block.branch); 643 InsertBranch(*state.manager, block.branch);
@@ -737,7 +737,7 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
737 auto back = result_out->blocks.begin(); 737 auto back = result_out->blocks.begin();
738 auto next = std::next(back); 738 auto next = std::next(back);
739 while (next != result_out->blocks.end()) { 739 while (next != result_out->blocks.end()) {
740 if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { 740 if (!state.labels.contains(next->start) && next->start == back->end + 1) {
741 back->end = next->end; 741 back->end = next->end;
742 next = result_out->blocks.erase(next); 742 next = result_out->blocks.erase(next);
743 continue; 743 continue;
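
The count() != 0 to contains() conversions above rely on the C++20 associative-container member; behavior and complexity are unchanged, only the intent is clearer:

    std::unordered_set<u32> labels{8, 16};
    if (labels.contains(16)) { /* label already declared */ } // same as labels.count(16) != 0
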
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 62a3510d8..37bf96492 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -42,10 +42,10 @@ struct Condition {
42class SingleBranch { 42class SingleBranch {
43public: 43public:
44 SingleBranch() = default; 44 SingleBranch() = default;
45 SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, 45 explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_,
46 bool ignore) 46 bool is_brk_, bool ignore_)
47 : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, 47 : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_},
48 ignore{ignore} {} 48 ignore{ignore_} {}
49 49
50 bool operator==(const SingleBranch& b) const { 50 bool operator==(const SingleBranch& b) const {
51 return std::tie(condition, address, kill, is_sync, is_brk, ignore) == 51 return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
@@ -65,15 +65,15 @@ public:
 };
 
 struct CaseBranch {
-    CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
+    explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {}
     u32 cmp_value;
     u32 address;
 };
 
 class MultiBranch {
 public:
-    MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
-        : gpr{gpr}, branches{std::move(branches)} {}
+    explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_)
+        : gpr{gpr_}, branches{std::move(branches_)} {}
 
     u32 gpr{};
     std::vector<CaseBranch> branches{};
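
The constructor rewrites in control_flow.h combine two cleanups: constructors become explicit, so argument lists can no longer convert to SingleBranch, CaseBranch or MultiBranch silently, and parameters gain a trailing underscore so they stop shadowing the members they initialize (which -Wshadow flags under gcc/clang). A reduced sketch of both, using a hypothetical Branch type:

    // Hypothetical reduction of the CaseBranch pattern.
    struct Branch {
        // Before: "Branch(unsigned address) : address{address} {}" shadows
        // the member and allows implicit conversions like "Branch b = 42;".
        explicit Branch(unsigned address_) : address{address_} {}
        unsigned address;
    };

    int main() {
        Branch ok{42};       // direct initialization still works
        // Branch bad = 42;  // now ill-formed thanks to explicit
        return static_cast<int>(ok.address) - 42;
    }
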
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index eeac328a6..6576d1208 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -25,7 +25,7 @@ using Tegra::Shader::OpCode;
 namespace {
 
 void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
-                              const std::list<Sampler>& used_samplers) {
+                              const std::list<SamplerEntry>& used_samplers) {
     if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
         return;
     }
@@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
     }
 }
 
-std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
+std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
                                         VideoCore::GuestDriverProfile& gpu_driver,
-                                        const std::list<Sampler>& used_samplers) {
+                                        const std::list<SamplerEntry>& used_samplers) {
     const u32 base_offset = sampler_to_deduce.offset;
     u32 max_offset{std::numeric_limits<u32>::max()};
     for (const auto& sampler : used_samplers) {
@@ -66,7 +66,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
 
 class ASTDecoder {
 public:
-    ASTDecoder(ShaderIR& ir) : ir(ir) {}
+    explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
 
     void operator()(ASTProgram& ast) {
         ASTNode current = ast.nodes.GetFirst();
@@ -153,8 +153,8 @@ void ShaderIR::Decode() {
     const auto& blocks = shader_info.blocks;
     NodeBlock current_block;
     u32 current_label = static_cast<u32>(exit_branch);
-    for (auto& block : blocks) {
-        if (shader_info.labels.count(block.start) != 0) {
+    for (const auto& block : blocks) {
+        if (shader_info.labels.contains(block.start)) {
             insert_block(current_block, current_label);
             current_block.clear();
             current_label = block.start;
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index afef5948d..15eb700e7 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -110,8 +110,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
             case SubOp::Sqrt:
                 return Operation(OperationCode::FSqrt, PRECISE, op_a);
             default:
-                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
-                                  static_cast<unsigned>(instr.sub_op.Value()));
+                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
                 return Immediate(0);
             }
         }();
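
Dropping static_cast from these logging macros works because the fmt version vendored alongside them formats enums by falling back to their underlying integral value. In current fmt releases (9 and later) the same fallback is an explicit opt-in spelled format_as; a sketch under that assumption, with SubOp as an illustrative stand-in for the real bitfield type:

    // Assumes fmt 9+ is available; SubOp is a stand-in, not the Tegra type.
    #include <fmt/format.h>

    enum class SubOp : unsigned { Cos = 0, Sin = 1, Sqrt = 8 };

    // Found by ADL: opts SubOp into formatting as its underlying value.
    constexpr auto format_as(SubOp op) { return fmt::underlying(op); }

    int main() {
        // Prints "Unhandled MUFU sub op=8" with no cast at the call site.
        fmt::print("Unhandled MUFU sub op={:x}\n", SubOp::Sqrt);
    }
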
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 73155966f..7b5bb7003 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -83,7 +83,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
             case IAdd3Height::UpperHalfWord:
                 return BitfieldExtract(value, 16, 16);
             default:
-                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
+                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
                 return Immediate(0);
             }
         };
@@ -258,7 +258,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
     case OpCode::Id::LEA_IMM:
     case OpCode::Id::LEA_RZ:
     case OpCode::Id::LEA_HI: {
-        auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
+        auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
             switch (opcode->get().GetId()) {
             case OpCode::Id::LEA_R2: {
                 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
@@ -294,8 +294,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                              "Unhandled LEA Predicate");
 
-        Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c));
-        value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value));
+        Node value =
+            Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
+        value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
         SetRegister(bb, instr.gpr0, std::move(value));
 
         break;
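
The op_a_/op_b_/op_c_ structured binding above is presumably renamed because DecodeArithmeticInteger already declares op_a and op_b earlier in the function, and reusing those names trips -Wshadow. A reduced reproduction, hypothetical values only:

    // g++/clang -Wshadow warns on the commented line.
    #include <tuple>

    int Compute() {
        int op_a = 1; // outer declaration, as in the decoder function

        // auto [op_a, op_b] = std::tuple{2, 3}; // would shadow the outer op_a
        auto [op_a_, op_b_] = std::tuple{2, 3};  // distinct names, no warning
        return op_a + op_a_ + op_b_;
    }

    int main() {
        return Compute() == 6 ? 0 : 1;
    }
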
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 2a30aab2b..73580277a 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -72,7 +72,7 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
         case LogicOperation::PassB:
             return op_b;
         default:
-            UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
+            UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
             return Immediate(0);
         }
     }();
@@ -92,8 +92,7 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
         break;
     }
     default:
-        UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
-                          static_cast<u32>(predicate_mode));
+        UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
     }
 }
 
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index b9989c88c..fea7a54df 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -244,7 +244,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
                 return Operation(OperationCode::FTrunc, value);
             default:
                 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
-                                  static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+                                  instr.conversion.f2f.rounding.Value());
                 return value;
             }
         }();
@@ -300,7 +300,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
                 return Operation(OperationCode::FTrunc, PRECISE, value);
             default:
                 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
-                                  static_cast<u32>(instr.conversion.f2i.rounding.Value()));
+                                  instr.conversion.f2i.rounding.Value());
                 return Immediate(0);
             }
         }();
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index b2e88fa20..fa83108cd 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -22,13 +22,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    PredCondition cond;
-    bool bf;
-    bool ftz;
-    bool neg_a;
-    bool abs_a;
-    bool neg_b;
-    bool abs_b;
+    PredCondition cond{};
+    bool bf = false;
+    bool ftz = false;
+    bool neg_a = false;
+    bool abs_a = false;
+    bool neg_b = false;
+    bool abs_b = false;
     switch (opcode->get().GetId()) {
     case OpCode::Id::HSET2_C:
     case OpCode::Id::HSET2_IMM:
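
Initializing cond and the flag booleans in half_set.cpp closes an undefined-behaviour hole: if a future opcode case forgot an assignment, the old declarations were read uninitialized. Value-initialization gives a well-defined default instead; a minimal sketch (PredCondition is a stand-in enum):

    // Illustrative only; PredCondition stands in for the Tegra enum.
    enum class PredCondition { LessThan, Equal, GreaterThan };

    int Classify(int id) {
        PredCondition cond{}; // value-initialized: well-defined default
        bool bf = false;

        switch (id) {
        case 0:
            cond = PredCondition::Equal;
            bf = true;
            break;
        default:
            break; // a missed case now reads defined defaults, not garbage
        }
        return static_cast<int>(cond) + (bf ? 10 : 0);
    }

    int main() {
        return Classify(1); // returns 0: the defaults
    }
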
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 1ed4212ee..5470e8cf4 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -358,9 +358,9 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                          instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
 
         auto descriptor = [this, instr] {
-            std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
+            std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
             if (instr.suldst.is_immediate) {
-                descriptor =
+                sampler_descriptor =
                     registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
             } else {
                 const Node image_register = GetRegister(instr.gpr39);
@@ -368,12 +368,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                                               static_cast<s64>(global_code.size()));
                 const auto buffer = std::get<1>(result);
                 const auto offset = std::get<2>(result);
-                descriptor = registry.ObtainBindlessSampler(buffer, offset);
+                sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
             }
-            if (!descriptor) {
+            if (!sampler_descriptor) {
                 UNREACHABLE_MSG("Failed to obtain image descriptor");
             }
-            return *descriptor;
+            return *sampler_descriptor;
         }();
 
         const auto comp_mask = GetImageComponentMask(descriptor.format);
@@ -497,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
+ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
     const auto offset = static_cast<u32>(image.index.Value());
 
-    const auto it = std::find_if(std::begin(used_images), std::end(used_images),
-                                 [offset](const Image& entry) { return entry.offset == offset; });
+    const auto it =
+        std::find_if(std::begin(used_images), std::end(used_images),
+                     [offset](const ImageEntry& entry) { return entry.offset == offset; });
     if (it != std::end(used_images)) {
         ASSERT(!it->is_bindless && it->type == type);
         return *it;
@@ -511,7 +512,7 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t
     return used_images.emplace_back(next_index, offset, type);
 }
 
-Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
+ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
     const Node image_register = GetRegister(reg);
     const auto result =
         TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
@@ -520,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im
     const auto offset = std::get<2>(result);
 
     const auto it = std::find_if(std::begin(used_images), std::end(used_images),
-                                 [buffer, offset](const Image& entry) {
+                                 [buffer, offset](const ImageEntry& entry) {
                                      return entry.buffer == buffer && entry.offset == offset;
                                  });
     if (it != std::end(used_images)) {
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index e2bba88dd..50f4e7d35 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -47,7 +47,7 @@ OperationCode GetAtomOperation(AtomicOp op) {
     case AtomicOp::Exch:
         return OperationCode::AtomicIExchange;
     default:
-        UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
+        UNIMPLEMENTED_MSG("op={}", op);
         return OperationCode::AtomicIAdd;
     }
 }
@@ -83,7 +83,7 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
     case Tegra::Shader::UniformType::UnsignedQuad:
         return 128;
     default:
-        UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+        UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
         return 32;
     }
 }
@@ -175,12 +175,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             break;
         }
         default:
-            UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
+            UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
         }
         break;
     }
     case OpCode::Id::LD_L:
-        LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
+        LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
         [[fallthrough]];
     case OpCode::Id::LD_S: {
         const auto GetAddress = [&](s32 offset) {
@@ -224,7 +224,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }
         default:
             UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
-                              static_cast<u32>(instr.ldst_sl.type.Value()));
+                              instr.ldst_sl.type.Value());
         }
         break;
     }
@@ -306,8 +306,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::ST_L:
-        LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}",
-                  static_cast<u64>(instr.st_l.cache_management.Value()));
+        LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
         [[fallthrough]];
     case OpCode::Id::ST_S: {
         const auto GetAddress = [&](s32 offset) {
@@ -340,7 +339,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }
         default:
             UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
-                              static_cast<u32>(instr.ldst_sl.type.Value()));
+                              instr.ldst_sl.type.Value());
         }
         break;
     }
@@ -387,7 +386,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     }
     case OpCode::Id::RED: {
         UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
-                             static_cast<int>(instr.red.type.Value()));
+                             instr.red.type.Value());
         const auto [real_address, base_address, descriptor] =
             TrackGlobalMemory(bb, instr, true, true);
         if (!real_address || !base_address) {
@@ -403,12 +402,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
                                  instr.atom.operation == AtomicOp::Dec ||
                                  instr.atom.operation == AtomicOp::SafeAdd,
-                             "operation={}", static_cast<int>(instr.atom.operation.Value()));
+                             "operation={}", instr.atom.operation.Value());
         UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
                                  instr.atom.type == GlobalAtomicType::U64 ||
                                  instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
                                  instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
-                             "type={}", static_cast<int>(instr.atom.type.Value()));
+                             "type={}", instr.atom.type.Value());
 
         const auto [real_address, base_address, descriptor] =
             TrackGlobalMemory(bb, instr, true, true);
@@ -428,10 +427,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     case OpCode::Id::ATOMS: {
         UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
                                  instr.atoms.operation == AtomicOp::Dec,
-                             "operation={}", static_cast<int>(instr.atoms.operation.Value()));
+                             "operation={}", instr.atoms.operation.Value());
         UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
                                  instr.atoms.type == AtomicType::U64,
-                             "type={}", static_cast<int>(instr.atoms.type.Value()));
+                             "type={}", instr.atoms.type.Value());
         const bool is_signed =
             instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
         const s32 offset = instr.atoms.GetImmediateOffset();
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 29a7cfbfe..d3ea07aac 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -34,14 +34,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::EXIT: {
-        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
-                             static_cast<u32>(cc));
+        const ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
 
         switch (instr.flow.cond) {
         case Tegra::Shader::FlowCondition::Always:
             bb.push_back(Operation(OperationCode::Exit));
-            if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
+            if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
                 // If this is an unconditional exit then just end processing here,
                 // otherwise we have to account for the possibility of the condition
                 // not being met, so continue processing the next instruction.
@@ -56,17 +55,15 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
             break;
 
         default:
-            UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
-                              static_cast<u32>(instr.flow.cond.Value()));
+            UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
         }
         break;
     }
     case OpCode::Id::KIL: {
         UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
 
-        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
-                             static_cast<u32>(cc));
+        const ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
 
         bb.push_back(Operation(OperationCode::Discard));
         break;
@@ -90,11 +87,11 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
             UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
             return Immediate(0U);
         case SystemVariable::Tid: {
-            Node value = Immediate(0);
-            value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
-            value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9);
-            value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
-            return value;
+            Node val = Immediate(0);
+            val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
+            val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
+            val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
+            return val;
         }
         case SystemVariable::TidX:
             return Operation(OperationCode::LocalInvocationIdX);
@@ -130,8 +127,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
             return Immediate(0u);
         }
         default:
-            UNIMPLEMENTED_MSG("Unhandled system move: {}",
-                              static_cast<u32>(instr.sys20.Value()));
+            UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
             return Immediate(0u);
         }
     }();
@@ -181,8 +177,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         }
         const Node branch = Operation(OperationCode::BranchIndirect, operand);
 
-        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-        if (cc != Tegra::Shader::ConditionCode::T) {
+        const ConditionCode cc = instr.flow_condition_code;
+        if (cc != ConditionCode::T) {
             bb.push_back(Conditional(GetConditionCode(cc), {branch}));
         } else {
             bb.push_back(branch);
@@ -218,9 +214,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::SYNC: {
-        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
-                             static_cast<u32>(cc));
+        const ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
 
         if (decompiled) {
             break;
@@ -231,9 +226,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::BRK: {
-        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
-                             static_cast<u32>(cc));
+        const ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
         if (decompiled) {
             break;
         }
@@ -306,7 +300,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         case Tegra::Shader::MembarType::GL:
             return OperationCode::MemoryBarrierGlobal;
         default:
-            UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
+            UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
             return OperationCode::MemoryBarrierGlobal;
         }
     }();
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index d4ffa8014..a53819c15 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -125,7 +125,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
     case OpCode::Id::SHF_LEFT_IMM: {
         UNIMPLEMENTED_IF(instr.generates_cc);
         UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
-                             static_cast<int>(instr.shf.xmode.Value()));
+                             instr.shf.xmode.Value());
 
         if (instr.is_b_imm) {
             op_b = Immediate(static_cast<u32>(instr.shf.immediate));
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 02fdccd86..833fa2a39 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -34,7 +34,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
     case TextureType::TextureCube:
         return 3;
     default:
-        UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
+        UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
         return 0;
     }
 }
@@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
 
         SamplerInfo info;
         info.is_shadow = is_depth_compare;
-        const std::optional<Sampler> sampler = GetSampler(instr.sampler, info);
+        const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
 
         Node4 values;
         for (u32 element = 0; element < values.size(); ++element) {
@@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         SamplerInfo info;
         info.type = texture_type;
         info.is_array = is_array;
-        const std::optional<Sampler> sampler = is_bindless
-                                                   ? GetBindlessSampler(base_reg, info, index_var)
-                                                   : GetSampler(instr.sampler, info);
+        const std::optional<SamplerEntry> sampler =
+            is_bindless ? GetBindlessSampler(base_reg, info, index_var)
+                        : GetSampler(instr.sampler, info);
         Node4 values;
         if (!sampler) {
             std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
@@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         [[fallthrough]];
     case OpCode::Id::TXQ: {
         Node index_var;
-        const std::optional<Sampler> sampler = is_bindless
-                                                   ? GetBindlessSampler(instr.gpr8, {}, index_var)
-                                                   : GetSampler(instr.sampler, {});
+        const std::optional<SamplerEntry> sampler =
+            is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
+                        : GetSampler(instr.sampler, {});
 
         if (!sampler) {
             u32 indexer = 0;
@@ -255,8 +255,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             break;
         }
         default:
-            UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
-                              static_cast<u32>(instr.txq.query_type.Value()));
+            UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
         }
         break;
     }
@@ -273,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         info.type = texture_type;
         info.is_array = is_array;
         Node index_var;
-        const std::optional<Sampler> sampler =
+        const std::optional<SamplerEntry> sampler =
             is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
                         : GetSampler(instr.sampler, info);
 
@@ -302,7 +301,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             case TextureType::TextureCube:
                 return 3;
             default:
-                UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type));
+                UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
                 return 2;
             }
         }();
@@ -380,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
     return info;
 }
 
-std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
-                                            SamplerInfo sampler_info) {
+std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
+                                                 SamplerInfo sampler_info) {
     const u32 offset = static_cast<u32>(sampler.index.Value());
     const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
 
     // If this sampler has already been used, return the existing mapping.
-    const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
-                                 [offset](const Sampler& entry) { return entry.offset == offset; });
+    const auto it =
+        std::find_if(used_samplers.begin(), used_samplers.end(),
+                     [offset](const SamplerEntry& entry) { return entry.offset == offset; });
     if (it != used_samplers.end()) {
         ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
@@ -400,8 +400,8 @@ std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
                                       *info.is_shadow, *info.is_buffer, false);
 }
 
-std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
-                                                    Node& index_var) {
+std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+                                                         SamplerInfo info, Node& index_var) {
     const Node sampler_register = GetRegister(reg);
     const auto [base_node, tracked_sampler_info] =
         TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
@@ -417,7 +417,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
 
         // If this sampler has already been used, return the existing mapping.
         const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
-                                     [buffer, offset](const Sampler& entry) {
+                                     [buffer, offset](const SamplerEntry& entry) {
                                          return entry.buffer == buffer && entry.offset == offset;
                                      });
         if (it != used_samplers.end()) {
@@ -437,11 +437,12 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
         info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
 
         // Try to use an already created sampler if it exists
-        const auto it = std::find_if(
-            used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
-                return offsets == std::pair{entry.offset, entry.secondary_offset} &&
-                       indices == std::pair{entry.buffer, entry.secondary_buffer};
-            });
+        const auto it =
+            std::find_if(used_samplers.begin(), used_samplers.end(),
+                         [indices, offsets](const SamplerEntry& entry) {
+                             return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+                                    indices == std::pair{entry.buffer, entry.secondary_buffer};
+                         });
         if (it != used_samplers.end()) {
             ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
                    it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
@@ -461,7 +462,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
         // If this sampler has already been used, return the existing mapping.
         const auto it = std::find_if(
             used_samplers.begin(), used_samplers.end(),
-            [base_offset](const Sampler& entry) { return entry.offset == base_offset; });
+            [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
         if (it != used_samplers.end()) {
             ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                    it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
@@ -566,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
     info.is_buffer = false;
 
     Node index_var;
-    const std::optional<Sampler> sampler = is_bindless
-                                               ? GetBindlessSampler(*bindless_reg, info, index_var)
-                                               : GetSampler(instr.sampler, info);
+    const std::optional<SamplerEntry> sampler =
+        is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
+                    : GetSampler(instr.sampler, info);
     if (!sampler) {
         return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
     }
@@ -595,7 +596,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
         lod = GetRegister(instr.gpr20.Value() + bias_offset);
         break;
     default:
-        UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
+        UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
         break;
     }
 
@@ -725,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     info.is_shadow = depth_compare;
 
     Node index_var;
-    const std::optional<Sampler> sampler =
+    const std::optional<SamplerEntry> sampler =
         is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
                     : GetSampler(instr.sampler, info);
     Node4 values;
@@ -784,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
     // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
     // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
 
-    const std::optional<Sampler> sampler = GetSampler(instr.sampler, {});
+    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
@@ -801,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
     info.type = texture_type;
     info.is_array = is_array;
     info.is_shadow = false;
-    const std::optional<Sampler> sampler = GetSampler(instr.sampler, info);
+    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
 
     const std::size_t type_coord_count = GetCoordCount(texture_type);
     const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index 11b77f795..37433d783 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -27,7 +27,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
     case VoteOperation::Eq:
         return OperationCode::VoteEqual;
     default:
-        UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op));
+        UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
         return OperationCode::VoteAll;
     }
 }
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
index 4e8264367..cda284c72 100644
--- a/src/video_core/shader/expr.h
+++ b/src/video_core/shader/expr.h
@@ -76,7 +76,7 @@ public:
 
 class ExprPredicate final {
 public:
-    explicit ExprPredicate(u32 predicate) : predicate{predicate} {}
+    explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
 
     bool operator==(const ExprPredicate& b) const {
         return predicate == b.predicate;
@@ -91,7 +91,7 @@ public:
 
 class ExprCondCode final {
 public:
-    explicit ExprCondCode(ConditionCode cc) : cc{cc} {}
+    explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
 
     bool operator==(const ExprCondCode& b) const {
         return cc == b.cc;
@@ -121,7 +121,7 @@ public:
 
 class ExprGprEqual final {
 public:
-    ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {}
+    explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
 
     bool operator==(const ExprGprEqual& b) const {
         return gpr == b.gpr && value == b.value;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index a1e2c4d8e..b54d33763 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -282,26 +282,25 @@ struct SeparateSamplerNode;
 using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
 using TrackSampler = std::shared_ptr<TrackSamplerData>;
 
-struct Sampler {
+struct SamplerEntry {
     /// Bound samplers constructor
-    constexpr explicit Sampler(u32 index_, u32 offset_, Tegra::Shader::TextureType type_,
-                               bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
+    explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
+                          bool is_shadow_, bool is_buffer_, bool is_indexed_)
         : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
           is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
 
     /// Separate sampler constructor
-    constexpr explicit Sampler(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
-                               Tegra::Shader::TextureType type, bool is_array_, bool is_shadow_,
-                               bool is_buffer_)
+    explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
+                          Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
+                          bool is_buffer_)
         : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
-          buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array_},
+          buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
           is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
 
     /// Bindless samplers constructor
-    constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_,
-                               Tegra::Shader::TextureType type, bool is_array_, bool is_shadow_,
-                               bool is_buffer_, bool is_indexed_)
-        : index{index_}, offset{offset_}, buffer{buffer_}, type{type}, is_array{is_array_},
+    explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
+                          bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
+        : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
           is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
     }
 
@@ -340,14 +339,14 @@ struct BindlessSamplerNode {
     u32 offset;
 };
 
-struct Image {
+struct ImageEntry {
 public:
     /// Bound images constructor
-    constexpr explicit Image(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
+    explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
         : index{index_}, offset{offset_}, type{type_} {}
 
     /// Bindless samplers constructor
-    constexpr explicit Image(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
+    explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
         : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
 
     void MarkWrite() {
@@ -391,7 +390,7 @@ struct MetaArithmetic {
 
 /// Parameters describing a texture sampler
 struct MetaTexture {
-    Sampler sampler;
+    SamplerEntry sampler;
     Node array;
     Node depth_compare;
     std::vector<Node> aoffi;
@@ -405,7 +404,7 @@ struct MetaTexture {
 };
 
 struct MetaImage {
-    const Image& image;
+    const ImageEntry& image;
     std::vector<Node> values;
     u32 element{};
 };
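
The Sampler → SamplerEntry and Image → ImageEntry renames in node.h read as disambiguation: the decoder headers also use Tegra::Shader::Sampler and Tegra::Shader::Image for the raw instruction operands, so the same unqualified name meant two different things depending on which using-declarations were in scope. The constructors also lose constexpr, which is presumably harmless since entries are only built at decode time. A reduced sketch of the collision, with hypothetical namespaces:

    // Hypothetical reduction of the name clash the rename avoids.
    namespace Tegra::Shader {
    struct Sampler { unsigned index; }; // encoded instruction operand
    }

    namespace VideoCommon::Shader {
    struct SamplerEntry { unsigned offset; }; // IR bookkeeping record

    using namespace Tegra::Shader;

    // With both sides named "Sampler", this signature needed qualification
    // everywhere; distinct names keep it unambiguous.
    SamplerEntry MakeEntry(Sampler s) { return SamplerEntry{s.index}; }
    } // namespace VideoCommon::Shader

    int main() {
        return static_cast<int>(VideoCommon::Shader::MakeEntry({7}).offset) - 7;
    }
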
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 7bf4ff387..6a5b6940d 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -107,7 +107,7 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
         UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
         return {};
     default:
-        UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
+        UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
         return {};
     }
 }
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 29d794b34..a4987ffc6 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -25,9 +25,10 @@ using Tegra::Shader::PredCondition;
 using Tegra::Shader::PredOperation;
 using Tegra::Shader::Register;
 
-ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
-                   Registry& registry)
-    : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
+ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_,
+                   Registry& registry_)
+    : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{
+                                                                                       registry_} {
     Decode();
     PostDecode();
 }
@@ -170,7 +171,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe
         // Default - do nothing
         return value;
     default:
-        UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
+        UNREACHABLE_MSG("Unimplemented conversion size: {}", size);
         return value;
     }
 }
@@ -335,15 +336,15 @@ OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
     return operation_table[index];
 }
 
-Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
+Node ShaderIR::GetConditionCode(ConditionCode cc) const {
     switch (cc) {
-    case Tegra::Shader::ConditionCode::NEU:
+    case ConditionCode::NEU:
         return GetInternalFlag(InternalFlag::Zero, true);
-    case Tegra::Shader::ConditionCode::FCSM_TR:
+    case ConditionCode::FCSM_TR:
         UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
         return MakeNode<PredicateNode>(Pred::NeverExecute, false);
     default:
-        UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
+        UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc);
         return MakeNode<PredicateNode>(Pred::NeverExecute, false);
     }
 }
@@ -451,8 +452,8 @@ void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
 }
 
 std::size_t ShaderIR::DeclareAmend(Node new_amend) {
-    const std::size_t id = amend_code.size();
-    amend_code.push_back(new_amend);
+    const auto id = amend_code.size();
+    amend_code.push_back(std::move(new_amend));
     return id;
 }
 
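
DeclareAmend now moves its Node argument into amend_code instead of copying it; Node in this IR is a shared_ptr alias, so the move skips an atomic reference-count round trip per amend. The general pattern, sketched with a plain shared_ptr:

    #include <cstddef>
    #include <memory>
    #include <vector>

    // Node in the shader IR is a std::shared_ptr alias; this mirrors that shape.
    using Node = std::shared_ptr<int>;

    std::size_t DeclareAmend(std::vector<Node>& amend_code, Node new_amend) {
        const auto id = amend_code.size();
        // Moving transfers ownership without touching the atomic ref-count;
        // push_back(new_amend) would copy the shared_ptr and bump it instead.
        amend_code.push_back(std::move(new_amend));
        return id;
    }

    int main() {
        std::vector<Node> amend_code;
        return DeclareAmend(amend_code, std::make_shared<int>(1)) == 0 ? 0 : 1;
    }
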
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 3a98b2104..0c6ab0f07 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -29,8 +29,8 @@ struct ShaderBlock;
 constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
 
 struct ConstBuffer {
-    constexpr explicit ConstBuffer(u32 max_offset, bool is_indirect)
-        : max_offset{max_offset}, is_indirect{is_indirect} {}
+    constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
+        : max_offset{max_offset_}, is_indirect{is_indirect_} {}
 
     constexpr ConstBuffer() = default;
 
@@ -66,8 +66,8 @@ struct GlobalMemoryUsage {
 
 class ShaderIR final {
 public:
-    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
-                      Registry& registry);
+    explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
+                      CompilerSettings settings_, Registry& registry_);
     ~ShaderIR();
 
     const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -94,11 +94,11 @@ public:
         return used_cbufs;
     }
 
-    const std::list<Sampler>& GetSamplers() const {
+    const std::list<SamplerEntry>& GetSamplers() const {
         return used_samplers;
     }
 
-    const std::list<Image>& GetImages() const {
+    const std::list<ImageEntry>& GetImages() const {
         return used_images;
     }
 
@@ -334,17 +334,17 @@ private:
                                std::optional<Tegra::Engines::SamplerDescriptor> sampler);
 
     /// Accesses a texture sampler.
-    std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
+    std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
 
     /// Accesses a texture sampler for a bindless texture.
-    std::optional<Sampler> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
-                                              Node& index_var);
+    std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
+                                                   Node& index_var);
 
     /// Accesses an image.
-    Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
+    ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
 
     /// Access a bindless image sampler.
-    Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
+    ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
 
     /// Extracts a sequence of bits from a node
     Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -454,8 +454,8 @@ private:
     std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
     std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
     std::map<u32, ConstBuffer> used_cbufs;
-    std::list<Sampler> used_samplers;
-    std::list<Image> used_images;
+    std::list<SamplerEntry> used_samplers;
+    std::list<ImageEntry> used_images;
     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
     std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
     bool uses_layer{};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 1688267bb..6308aef94 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -28,7 +28,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
     case Tegra::Texture::TextureType::Texture2DArray:
         return SurfaceTarget::Texture2DArray;
     default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type));
+        LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", texture_type);
         UNREACHABLE();
         return SurfaceTarget::Texture2D;
     }
@@ -47,7 +47,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
     case SurfaceTarget::TextureCubeArray:
         return true;
     default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
+        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
         UNREACHABLE();
         return false;
     }
@@ -66,7 +66,7 @@ bool SurfaceTargetIsArray(SurfaceTarget target) {
     case SurfaceTarget::TextureCubeArray:
         return true;
     default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
+        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
         UNREACHABLE();
         return false;
     }
@@ -85,7 +85,7 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
     case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT:
         return PixelFormat::D32_FLOAT_S8_UINT;
     default:
-        UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
+        UNIMPLEMENTED_MSG("Unimplemented format={}", format);
         return PixelFormat::S8_UINT_D24_UNORM;
     }
 }
@@ -183,7 +183,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
     case Tegra::RenderTargetFormat::R8_UINT:
         return PixelFormat::R8_UINT;
     default:
-        UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<int>(format));
+        UNIMPLEMENTED_MSG("Unimplemented format={}", format);
         return PixelFormat::A8B8G8R8_UNORM;
     }
 }
@@ -197,7 +197,7 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
197 case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM: 197 case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM:
198 return PixelFormat::B8G8R8A8_UNORM; 198 return PixelFormat::B8G8R8A8_UNORM;
199 default: 199 default:
200 UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); 200 UNIMPLEMENTED_MSG("Unimplemented format={}", format);
201 return PixelFormat::A8B8G8R8_UNORM; 201 return PixelFormat::A8B8G8R8_UNORM;
202 } 202 }
203} 203}
@@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) {
280} 280}
281 281
282std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { 282std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
283 return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; 283 return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
284} 284}
285 285
286} // namespace VideoCore::Surface 286} // namespace VideoCore::Surface
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index cfd12fa61..c40ab89d0 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -120,7 +120,7 @@ enum class PixelFormat {
120 Max = MaxDepthStencilFormat, 120 Max = MaxDepthStencilFormat,
121 Invalid = 255, 121 Invalid = 255,
122}; 122};
123static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); 123constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
124 124
125enum class SurfaceType { 125enum class SurfaceType {
126 ColorTexture = 0, 126 ColorTexture = 0,
@@ -140,117 +140,7 @@ enum class SurfaceTarget {
140 TextureCubeArray, 140 TextureCubeArray,
141}; 141};
142 142
143constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 143constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
144 0, // A8B8G8R8_UNORM
145 0, // A8B8G8R8_SNORM
146 0, // A8B8G8R8_SINT
147 0, // A8B8G8R8_UINT
148 0, // R5G6B5_UNORM
149 0, // B5G6R5_UNORM
150 0, // A1R5G5B5_UNORM
151 0, // A2B10G10R10_UNORM
152 0, // A2B10G10R10_UINT
153 0, // A1B5G5R5_UNORM
154 0, // R8_UNORM
155 0, // R8_SNORM
156 0, // R8_SINT
157 0, // R8_UINT
158 0, // R16G16B16A16_FLOAT
159 0, // R16G16B16A16_UNORM
160 0, // R16G16B16A16_SNORM
161 0, // R16G16B16A16_SINT
162 0, // R16G16B16A16_UINT
163 0, // B10G11R11_FLOAT
164 0, // R32G32B32A32_UINT
165 2, // BC1_RGBA_UNORM
166 2, // BC2_UNORM
167 2, // BC3_UNORM
168 2, // BC4_UNORM
169 2, // BC4_SNORM
170 2, // BC5_UNORM
171 2, // BC5_SNORM
172 2, // BC7_UNORM
173 2, // BC6H_UFLOAT
174 2, // BC6H_SFLOAT
175 2, // ASTC_2D_4X4_UNORM
176 0, // B8G8R8A8_UNORM
177 0, // R32G32B32A32_FLOAT
178 0, // R32G32B32A32_SINT
179 0, // R32G32_FLOAT
180 0, // R32G32_SINT
181 0, // R32_FLOAT
182 0, // R16_FLOAT
183 0, // R16_UNORM
184 0, // R16_SNORM
185 0, // R16_UINT
186 0, // R16_SINT
187 0, // R16G16_UNORM
188 0, // R16G16_FLOAT
189 0, // R16G16_UINT
190 0, // R16G16_SINT
191 0, // R16G16_SNORM
192 0, // R32G32B32_FLOAT
193 0, // A8B8G8R8_SRGB
194 0, // R8G8_UNORM
195 0, // R8G8_SNORM
196 0, // R8G8_SINT
197 0, // R8G8_UINT
198 0, // R32G32_UINT
199 0, // R16G16B16X16_FLOAT
200 0, // R32_UINT
201 0, // R32_SINT
202 2, // ASTC_2D_8X8_UNORM
203 2, // ASTC_2D_8X5_UNORM
204 2, // ASTC_2D_5X4_UNORM
205 0, // B8G8R8A8_SRGB
206 2, // BC1_RGBA_SRGB
207 2, // BC2_SRGB
208 2, // BC3_SRGB
209 2, // BC7_SRGB
210 0, // A4B4G4R4_UNORM
211 2, // ASTC_2D_4X4_SRGB
212 2, // ASTC_2D_8X8_SRGB
213 2, // ASTC_2D_8X5_SRGB
214 2, // ASTC_2D_5X4_SRGB
215 2, // ASTC_2D_5X5_UNORM
216 2, // ASTC_2D_5X5_SRGB
217 2, // ASTC_2D_10X8_UNORM
218 2, // ASTC_2D_10X8_SRGB
219 2, // ASTC_2D_6X6_UNORM
220 2, // ASTC_2D_6X6_SRGB
221 2, // ASTC_2D_10X10_UNORM
222 2, // ASTC_2D_10X10_SRGB
223 2, // ASTC_2D_12X12_UNORM
224 2, // ASTC_2D_12X12_SRGB
225 2, // ASTC_2D_8X6_UNORM
226 2, // ASTC_2D_8X6_SRGB
227 2, // ASTC_2D_6X5_UNORM
228 2, // ASTC_2D_6X5_SRGB
229 0, // E5B9G9R9_FLOAT
230 0, // D32_FLOAT
231 0, // D16_UNORM
232 0, // D24_UNORM_S8_UINT
233 0, // S8_UINT_D24_UNORM
234 0, // D32_FLOAT_S8_UINT
235}};
236
237/**
238 * Gets the compression factor for the specified PixelFormat. This applies to just the
239 * "compressed width" and "compressed height", not the overall compression factor of a
240 * compressed image. This is used for maintaining proper surface sizes for compressed
241 * texture formats.
242 */
243inline constexpr u32 GetCompressionFactorShift(PixelFormat format) {
244 DEBUG_ASSERT(format != PixelFormat::Invalid);
245 DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size());
246 return compression_factor_shift_table[static_cast<std::size_t>(format)];
247}
248
249inline constexpr u32 GetCompressionFactor(PixelFormat format) {
250 return 1U << GetCompressionFactorShift(format);
251}
252
253constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
254 1, // A8B8G8R8_UNORM 144 1, // A8B8G8R8_UNORM
255 1, // A8B8G8R8_SNORM 145 1, // A8B8G8R8_SNORM
256 1, // A8B8G8R8_SINT 146 1, // A8B8G8R8_SINT
@@ -344,15 +234,12 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
344 1, // D32_FLOAT_S8_UINT 234 1, // D32_FLOAT_S8_UINT
345}}; 235}};
346 236
347static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { 237constexpr u32 DefaultBlockWidth(PixelFormat format) {
348 if (format == PixelFormat::Invalid) 238 ASSERT(static_cast<std::size_t>(format) < BLOCK_WIDTH_TABLE.size());
349 return 0; 239 return BLOCK_WIDTH_TABLE[static_cast<std::size_t>(format)];
350
351 ASSERT(static_cast<std::size_t>(format) < block_width_table.size());
352 return block_width_table[static_cast<std::size_t>(format)];
353} 240}
354 241
355constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ 242constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
356 1, // A8B8G8R8_UNORM 243 1, // A8B8G8R8_UNORM
357 1, // A8B8G8R8_SNORM 244 1, // A8B8G8R8_SNORM
358 1, // A8B8G8R8_SINT 245 1, // A8B8G8R8_SINT
@@ -446,15 +333,12 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
446 1, // D32_FLOAT_S8_UINT 333 1, // D32_FLOAT_S8_UINT
447}}; 334}};
448 335
449static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { 336constexpr u32 DefaultBlockHeight(PixelFormat format) {
450 if (format == PixelFormat::Invalid) 337 ASSERT(static_cast<std::size_t>(format) < BLOCK_HEIGHT_TABLE.size());
451 return 0; 338 return BLOCK_HEIGHT_TABLE[static_cast<std::size_t>(format)];
452
453 ASSERT(static_cast<std::size_t>(format) < block_height_table.size());
454 return block_height_table[static_cast<std::size_t>(format)];
455} 339}
456 340
457constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 341constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
458 32, // A8B8G8R8_UNORM 342 32, // A8B8G8R8_UNORM
459 32, // A8B8G8R8_SNORM 343 32, // A8B8G8R8_SNORM
460 32, // A8B8G8R8_SINT 344 32, // A8B8G8R8_SINT
@@ -548,20 +432,14 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
548 64, // D32_FLOAT_S8_UINT 432 64, // D32_FLOAT_S8_UINT
549}}; 433}};
550 434
551static constexpr u32 GetFormatBpp(PixelFormat format) { 435constexpr u32 BitsPerBlock(PixelFormat format) {
552 if (format == PixelFormat::Invalid) 436 ASSERT(static_cast<std::size_t>(format) < BITS_PER_BLOCK_TABLE.size());
553 return 0; 437 return BITS_PER_BLOCK_TABLE[static_cast<std::size_t>(format)];
554
555 ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
556 return bpp_table[static_cast<std::size_t>(format)];
557} 438}
558 439
559/// Returns the size in bytes of the specified pixel format 440/// Returns the size in bytes of the specified pixel format
560static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { 441constexpr u32 BytesPerBlock(PixelFormat pixel_format) {
561 if (pixel_format == PixelFormat::Invalid) { 442 return BitsPerBlock(pixel_format) / CHAR_BIT;
562 return 0;
563 }
564 return GetFormatBpp(pixel_format) / CHAR_BIT;
565} 443}
566 444
567SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); 445SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
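The renamed helpers above compose directly when sizing surfaces. A minimal sketch (illustrative, not part of this change; LevelSizeBytes is a hypothetical helper, and only the surface.h declarations shown above are assumed):

    #include "common/common_types.h"
    #include "video_core/surface.h"

    namespace {

    using namespace VideoCore::Surface;

    // Round the texel extent up to whole blocks, then multiply by the block size.
    constexpr u64 LevelSizeBytes(PixelFormat format, u32 width, u32 height) {
        const u32 block_w = DefaultBlockWidth(format);  // 4 for BC formats, 1 for plain ones
        const u32 block_h = DefaultBlockHeight(format);
        const u64 blocks_x = (width + block_w - 1) / block_w;
        const u64 blocks_y = (height + block_h - 1) / block_h;
        return blocks_x * blocks_y * BytesPerBlock(format);
    }

    // BC1 packs a 4x4 block into 64 bits (8 bytes), so a 64x64 level is 16x16 blocks:
    static_assert(LevelSizeBytes(PixelFormat::BC1_RGBA_UNORM, 64, 64) == 16 * 16 * 8);

    } // Anonymous namespace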
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp
new file mode 100644
index 000000000..a4fc1184b
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.cpp
@@ -0,0 +1,70 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/alignment.h"
9#include "common/common_types.h"
10#include "common/div_ceil.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/accelerated_swizzle.h"
13#include "video_core/texture_cache/util.h"
14#include "video_core/textures/decoders.h"
15
16namespace VideoCommon::Accelerated {
17
18using Tegra::Texture::GOB_SIZE_SHIFT;
19using Tegra::Texture::GOB_SIZE_X;
20using Tegra::Texture::GOB_SIZE_X_SHIFT;
21using Tegra::Texture::GOB_SIZE_Y_SHIFT;
22using VideoCore::Surface::BytesPerBlock;
23
24BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle,
25 const ImageInfo& info) {
26 const Extent3D block = swizzle.block;
27 const Extent3D num_tiles = swizzle.num_tiles;
28 const u32 bytes_per_block = BytesPerBlock(info.format);
29 const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
30 const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
31 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
32 return BlockLinearSwizzle2DParams{
33 .origin{0, 0, 0},
34 .destination{0, 0, 0},
35 .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
36 .layer_stride = info.layer_stride,
37 .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth),
38 .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
39 .block_height = block.height,
40 .block_height_mask = (1U << block.height) - 1,
41 };
42}
43
44BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle,
45 const ImageInfo& info) {
46 const Extent3D block = swizzle.block;
47 const Extent3D num_tiles = swizzle.num_tiles;
48 const u32 bytes_per_block = BytesPerBlock(info.format);
49 const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
50 const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
51
52 const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
53 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
54 const u32 slice_size =
55 Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
56 return BlockLinearSwizzle3DParams{
57 .origin{0, 0, 0},
58 .destination{0, 0, 0},
59 .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
60 .slice_size = slice_size,
61 .block_size = block_size,
62 .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
63 .block_height = block.height,
64 .block_height_mask = (1U << block.height) - 1,
65 .block_depth = block.depth,
66 .block_depth_mask = (1U << block.depth) - 1,
67 };
68}
69
70} // namespace VideoCommon::Accelerated
\ No newline at end of file
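To make the shift arithmetic in MakeBlockLinearSwizzle2DParams concrete, a worked example with assumed inputs (4 bytes per block, num_tiles.width = 256, a stride alignment of one GOB, block.height = 4, block.depth = 0):

    // GOB_SIZE_X = 64 bytes, GOB_SIZE_SHIFT = 9 (a GOB is 512 bytes).
    // stride               = 256 * 4         = 1024 bytes
    // gobs_in_x            = ceil(1024 / 64) = 16
    // x_shift              = 9 + 4 + 0       = 13
    // block_size           = 16 << 13        = 131072 bytes per row of blocks
    // bytes_per_block_log2 = countr_zero(4)  = 2
    // block_height_mask    = (1 << 4) - 1    = 15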
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h
new file mode 100644
index 000000000..6ec5c78c4
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.h
@@ -0,0 +1,45 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/common_types.h"
10#include "video_core/texture_cache/image_info.h"
11#include "video_core/texture_cache/types.h"
12
13namespace VideoCommon::Accelerated {
14
15struct BlockLinearSwizzle2DParams {
16 std::array<u32, 3> origin;
17 std::array<s32, 3> destination;
18 u32 bytes_per_block_log2;
19 u32 layer_stride;
20 u32 block_size;
21 u32 x_shift;
22 u32 block_height;
23 u32 block_height_mask;
24};
25
26struct BlockLinearSwizzle3DParams {
27 std::array<u32, 3> origin;
28 std::array<s32, 3> destination;
29 u32 bytes_per_block_log2;
30 u32 slice_size;
31 u32 block_size;
32 u32 x_shift;
33 u32 block_height;
34 u32 block_height_mask;
35 u32 block_depth;
36 u32 block_depth_mask;
37};
38
39[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(
40 const SwizzleParameters& swizzle, const ImageInfo& info);
41
42[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(
43 const SwizzleParameters& swizzle, const ImageInfo& info);
44
45} // namespace VideoCommon::Accelerated
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h
deleted file mode 100644
index 9c21a0649..000000000
--- a/src/video_core/texture_cache/copy_params.h
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
11struct CopyParams {
12 constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y,
13 u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height,
14 u32 depth)
15 : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x},
16 dest_y{dest_y}, dest_z{dest_z}, source_level{source_level},
17 dest_level{dest_level}, width{width}, height{height}, depth{depth} {}
18
19 constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level)
20 : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level},
21 dest_level{level}, width{width}, height{height}, depth{depth} {}
22
23 u32 source_x;
24 u32 source_y;
25 u32 source_z;
26 u32 dest_x;
27 u32 dest_y;
28 u32 dest_z;
29 u32 source_level;
30 u32 dest_level;
31 u32 width;
32 u32 height;
33 u32 depth;
34};
35
36} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
new file mode 100644
index 000000000..017327975
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.cpp
@@ -0,0 +1,97 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <array>
7#include <span>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/texture_cache/decode_bc4.h"
12#include "video_core/texture_cache/types.h"
13
14namespace VideoCommon {
15
16// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
17[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
18 const u32 code_offset = 16 + 3 * (4 * y + x);
19 const u32 code = (bits >> code_offset) & 7;
20 const u32 red0 = (bits >> 0) & 0xff;
21 const u32 red1 = (bits >> 8) & 0xff;
22 if (red0 > red1) {
23 switch (code) {
24 case 0:
25 return red0;
26 case 1:
27 return red1;
28 case 2:
29 return (6 * red0 + 1 * red1) / 7;
30 case 3:
31 return (5 * red0 + 2 * red1) / 7;
32 case 4:
33 return (4 * red0 + 3 * red1) / 7;
34 case 5:
35 return (3 * red0 + 4 * red1) / 7;
36 case 6:
37 return (2 * red0 + 5 * red1) / 7;
38 case 7:
39 return (1 * red0 + 6 * red1) / 7;
40 }
41 } else {
42 switch (code) {
43 case 0:
44 return red0;
45 case 1:
46 return red1;
47 case 2:
48 return (4 * red0 + 1 * red1) / 5;
49 case 3:
50 return (3 * red0 + 2 * red1) / 5;
51 case 4:
52 return (2 * red0 + 3 * red1) / 5;
53 case 5:
54 return (1 * red0 + 4 * red1) / 5;
55 case 6:
56 return 0;
57 case 7:
58 return 0xff;
59 }
60 }
61 return 0;
62}
63
64void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
65 UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
66 UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
67 static constexpr u32 BLOCK_SIZE = 4;
68 size_t input_offset = 0;
69 for (u32 slice = 0; slice < extent.depth; ++slice) {
70 for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
71 for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
72 u64 bits;
73 std::memcpy(&bits, &input[input_offset], sizeof(bits));
74 input_offset += sizeof(bits);
75
76 for (u32 y = 0; y < BLOCK_SIZE; ++y) {
77 for (u32 x = 0; x < BLOCK_SIZE; ++x) {
78 const u32 linear_z = slice;
79 const u32 linear_y = block_y * BLOCK_SIZE + y;
80 const u32 linear_x = block_x * BLOCK_SIZE + x;
81 const u32 offset_z = linear_z * extent.width * extent.height;
82 const u32 offset_y = linear_y * extent.width;
83 const u32 offset_x = linear_x;
84 const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
85 const u32 color = DecompressBlock(bits, x, y);
86 output[output_offset + 0] = static_cast<u8>(color);
87 output[output_offset + 1] = 0;
88 output[output_offset + 2] = 0;
89 output[output_offset + 3] = 0xff;
90 }
91 }
92 }
93 }
94 }
95}
96
97} // namespace VideoCommon
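Because DecompressBlock is constexpr, the selector arithmetic can be spot-checked at compile time. An illustrative check (not part of this change; it assumes visibility of the file-local DecompressBlock within the same translation unit):

    // red0 = 255, red1 = 0, selector code 4 for texel (0, 0):
    constexpr u64 bits = 0xffULL | (0x00ULL << 8) | (4ULL << 16);
    // red0 > red1, so code 4 interpolates (4 * red0 + 3 * red1) / 7 = 145:
    static_assert(DecompressBlock(bits, 0, 0) == 145);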
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
new file mode 100644
index 000000000..63fb23508
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.h
@@ -0,0 +1,16 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include "common/common_types.h"
10#include "video_core/texture_cache/types.h"
11
12namespace VideoCommon {
13
14void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output);
15
16} // namespace VideoCommon
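A usage sketch (illustrative; compressed_bytes and extent are assumed inputs): the output buffer needs four bytes per texel, since the decoder writes R8G8B8A8 with the decoded value in the red channel:

    std::vector<u8> rgba(size_t{extent.width} * extent.height * extent.depth * 4);
    VideoCommon::DecompressBC4(compressed_bytes, extent, rgba);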
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
new file mode 100644
index 000000000..3a03b786f
--- /dev/null
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -0,0 +1,82 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <vector>
9
10#include "common/common_types.h"
11#include "common/div_ceil.h"
12#include "common/logging/log.h"
13#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h"
15
16namespace VideoCommon {
17
18template <typename Descriptor>
19class DescriptorTable {
20public:
21 explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
22
23 [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) {
24 if (current_gpu_addr == gpu_addr && current_limit == limit) [[likely]] {
25 return false;
26 }
27 Refresh(gpu_addr, limit);
28 return true;
29 }
30
31 void Invalidate() noexcept {
32 std::ranges::fill(read_descriptors, 0);
33 }
34
35 [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
36 DEBUG_ASSERT(index <= current_limit);
37 const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
38 std::pair<Descriptor, bool> result;
39 gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
40 if (IsDescriptorRead(index)) {
41 result.second = result.first != descriptors[index];
42 } else {
43 MarkDescriptorAsRead(index);
44 result.second = true;
45 }
46 if (result.second) {
47 descriptors[index] = result.first;
48 }
49 return result;
50 }
51
52 [[nodiscard]] u32 Limit() const noexcept {
53 return current_limit;
54 }
55
56private:
57 void Refresh(GPUVAddr gpu_addr, u32 limit) {
58 current_gpu_addr = gpu_addr;
59 current_limit = limit;
60
61 const size_t num_descriptors = static_cast<size_t>(limit) + 1;
62 read_descriptors.clear();
63 read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
64 descriptors.resize(num_descriptors);
65 }
66
67 void MarkDescriptorAsRead(u32 index) noexcept {
68 read_descriptors[index / 64] |= 1ULL << (index % 64);
69 }
70
71 [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
72 return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
73 }
74
75 Tegra::MemoryManager& gpu_memory;
76 GPUVAddr current_gpu_addr{};
77 u32 current_limit{};
78 std::vector<u64> read_descriptors;
79 std::vector<Descriptor> descriptors;
80};
81
82} // namespace VideoCommon
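A usage sketch for the table (illustrative; TICEntry stands in for any trivially copyable descriptor type, and gpu_memory, tic_base_address, tic_limit and index are assumed):

    DescriptorTable<TICEntry> table{gpu_memory};
    if (table.Synchronize(tic_base_address, tic_limit)) {
        // The table was relocated or resized; previously cached entries may be stale.
    }
    const auto [descriptor, is_new] = table.Read(index);
    if (is_new) {
        // First read of this slot, or its contents changed since the last read.
    }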
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7d5a75648..ddfb726fe 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include "common/common_types.h" 5#include "common/common_types.h"
7#include "common/logging/log.h" 6#include "common/logging/log.h"
8#include "video_core/texture_cache/format_lookup_table.h" 7#include "video_core/texture_cache/format_lookup_table.h"
@@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM;
20constexpr auto SINT = ComponentType::SINT; 19constexpr auto SINT = ComponentType::SINT;
21constexpr auto UINT = ComponentType::UINT; 20constexpr auto UINT = ComponentType::UINT;
22constexpr auto FLOAT = ComponentType::FLOAT; 21constexpr auto FLOAT = ComponentType::FLOAT;
23constexpr bool C = false; // Normal color 22constexpr bool LINEAR = false;
24constexpr bool S = true; // Srgb 23constexpr bool SRGB = true;
25 24
26struct Table { 25constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
27 constexpr Table(TextureFormat texture_format, bool is_srgb, ComponentType red_component, 26 ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
28 ComponentType green_component, ComponentType blue_component, 27 u32 hash = is_srgb ? 1 : 0;
29 ComponentType alpha_component, PixelFormat pixel_format) 28 hash |= static_cast<u32>(red_component) << 1;
30 : texture_format{texture_format}, pixel_format{pixel_format}, red_component{red_component}, 29 hash |= static_cast<u32>(green_component) << 4;
31 green_component{green_component}, blue_component{blue_component}, 30 hash |= static_cast<u32>(blue_component) << 7;
32 alpha_component{alpha_component}, is_srgb{is_srgb} {} 31 hash |= static_cast<u32>(alpha_component) << 10;
33 32 hash |= static_cast<u32>(format) << 13;
34 TextureFormat texture_format; 33 return hash;
35 PixelFormat pixel_format; 34}
36 ComponentType red_component;
37 ComponentType green_component;
38 ComponentType blue_component;
39 ComponentType alpha_component;
40 bool is_srgb;
41};
42constexpr std::array<Table, 86> DefinitionTable = {{
43 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
44 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
45 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
46 {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
47 {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
48
49 {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
50
51 {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
52 {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
53
54 {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
55
56 {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
57
58 {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
59 {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
60 {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
61 {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
62
63 {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
64 {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
65 {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
66 {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
67
68 {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
69 {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
70 {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
71 {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
72 {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
73
74 {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
75 {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
76 {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
77 {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
78 {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
79
80 {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
81 {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
82 {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
83 {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
84 {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
85
86 {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
87
88 {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
89 {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
90 {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
91
92 {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
93
94 {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
95 {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
96 {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
97
98 {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
99 {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
100 {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
101
102 {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
103
104 {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
105 {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
106 {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
107 {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
108 {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
109
110 {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
111 {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
112
113 {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
114 {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
115
116 {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
117 {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
118
119 {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
120 {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
121
122 {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
123 {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
124
125 {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
126 {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
127
128 {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
129 {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
130
131 {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
132 {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
133
134 {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
135 {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
136
137 {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
138 {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
139
140 {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
141 {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
142
143 {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
144 {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
145
146 {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
147 {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
148
149 {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
150 {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
151
152 {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
153 {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
154
155 {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
156 {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
157
158 {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
159 {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
160 35
161 {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, 36constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) {
162 {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, 37 return Hash(format, component, component, component, component, is_srgb);
163}}; 38}
164 39
165} // Anonymous namespace 40} // Anonymous namespace
166 41
167FormatLookupTable::FormatLookupTable() { 42PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
168 table.fill(static_cast<u8>(PixelFormat::Invalid)); 43 ComponentType blue, ComponentType alpha,
169 44 bool is_srgb) noexcept {
170 for (const auto& entry : DefinitionTable) { 45 switch (Hash(format, red, green, blue, alpha, is_srgb)) {
171 table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, 46 case Hash(TextureFormat::A8R8G8B8, UNORM):
172 entry.green_component, entry.blue_component, entry.alpha_component)] = 47 return PixelFormat::A8B8G8R8_UNORM;
173 static_cast<u8>(entry.pixel_format); 48 case Hash(TextureFormat::A8R8G8B8, SNORM):
174 } 49 return PixelFormat::A8B8G8R8_SNORM;
175} 50 case Hash(TextureFormat::A8R8G8B8, UINT):
176 51 return PixelFormat::A8B8G8R8_UINT;
177PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, 52 case Hash(TextureFormat::A8R8G8B8, SINT):
178 ComponentType red_component, 53 return PixelFormat::A8B8G8R8_SINT;
179 ComponentType green_component, 54 case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
180 ComponentType blue_component, 55 return PixelFormat::A8B8G8R8_SRGB;
181 ComponentType alpha_component) const noexcept { 56 case Hash(TextureFormat::B5G6R5, UNORM):
182 const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex( 57 return PixelFormat::B5G6R5_UNORM;
183 format, is_srgb, red_component, green_component, blue_component, alpha_component)]); 58 case Hash(TextureFormat::A2B10G10R10, UNORM):
184 // [[likely]] 59 return PixelFormat::A2B10G10R10_UNORM;
185 if (pixel_format != PixelFormat::Invalid) { 60 case Hash(TextureFormat::A2B10G10R10, UINT):
186 return pixel_format; 61 return PixelFormat::A2B10G10R10_UINT;
62 case Hash(TextureFormat::A1B5G5R5, UNORM):
63 return PixelFormat::A1B5G5R5_UNORM;
64 case Hash(TextureFormat::A4B4G4R4, UNORM):
65 return PixelFormat::A4B4G4R4_UNORM;
66 case Hash(TextureFormat::R8, UNORM):
67 return PixelFormat::R8_UNORM;
68 case Hash(TextureFormat::R8, SNORM):
69 return PixelFormat::R8_SNORM;
70 case Hash(TextureFormat::R8, UINT):
71 return PixelFormat::R8_UINT;
72 case Hash(TextureFormat::R8, SINT):
73 return PixelFormat::R8_SINT;
74 case Hash(TextureFormat::R8G8, UNORM):
75 return PixelFormat::R8G8_UNORM;
76 case Hash(TextureFormat::R8G8, SNORM):
77 return PixelFormat::R8G8_SNORM;
78 case Hash(TextureFormat::R8G8, UINT):
79 return PixelFormat::R8G8_UINT;
80 case Hash(TextureFormat::R8G8, SINT):
81 return PixelFormat::R8G8_SINT;
82 case Hash(TextureFormat::R16G16B16A16, FLOAT):
83 return PixelFormat::R16G16B16A16_FLOAT;
84 case Hash(TextureFormat::R16G16B16A16, UNORM):
85 return PixelFormat::R16G16B16A16_UNORM;
86 case Hash(TextureFormat::R16G16B16A16, SNORM):
87 return PixelFormat::R16G16B16A16_SNORM;
88 case Hash(TextureFormat::R16G16B16A16, UINT):
89 return PixelFormat::R16G16B16A16_UINT;
90 case Hash(TextureFormat::R16G16B16A16, SINT):
91 return PixelFormat::R16G16B16A16_SINT;
92 case Hash(TextureFormat::R16G16, FLOAT):
93 return PixelFormat::R16G16_FLOAT;
94 case Hash(TextureFormat::R16G16, UNORM):
95 return PixelFormat::R16G16_UNORM;
96 case Hash(TextureFormat::R16G16, SNORM):
97 return PixelFormat::R16G16_SNORM;
98 case Hash(TextureFormat::R16G16, UINT):
99 return PixelFormat::R16G16_UINT;
100 case Hash(TextureFormat::R16G16, SINT):
101 return PixelFormat::R16G16_SINT;
102 case Hash(TextureFormat::R16, FLOAT):
103 return PixelFormat::R16_FLOAT;
104 case Hash(TextureFormat::R16, UNORM):
105 return PixelFormat::R16_UNORM;
106 case Hash(TextureFormat::R16, SNORM):
107 return PixelFormat::R16_SNORM;
108 case Hash(TextureFormat::R16, UINT):
109 return PixelFormat::R16_UINT;
110 case Hash(TextureFormat::R16, SINT):
111 return PixelFormat::R16_SINT;
112 case Hash(TextureFormat::B10G11R11, FLOAT):
113 return PixelFormat::B10G11R11_FLOAT;
114 case Hash(TextureFormat::R32G32B32A32, FLOAT):
115 return PixelFormat::R32G32B32A32_FLOAT;
116 case Hash(TextureFormat::R32G32B32A32, UINT):
117 return PixelFormat::R32G32B32A32_UINT;
118 case Hash(TextureFormat::R32G32B32A32, SINT):
119 return PixelFormat::R32G32B32A32_SINT;
120 case Hash(TextureFormat::R32G32B32, FLOAT):
121 return PixelFormat::R32G32B32_FLOAT;
122 case Hash(TextureFormat::R32G32, FLOAT):
123 return PixelFormat::R32G32_FLOAT;
124 case Hash(TextureFormat::R32G32, UINT):
125 return PixelFormat::R32G32_UINT;
126 case Hash(TextureFormat::R32G32, SINT):
127 return PixelFormat::R32G32_SINT;
128 case Hash(TextureFormat::R32, FLOAT):
129 return PixelFormat::R32_FLOAT;
130 case Hash(TextureFormat::R32, UINT):
131 return PixelFormat::R32_UINT;
132 case Hash(TextureFormat::R32, SINT):
133 return PixelFormat::R32_SINT;
134 case Hash(TextureFormat::E5B9G9R9, FLOAT):
135 return PixelFormat::E5B9G9R9_FLOAT;
136 case Hash(TextureFormat::D32, FLOAT):
137 return PixelFormat::D32_FLOAT;
138 case Hash(TextureFormat::D16, UNORM):
139 return PixelFormat::D16_UNORM;
140 case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
141 return PixelFormat::S8_UINT_D24_UNORM;
142 case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
143 return PixelFormat::S8_UINT_D24_UNORM;
144 case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
145 return PixelFormat::D32_FLOAT_S8_UINT;
146 case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
147 return PixelFormat::BC1_RGBA_UNORM;
148 case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
149 return PixelFormat::BC1_RGBA_SRGB;
150 case Hash(TextureFormat::BC2, UNORM, LINEAR):
151 return PixelFormat::BC2_UNORM;
152 case Hash(TextureFormat::BC2, UNORM, SRGB):
153 return PixelFormat::BC2_SRGB;
154 case Hash(TextureFormat::BC3, UNORM, LINEAR):
155 return PixelFormat::BC3_UNORM;
156 case Hash(TextureFormat::BC3, UNORM, SRGB):
157 return PixelFormat::BC3_SRGB;
158 case Hash(TextureFormat::BC4, UNORM):
159 return PixelFormat::BC4_UNORM;
160 case Hash(TextureFormat::BC4, SNORM):
161 return PixelFormat::BC4_SNORM;
162 case Hash(TextureFormat::BC5, UNORM):
163 return PixelFormat::BC5_UNORM;
164 case Hash(TextureFormat::BC5, SNORM):
165 return PixelFormat::BC5_SNORM;
166 case Hash(TextureFormat::BC7, UNORM, LINEAR):
167 return PixelFormat::BC7_UNORM;
168 case Hash(TextureFormat::BC7, UNORM, SRGB):
169 return PixelFormat::BC7_SRGB;
170 case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
171 return PixelFormat::BC6H_SFLOAT;
172 case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
173 return PixelFormat::BC6H_UFLOAT;
174 case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
175 return PixelFormat::ASTC_2D_4X4_UNORM;
176 case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB):
177 return PixelFormat::ASTC_2D_4X4_SRGB;
178 case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR):
179 return PixelFormat::ASTC_2D_5X4_UNORM;
180 case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB):
181 return PixelFormat::ASTC_2D_5X4_SRGB;
182 case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR):
183 return PixelFormat::ASTC_2D_5X5_UNORM;
184 case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB):
185 return PixelFormat::ASTC_2D_5X5_SRGB;
186 case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR):
187 return PixelFormat::ASTC_2D_8X8_UNORM;
188 case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB):
189 return PixelFormat::ASTC_2D_8X8_SRGB;
190 case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR):
191 return PixelFormat::ASTC_2D_8X5_UNORM;
192 case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB):
193 return PixelFormat::ASTC_2D_8X5_SRGB;
194 case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR):
195 return PixelFormat::ASTC_2D_10X8_UNORM;
196 case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB):
197 return PixelFormat::ASTC_2D_10X8_SRGB;
198 case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR):
199 return PixelFormat::ASTC_2D_6X6_UNORM;
200 case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB):
201 return PixelFormat::ASTC_2D_6X6_SRGB;
202 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR):
203 return PixelFormat::ASTC_2D_10X10_UNORM;
204 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
205 return PixelFormat::ASTC_2D_10X10_SRGB;
206 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR):
207 return PixelFormat::ASTC_2D_12X12_UNORM;
208 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB):
209 return PixelFormat::ASTC_2D_12X12_SRGB;
210 case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR):
211 return PixelFormat::ASTC_2D_8X6_UNORM;
212 case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB):
213 return PixelFormat::ASTC_2D_8X6_SRGB;
214 case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR):
215 return PixelFormat::ASTC_2D_6X5_UNORM;
216 case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB):
217 return PixelFormat::ASTC_2D_6X5_SRGB;
187 } 218 }
188 UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", 219 UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
189 static_cast<int>(format), is_srgb, static_cast<int>(red_component), 220 static_cast<int>(format), is_srgb, static_cast<int>(red),
190 static_cast<int>(green_component), static_cast<int>(blue_component), 221 static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
191 static_cast<int>(alpha_component));
192 return PixelFormat::A8B8G8R8_UNORM; 222 return PixelFormat::A8B8G8R8_UNORM;
193} 223}
194 224
195void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
196 ComponentType green_component, ComponentType blue_component,
197 ComponentType alpha_component, PixelFormat pixel_format) {}
198
199std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb,
200 ComponentType red_component,
201 ComponentType green_component,
202 ComponentType blue_component,
203 ComponentType alpha_component) noexcept {
204 const auto format_index = static_cast<std::size_t>(format);
205 const auto red_index = static_cast<std::size_t>(red_component);
206 const auto green_index = static_cast<std::size_t>(green_component);
207 const auto blue_index = static_cast<std::size_t>(blue_component);
208 const auto alpha_index = static_cast<std::size_t>(alpha_component);
209 const std::size_t srgb_index = is_srgb ? 1 : 0;
210
211 return format_index * PerFormat +
212 srgb_index * PerComponent * PerComponent * PerComponent * PerComponent +
213 alpha_index * PerComponent * PerComponent * PerComponent +
214 blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index;
215}
216
217} // namespace VideoCommon 225} // namespace VideoCommon
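The packing above is injective as long as each ComponentType fits in 3 bits and the TextureFormat in the remaining 19, so every case label denotes a distinct value; if two table entries ever hashed equal, the duplicated case labels would fail to compile. An illustrative compile-time check:

    static_assert(Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR) !=
                  Hash(TextureFormat::BC1_RGBA, UNORM, SRGB));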
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
index aa77e0a5a..729533999 100644
--- a/src/video_core/texture_cache/format_lookup_table.h
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -4,48 +4,14 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <limits>
9#include "video_core/surface.h" 7#include "video_core/surface.h"
10#include "video_core/textures/texture.h" 8#include "video_core/textures/texture.h"
11 9
12namespace VideoCommon { 10namespace VideoCommon {
13 11
14class FormatLookupTable { 12VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo(
15public: 13 Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component,
16 explicit FormatLookupTable(); 14 Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component,
17 15 Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept;
18 VideoCore::Surface::PixelFormat GetPixelFormat(
19 Tegra::Texture::TextureFormat format, bool is_srgb,
20 Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component,
21 Tegra::Texture::ComponentType blue_component,
22 Tegra::Texture::ComponentType alpha_component) const noexcept;
23
24private:
25 static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max());
26
27 static constexpr std::size_t NumTextureFormats = 128;
28
29 static constexpr std::size_t PerComponent = 8;
30 static constexpr std::size_t PerComponents2 = PerComponent * PerComponent;
31 static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent;
32 static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent;
33 static constexpr std::size_t PerFormat = PerComponents4 * 2;
34
35 static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
36 Tegra::Texture::ComponentType red_component,
37 Tegra::Texture::ComponentType green_component,
38 Tegra::Texture::ComponentType blue_component,
39 Tegra::Texture::ComponentType alpha_component) noexcept;
40
41 void Set(Tegra::Texture::TextureFormat format, bool is_srgb,
42 Tegra::Texture::ComponentType red_component,
43 Tegra::Texture::ComponentType green_component,
44 Tegra::Texture::ComponentType blue_component,
45 Tegra::Texture::ComponentType alpha_component,
46 VideoCore::Surface::PixelFormat pixel_format);
47
48 std::array<u8, NumTextureFormats * PerFormat> table;
49};
50 16
51} // namespace VideoCommon 17} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
new file mode 100644
index 000000000..d10ba4ccd
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -0,0 +1,95 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7
8#include "video_core/texture_cache/formatter.h"
9#include "video_core/texture_cache/image_base.h"
10#include "video_core/texture_cache/image_info.h"
11#include "video_core/texture_cache/image_view_base.h"
12#include "video_core/texture_cache/render_targets.h"
13
14namespace VideoCommon {
15
16std::string Name(const ImageBase& image) {
17 const GPUVAddr gpu_addr = image.gpu_addr;
18 const ImageInfo& info = image.info;
19 const u32 width = info.size.width;
20 const u32 height = info.size.height;
21 const u32 depth = info.size.depth;
22 const u32 num_layers = image.info.resources.layers;
23 const u32 num_levels = image.info.resources.levels;
24 std::string resource;
25 if (num_layers > 1) {
26 resource += fmt::format(":L{}", num_layers);
27 }
28 if (num_levels > 1) {
29 resource += fmt::format(":M{}", num_levels);
30 }
31 switch (image.info.type) {
32 case ImageType::e1D:
33 return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource);
34 case ImageType::e2D:
35 return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource);
36 case ImageType::e3D:
37 return fmt::format("Image 2D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource);
38 case ImageType::Linear:
39 return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height);
40 case ImageType::Buffer:
41 return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width);
42 }
43 return "Invalid";
44}
45
46std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
47 const u32 width = image_view.size.width;
48 const u32 height = image_view.size.height;
49 const u32 depth = image_view.size.depth;
50 const u32 num_levels = image_view.range.extent.levels;
51 const u32 num_layers = image_view.range.extent.layers;
52
53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
54 switch (type.value_or(image_view.type)) {
55 case ImageViewType::e1D:
56 return fmt::format("ImageView 1D {}{}", width, level);
57 case ImageViewType::e2D:
58 return fmt::format("ImageView 2D {}x{}{}", width, height, level);
59 case ImageViewType::Cube:
60 return fmt::format("ImageView Cube {}x{}{}", width, height, level);
61 case ImageViewType::e3D:
62 return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level);
63 case ImageViewType::e1DArray:
64 return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers);
65 case ImageViewType::e2DArray:
66 return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers);
67 case ImageViewType::CubeArray:
68 return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers);
69 case ImageViewType::Rect:
70 return fmt::format("ImageView Rect {}x{}{}", width, height, level);
71 case ImageViewType::Buffer:
72 return fmt::format("BufferView {}", width);
73 }
74 return "Invalid";
75}
76
77std::string Name(const RenderTargets& render_targets) {
78 std::string_view debug_prefix;
79 const auto num_color = std::ranges::count_if(
80 render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); });
81 if (render_targets.depth_buffer_id) {
82 debug_prefix = num_color > 0 ? "R" : "Z";
83 } else {
84 debug_prefix = num_color > 0 ? "C" : "X";
85 }
86 const Extent2D size = render_targets.size;
87 if (num_color > 0) {
88 return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width,
89 size.height);
90 } else {
91 return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height);
92 }
93}
94
95} // namespace VideoCommon
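For reference, example outputs under assumed inputs:

    // Name(image)          -> "Image 2D 0xdeadb000 1920x1080:M10"  (2D, 10 mip levels)
    // Name(render_targets) -> "Framebuffer R2 1920x1080"           (2 color buffers + depth)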
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
new file mode 100644
index 000000000..a48413983
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.h
@@ -0,0 +1,263 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include <fmt/format.h>
10
11#include "video_core/surface.h"
12#include "video_core/texture_cache/types.h"
13
14template <>
15struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> {
16 template <typename FormatContext>
17 auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) {
18 using VideoCore::Surface::PixelFormat;
19 const string_view name = [format] {
20 switch (format) {
21 case PixelFormat::A8B8G8R8_UNORM:
22 return "A8B8G8R8_UNORM";
23 case PixelFormat::A8B8G8R8_SNORM:
24 return "A8B8G8R8_SNORM";
25 case PixelFormat::A8B8G8R8_SINT:
26 return "A8B8G8R8_SINT";
27 case PixelFormat::A8B8G8R8_UINT:
28 return "A8B8G8R8_UINT";
29 case PixelFormat::R5G6B5_UNORM:
30 return "R5G6B5_UNORM";
31 case PixelFormat::B5G6R5_UNORM:
32 return "B5G6R5_UNORM";
33 case PixelFormat::A1R5G5B5_UNORM:
34 return "A1R5G5B5_UNORM";
35 case PixelFormat::A2B10G10R10_UNORM:
36 return "A2B10G10R10_UNORM";
37 case PixelFormat::A2B10G10R10_UINT:
38 return "A2B10G10R10_UINT";
39 case PixelFormat::A1B5G5R5_UNORM:
40 return "A1B5G5R5_UNORM";
41 case PixelFormat::R8_UNORM:
42 return "R8_UNORM";
43 case PixelFormat::R8_SNORM:
44 return "R8_SNORM";
45 case PixelFormat::R8_SINT:
46 return "R8_SINT";
47 case PixelFormat::R8_UINT:
48 return "R8_UINT";
49 case PixelFormat::R16G16B16A16_FLOAT:
50 return "R16G16B16A16_FLOAT";
51 case PixelFormat::R16G16B16A16_UNORM:
52 return "R16G16B16A16_UNORM";
53 case PixelFormat::R16G16B16A16_SNORM:
54 return "R16G16B16A16_SNORM";
55 case PixelFormat::R16G16B16A16_SINT:
56 return "R16G16B16A16_SINT";
57 case PixelFormat::R16G16B16A16_UINT:
58 return "R16G16B16A16_UINT";
59 case PixelFormat::B10G11R11_FLOAT:
60 return "B10G11R11_FLOAT";
61 case PixelFormat::R32G32B32A32_UINT:
62 return "R32G32B32A32_UINT";
63 case PixelFormat::BC1_RGBA_UNORM:
64 return "BC1_RGBA_UNORM";
65 case PixelFormat::BC2_UNORM:
66 return "BC2_UNORM";
67 case PixelFormat::BC3_UNORM:
68 return "BC3_UNORM";
69 case PixelFormat::BC4_UNORM:
70 return "BC4_UNORM";
71 case PixelFormat::BC4_SNORM:
72 return "BC4_SNORM";
73 case PixelFormat::BC5_UNORM:
74 return "BC5_UNORM";
75 case PixelFormat::BC5_SNORM:
76 return "BC5_SNORM";
77 case PixelFormat::BC7_UNORM:
78 return "BC7_UNORM";
79 case PixelFormat::BC6H_UFLOAT:
80 return "BC6H_UFLOAT";
81 case PixelFormat::BC6H_SFLOAT:
82 return "BC6H_SFLOAT";
83 case PixelFormat::ASTC_2D_4X4_UNORM:
84 return "ASTC_2D_4X4_UNORM";
85 case PixelFormat::B8G8R8A8_UNORM:
86 return "B8G8R8A8_UNORM";
87 case PixelFormat::R32G32B32A32_FLOAT:
88 return "R32G32B32A32_FLOAT";
89 case PixelFormat::R32G32B32A32_SINT:
90 return "R32G32B32A32_SINT";
91 case PixelFormat::R32G32_FLOAT:
92 return "R32G32_FLOAT";
93 case PixelFormat::R32G32_SINT:
94 return "R32G32_SINT";
95 case PixelFormat::R32_FLOAT:
96 return "R32_FLOAT";
97 case PixelFormat::R16_FLOAT:
98 return "R16_FLOAT";
99 case PixelFormat::R16_UNORM:
100 return "R16_UNORM";
101 case PixelFormat::R16_SNORM:
102 return "R16_SNORM";
103 case PixelFormat::R16_UINT:
104 return "R16_UINT";
105 case PixelFormat::R16_SINT:
106 return "R16_SINT";
107 case PixelFormat::R16G16_UNORM:
108 return "R16G16_UNORM";
109 case PixelFormat::R16G16_FLOAT:
110 return "R16G16_FLOAT";
111 case PixelFormat::R16G16_UINT:
112 return "R16G16_UINT";
113 case PixelFormat::R16G16_SINT:
114 return "R16G16_SINT";
115 case PixelFormat::R16G16_SNORM:
116 return "R16G16_SNORM";
117 case PixelFormat::R32G32B32_FLOAT:
118 return "R32G32B32_FLOAT";
119 case PixelFormat::A8B8G8R8_SRGB:
120 return "A8B8G8R8_SRGB";
121 case PixelFormat::R8G8_UNORM:
122 return "R8G8_UNORM";
123 case PixelFormat::R8G8_SNORM:
124 return "R8G8_SNORM";
125 case PixelFormat::R8G8_SINT:
126 return "R8G8_SINT";
127 case PixelFormat::R8G8_UINT:
128 return "R8G8_UINT";
129 case PixelFormat::R32G32_UINT:
130 return "R32G32_UINT";
131 case PixelFormat::R16G16B16X16_FLOAT:
132 return "R16G16B16X16_FLOAT";
133 case PixelFormat::R32_UINT:
134 return "R32_UINT";
135 case PixelFormat::R32_SINT:
136 return "R32_SINT";
137 case PixelFormat::ASTC_2D_8X8_UNORM:
138 return "ASTC_2D_8X8_UNORM";
139 case PixelFormat::ASTC_2D_8X5_UNORM:
140 return "ASTC_2D_8X5_UNORM";
141 case PixelFormat::ASTC_2D_5X4_UNORM:
142 return "ASTC_2D_5X4_UNORM";
143 case PixelFormat::B8G8R8A8_SRGB:
144 return "B8G8R8A8_SRGB";
145 case PixelFormat::BC1_RGBA_SRGB:
146 return "BC1_RGBA_SRGB";
147 case PixelFormat::BC2_SRGB:
148 return "BC2_SRGB";
149 case PixelFormat::BC3_SRGB:
150 return "BC3_SRGB";
151 case PixelFormat::BC7_SRGB:
152 return "BC7_SRGB";
153 case PixelFormat::A4B4G4R4_UNORM:
154 return "A4B4G4R4_UNORM";
155 case PixelFormat::ASTC_2D_4X4_SRGB:
156 return "ASTC_2D_4X4_SRGB";
157 case PixelFormat::ASTC_2D_8X8_SRGB:
158 return "ASTC_2D_8X8_SRGB";
159 case PixelFormat::ASTC_2D_8X5_SRGB:
160 return "ASTC_2D_8X5_SRGB";
161 case PixelFormat::ASTC_2D_5X4_SRGB:
162 return "ASTC_2D_5X4_SRGB";
163 case PixelFormat::ASTC_2D_5X5_UNORM:
164 return "ASTC_2D_5X5_UNORM";
165 case PixelFormat::ASTC_2D_5X5_SRGB:
166 return "ASTC_2D_5X5_SRGB";
167 case PixelFormat::ASTC_2D_10X8_UNORM:
168 return "ASTC_2D_10X8_UNORM";
169 case PixelFormat::ASTC_2D_10X8_SRGB:
170 return "ASTC_2D_10X8_SRGB";
171 case PixelFormat::ASTC_2D_6X6_UNORM:
172 return "ASTC_2D_6X6_UNORM";
173 case PixelFormat::ASTC_2D_6X6_SRGB:
174 return "ASTC_2D_6X6_SRGB";
175 case PixelFormat::ASTC_2D_10X10_UNORM:
176 return "ASTC_2D_10X10_UNORM";
177 case PixelFormat::ASTC_2D_10X10_SRGB:
178 return "ASTC_2D_10X10_SRGB";
179 case PixelFormat::ASTC_2D_12X12_UNORM:
180 return "ASTC_2D_12X12_UNORM";
181 case PixelFormat::ASTC_2D_12X12_SRGB:
182 return "ASTC_2D_12X12_SRGB";
183 case PixelFormat::ASTC_2D_8X6_UNORM:
184 return "ASTC_2D_8X6_UNORM";
185 case PixelFormat::ASTC_2D_8X6_SRGB:
186 return "ASTC_2D_8X6_SRGB";
187 case PixelFormat::ASTC_2D_6X5_UNORM:
188 return "ASTC_2D_6X5_UNORM";
189 case PixelFormat::ASTC_2D_6X5_SRGB:
190 return "ASTC_2D_6X5_SRGB";
191 case PixelFormat::E5B9G9R9_FLOAT:
192 return "E5B9G9R9_FLOAT";
193 case PixelFormat::D32_FLOAT:
194 return "D32_FLOAT";
195 case PixelFormat::D16_UNORM:
196 return "D16_UNORM";
197 case PixelFormat::D24_UNORM_S8_UINT:
198 return "D24_UNORM_S8_UINT";
199 case PixelFormat::S8_UINT_D24_UNORM:
200 return "S8_UINT_D24_UNORM";
201 case PixelFormat::D32_FLOAT_S8_UINT:
202 return "D32_FLOAT_S8_UINT";
203 case PixelFormat::MaxDepthStencilFormat:
204 case PixelFormat::Invalid:
205 return "Invalid";
206 }
207 return "Invalid";
208 }();
209 return formatter<string_view>::format(name, ctx);
210 }
211};
212
213template <>
214struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> {
215 template <typename FormatContext>
216 auto format(VideoCommon::ImageType type, FormatContext& ctx) {
217 const string_view name = [type] {
218 using VideoCommon::ImageType;
219 switch (type) {
220 case ImageType::e1D:
221 return "1D";
222 case ImageType::e2D:
223 return "2D";
224 case ImageType::e3D:
225 return "3D";
226 case ImageType::Linear:
227 return "Linear";
228 case ImageType::Buffer:
229 return "Buffer";
230 }
231 return "Invalid";
232 }();
233 return formatter<string_view>::format(name, ctx);
234 }
235};
236
237template <>
238struct fmt::formatter<VideoCommon::Extent3D> {
239 constexpr auto parse(fmt::format_parse_context& ctx) {
240 return ctx.begin();
241 }
242
243 template <typename FormatContext>
244 auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) {
245 return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height,
246 extent.depth);
247 }
248};
249
250namespace VideoCommon {
251
252struct ImageBase;
253struct ImageViewBase;
254struct RenderTargets;
255
256[[nodiscard]] std::string Name(const ImageBase& image);
257
258[[nodiscard]] std::string Name(const ImageViewBase& image_view,
259 std::optional<ImageViewType> type = std::nullopt);
260
261[[nodiscard]] std::string Name(const RenderTargets& render_targets);
262
263} // namespace VideoCommon
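These specializations are what allow call sites such as the surface.cpp changes above to log enum values directly with {} instead of casting. Illustrative uses (the Extent3D field order is assumed from the formatter above):

    fmt::format("{}", VideoCore::Surface::PixelFormat::R8_UNORM);   // "R8_UNORM"
    fmt::format("{}", VideoCommon::Extent3D{64, 32, 1});            // "{64, 32, 1}"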
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
new file mode 100644
index 000000000..448a05fcc
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -0,0 +1,216 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <utility>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/formatter.h"
13#include "video_core/texture_cache/image_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/util.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::DefaultBlockHeight;
20using VideoCore::Surface::DefaultBlockWidth;
21
22namespace {
23/// Returns the base layer and mip level offset
24[[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) {
25 if (layer_stride == 0) {
26 return {0, diff};
27 } else {
28 return {diff / layer_stride, diff % layer_stride};
29 }
30}
31
32[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) {
33 return layers.base_level < info.resources.levels &&
34 layers.base_layer + layers.num_layers <= info.resources.layers;
35}
36
37[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) {
38 const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level);
39 const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level);
40 if (!ValidateLayers(copy.src_subresource, src)) {
41 return false;
42 }
43 if (!ValidateLayers(copy.dst_subresource, dst)) {
44 return false;
45 }
46 if (copy.src_offset.x + copy.extent.width > src_size.width ||
47 copy.src_offset.y + copy.extent.height > src_size.height ||
48 copy.src_offset.z + copy.extent.depth > src_size.depth) {
49 return false;
50 }
51 if (copy.dst_offset.x + copy.extent.width > dst_size.width ||
52 copy.dst_offset.y + copy.extent.height > dst_size.height ||
53 copy.dst_offset.z + copy.extent.depth > dst_size.depth) {
54 return false;
55 }
56 return true;
57}
58} // Anonymous namespace
59
60ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
61 : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
62 unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
63 converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
64 cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
65 mip_level_offsets{CalculateMipLevelOffsets(info)} {
66 if (info.type == ImageType::e3D) {
67 slice_offsets = CalculateSliceOffsets(info);
68 slice_subresources = CalculateSliceSubresources(info);
69 }
70}
71
72std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
73 if (other_addr < gpu_addr) {
74 // Subresource address can't be lower than the base
75 return std::nullopt;
76 }
77 const u32 diff = static_cast<u32>(other_addr - gpu_addr);
78 if (diff > guest_size_bytes) {
79 // This can happen when two CPU addresses are used for different GPU addresses
80 return std::nullopt;
81 }
82 if (info.type != ImageType::e3D) {
83 const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
84 const auto end = mip_level_offsets.begin() + info.resources.levels;
85 const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
86 if (layer > info.resources.layers || it == end) {
87 return std::nullopt;
88 }
89 return SubresourceBase{
90 .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)),
91 .layer = layer,
92 };
93 } else {
94 // TODO: Consider using binary_search after a threshold
95 const auto it = std::ranges::find(slice_offsets, diff);
96 if (it == slice_offsets.cend()) {
97 return std::nullopt;
98 }
99 return slice_subresources[std::distance(slice_offsets.begin(), it)];
100 }
101}
102
103ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept {
104 const auto it = std::ranges::find(image_view_infos, view_info);
105 if (it == image_view_infos.end()) {
106 return ImageViewId{};
107 }
108 return image_view_ids[std::distance(image_view_infos.begin(), it)];
109}
110
111void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) {
112 image_view_infos.push_back(view_info);
113 image_view_ids.push_back(image_view_id);
114}
115
116void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
117 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
118 ASSERT(lhs.info.type == rhs.info.type);
119 std::optional<SubresourceBase> base;
120 if (lhs.info.type == ImageType::Linear) {
121 base = SubresourceBase{.level = 0, .layer = 0};
122 } else {
123 base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS);
124 }
125 if (!base) {
126 LOG_ERROR(HW_GPU, "Image alias should have been flipped");
127 return;
128 }
129 const PixelFormat lhs_format = lhs.info.format;
130 const PixelFormat rhs_format = rhs.info.format;
131 const Extent2D lhs_block{
132 .width = DefaultBlockWidth(lhs_format),
133 .height = DefaultBlockHeight(lhs_format),
134 };
135 const Extent2D rhs_block{
136 .width = DefaultBlockWidth(rhs_format),
137 .height = DefaultBlockHeight(rhs_format),
138 };
139 const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
140 const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
141 if (is_lhs_compressed && is_rhs_compressed) {
142 LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
143 return;
144 }
145 const s32 lhs_mips = lhs.info.resources.levels;
146 const s32 rhs_mips = rhs.info.resources.levels;
147 const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
148 AliasedImage lhs_alias;
149 AliasedImage rhs_alias;
150 lhs_alias.id = rhs_id;
151 rhs_alias.id = lhs_id;
152 lhs_alias.copies.reserve(num_mips);
153 rhs_alias.copies.reserve(num_mips);
154 for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) {
155 Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
156 Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
157 if (is_lhs_compressed) {
158 lhs_size.width /= lhs_block.width;
159 lhs_size.height /= lhs_block.height;
160 }
161 if (is_rhs_compressed) {
162 rhs_size.width /= rhs_block.width;
163 rhs_size.height /= rhs_block.height;
164 }
165 const Extent3D copy_size{
166 .width = std::min(lhs_size.width, rhs_size.width),
167 .height = std::min(lhs_size.height, rhs_size.height),
168 .depth = std::min(lhs_size.depth, rhs_size.depth),
169 };
170 if (copy_size.width == 0 || copy_size.height == 0) {
171 LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased.");
172 continue;
173 }
174 const bool is_lhs_3d = lhs.info.type == ImageType::e3D;
175 const bool is_rhs_3d = rhs.info.type == ImageType::e3D;
176 const Offset3D lhs_offset{0, 0, 0};
177 const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0};
178 const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer;
179 const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers;
180 const s32 num_layers = std::min(lhs_layers, rhs_layers);
181 const SubresourceLayers lhs_subresource{
182 .base_level = mip_level,
183 .base_layer = 0,
184 .num_layers = num_layers,
185 };
186 const SubresourceLayers rhs_subresource{
187 .base_level = base->level + mip_level,
188 .base_layer = is_rhs_3d ? 0 : base->layer,
189 .num_layers = num_layers,
190 };
191 [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{
192 .src_subresource = lhs_subresource,
193 .dst_subresource = rhs_subresource,
194 .src_offset = lhs_offset,
195 .dst_offset = rhs_offset,
196 .extent = copy_size,
197 });
198 [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{
199 .src_subresource = rhs_subresource,
200 .dst_subresource = lhs_subresource,
201 .src_offset = rhs_offset,
202 .dst_offset = lhs_offset,
203 .extent = copy_size,
204 });
205 ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy");
206 ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy");
207 }
208 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
209 if (lhs_alias.copies.empty()) {
210 return;
211 }
212 lhs.aliased_images.push_back(std::move(lhs_alias));
213 rhs.aliased_images.push_back(std::move(rhs_alias));
214}
215
216} // namespace VideoCommon
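
TryFindBase is the overlap-resolution workhorse: given another image's GPU address, it reports which mip level and layer of this image that address lands on, or nullopt when it does not start exactly on a subresource boundary. A minimal usage sketch (image and other are assumed ImageBase instances):

    // Hypothetical caller resolving where `other` begins inside `image`.
    if (const auto base = image.TryFindBase(other.gpu_addr)) {
        // other.gpu_addr is the start of mip level base->level in layer base->layer
    }
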
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
new file mode 100644
index 000000000..b7f3b7e43
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.h
@@ -0,0 +1,83 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <vector>
10
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "video_core/texture_cache/image_info.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19enum class ImageFlagBits : u32 {
20 AcceleratedUpload = 1 << 0, ///< Upload can be accelerated on the GPU
21 Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted
22 CpuModified = 1 << 2, ///< Contents have been modified from the CPU
23 GpuModified = 1 << 3, ///< Contents have been modified from the GPU
24 Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT
25 Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted
26 Registered = 1 << 6, ///< True when the image is registered
27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked
28};
29DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
30
31struct ImageViewInfo;
32
33struct AliasedImage {
34 std::vector<ImageCopy> copies;
35 ImageId id;
36};
37
38struct ImageBase {
39 explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
40
41 [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
42
43 [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
44
45 void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
46
47 [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
48 const VAddr overlap_end = overlap_cpu_addr + overlap_size;
49 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
50 }
51
52 ImageInfo info;
53
54 u32 guest_size_bytes = 0;
55 u32 unswizzled_size_bytes = 0;
56 u32 converted_size_bytes = 0;
57 ImageFlagBits flags = ImageFlagBits::CpuModified;
58
59 GPUVAddr gpu_addr = 0;
60 VAddr cpu_addr = 0;
61 VAddr cpu_addr_end = 0;
62
63 u64 modification_tick = 0;
64 u64 frame_tick = 0;
65
66 std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
67
68 std::vector<ImageViewInfo> image_view_infos;
69 std::vector<ImageViewId> image_view_ids;
70
71 std::vector<u32> slice_offsets;
72 std::vector<SubresourceBase> slice_subresources;
73
74 std::vector<AliasedImage> aliased_images;
75};
76
77struct ImageAllocBase {
78 std::vector<ImageId> images;
79};
80
81void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
82
83} // namespace VideoCommon
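
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) generates the usual bitwise operators for the enum class, so state transitions read like plain bitmask code. A short sketch of the expected pattern:

    image.flags |= ImageFlagBits::GpuModified;  // mark: the GPU has written to this image
    image.flags &= ~ImageFlagBits::CpuModified; // clear: CPU-side changes have been consumed
    const bool needs_upload = (image.flags & ImageFlagBits::CpuModified) != ImageFlagBits{};
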
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
new file mode 100644
index 000000000..64fd7010a
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -0,0 +1,189 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/surface.h"
7#include "video_core/texture_cache/format_lookup_table.h"
8#include "video_core/texture_cache/image_info.h"
9#include "video_core/texture_cache/samples_helper.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/texture_cache/util.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::TextureType;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20ImageInfo::ImageInfo(const TICEntry& config) noexcept {
21 format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
22 config.a_type, config.srgb_conversion);
23 num_samples = NumSamples(config.msaa_mode);
24 resources.levels = config.max_mip_level + 1;
25 if (config.IsPitchLinear()) {
26 pitch = config.Pitch();
27 } else if (config.IsBlockLinear()) {
28 block = Extent3D{
29 .width = config.block_width,
30 .height = config.block_height,
31 .depth = config.block_depth,
32 };
33 }
34 tile_width_spacing = config.tile_width_spacing;
35 if (config.texture_type != TextureType::Texture2D &&
36 config.texture_type != TextureType::Texture2DNoMipmap) {
37 ASSERT(!config.IsPitchLinear());
38 }
39 switch (config.texture_type) {
40 case TextureType::Texture1D:
41 ASSERT(config.BaseLayer() == 0);
42 type = ImageType::e1D;
43 size.width = config.Width();
44 break;
45 case TextureType::Texture1DArray:
46 UNIMPLEMENTED_IF(config.BaseLayer() != 0);
47 type = ImageType::e1D;
48 size.width = config.Width();
49 resources.layers = config.Depth();
50 break;
51 case TextureType::Texture2D:
52 case TextureType::Texture2DNoMipmap:
53 ASSERT(config.Depth() == 1);
54 type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
55 size.width = config.Width();
56 size.height = config.Height();
57 resources.layers = config.BaseLayer() + 1;
58 break;
59 case TextureType::Texture2DArray:
60 type = ImageType::e2D;
61 size.width = config.Width();
62 size.height = config.Height();
63 resources.layers = config.BaseLayer() + config.Depth();
64 break;
65 case TextureType::TextureCubemap:
66 ASSERT(config.Depth() == 1);
67 type = ImageType::e2D;
68 size.width = config.Width();
69 size.height = config.Height();
70 resources.layers = config.BaseLayer() + 6;
71 break;
72 case TextureType::TextureCubeArray:
73 UNIMPLEMENTED_IF(config.load_store_hint != 0);
74 type = ImageType::e2D;
75 size.width = config.Width();
76 size.height = config.Height();
77 resources.layers = config.BaseLayer() + config.Depth() * 6;
78 break;
79 case TextureType::Texture3D:
80 ASSERT(config.BaseLayer() == 0);
81 type = ImageType::e3D;
82 size.width = config.Width();
83 size.height = config.Height();
84 size.depth = config.Depth();
85 break;
86 case TextureType::Texture1DBuffer:
87 type = ImageType::Buffer;
88 size.width = config.Width();
89 break;
90 default:
91 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
92 break;
93 }
94 if (type != ImageType::Linear) {
95 // FIXME: Call this without passing *this
96 layer_stride = CalculateLayerStride(*this);
97 maybe_unaligned_layer_stride = CalculateLayerSize(*this);
98 }
99}
100
101ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
102 const auto& rt = regs.rt[index];
103 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
104 if (rt.tile_mode.is_pitch_linear) {
105 ASSERT(rt.tile_mode.is_3d == 0);
106 type = ImageType::Linear;
107 pitch = rt.width;
108 size = Extent3D{
109 .width = pitch / BytesPerBlock(format),
110 .height = rt.height,
111 .depth = 1,
112 };
113 return;
114 }
115 size.width = rt.width;
116 size.height = rt.height;
117 layer_stride = rt.layer_stride * 4;
118 maybe_unaligned_layer_stride = layer_stride;
119 num_samples = NumSamples(regs.multisample_mode);
120 block = Extent3D{
121 .width = rt.tile_mode.block_width,
122 .height = rt.tile_mode.block_height,
123 .depth = rt.tile_mode.block_depth,
124 };
125 if (rt.tile_mode.is_3d) {
126 type = ImageType::e3D;
127 size.depth = rt.depth;
128 } else {
129 type = ImageType::e2D;
130 resources.layers = rt.depth;
131 }
132}
133
134ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
135 format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
136 size.width = regs.zeta_width;
137 size.height = regs.zeta_height;
138 resources.levels = 1;
139 layer_stride = regs.zeta.layer_stride * 4;
140 maybe_unaligned_layer_stride = layer_stride;
141 num_samples = NumSamples(regs.multisample_mode);
142 block = Extent3D{
143 .width = regs.zeta.tile_mode.block_width,
144 .height = regs.zeta.tile_mode.block_height,
145 .depth = regs.zeta.tile_mode.block_depth,
146 };
147 if (regs.zeta.tile_mode.is_pitch_linear) {
148 ASSERT(regs.zeta.tile_mode.is_3d == 0);
149 type = ImageType::Linear;
150 pitch = size.width * BytesPerBlock(format);
151 } else if (regs.zeta.tile_mode.is_3d) {
152 ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0);
153 type = ImageType::e3D;
154 size.depth = regs.zeta_depth;
155 } else {
156 type = ImageType::e2D;
157 resources.layers = regs.zeta_depth;
158 }
159}
160
161ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
162 UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
163 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
164 if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
165 type = ImageType::Linear;
166 size = Extent3D{
167 .width = config.pitch / VideoCore::Surface::BytesPerBlock(format),
168 .height = config.height,
169 .depth = 1,
170 };
171 pitch = config.pitch;
172 } else {
173 type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
174 block = Extent3D{
175 .width = config.block_width,
176 .height = config.block_height,
177 .depth = config.block_depth,
178 };
179 // 3D blits with more than one slice are not implemented for now
180 // Render to individual slices
181 size = Extent3D{
182 .width = config.width,
183 .height = config.height,
184 .depth = 1,
185 };
186 }
187}
188
189} // namespace VideoCommon
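
The four constructors normalize every descriptor source the GPU exposes (texture TIC entries, color render targets, the zeta buffer, and Fermi2D surfaces) into the same ImageInfo shape. A hedged sketch, assuming a maxwell3d engine reference is at hand:

    // Hypothetical: describe bound color target 0 and the current depth target.
    const ImageInfo color_info(maxwell3d.regs, 0); // render-target overload
    const ImageInfo depth_info(maxwell3d.regs);    // zeta (depth) overload
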
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
new file mode 100644
index 000000000..5049fc36e
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.h
@@ -0,0 +1,38 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/surface.h"
10#include "video_core/texture_cache/types.h"
11
12namespace VideoCommon {
13
14using Tegra::Texture::TICEntry;
15using VideoCore::Surface::PixelFormat;
16
17struct ImageInfo {
18 explicit ImageInfo() = default;
19 explicit ImageInfo(const TICEntry& config) noexcept;
20 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
21 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
22 explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept;
23
24 PixelFormat format = PixelFormat::Invalid;
25 ImageType type = ImageType::e1D;
26 SubresourceExtent resources;
27 Extent3D size{1, 1, 1};
28 union {
29 Extent3D block{0, 0, 0};
30 u32 pitch;
31 };
32 u32 layer_stride = 0;
33 u32 maybe_unaligned_layer_stride = 0;
34 u32 num_samples = 1;
35 u32 tile_width_spacing = 0;
36};
37
38} // namespace VideoCommon
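
Note that block and pitch share storage in the anonymous union, so only the member matching the image layout is meaningful. A sketch of the intended read pattern:

    // Hypothetical consumer: pick the union member that matches the image type.
    if (info.type == ImageType::Linear) {
        const u32 row_stride = info.pitch;     // pitch-linear images define a pitch
        // ... use row_stride
    } else {
        const Extent3D gob_block = info.block; // block-linear images define block dims
        // ... use gob_block
    }
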
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
new file mode 100644
index 000000000..076a4bcfd
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -0,0 +1,41 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "common/assert.h"
8#include "core/settings.h"
9#include "video_core/compatible_formats.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/formatter.h"
12#include "video_core/texture_cache/image_info.h"
13#include "video_core/texture_cache/image_view_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
20 ImageId image_id_)
21 : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range},
22 size{
23 .width = std::max(image_info.size.width >> range.base.level, 1u),
24 .height = std::max(image_info.size.height >> range.base.level, 1u),
25 .depth = std::max(image_info.size.depth >> range.base.level, 1u),
26 } {
27 ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format),
28 "Image view format {} is incompatible with image format {}", info.format,
29 image_info.format);
30 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
31 if (image_info.type == ImageType::Linear && is_async) {
32 flags |= ImageViewFlagBits::PreemtiveDownload;
33 }
34 if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) {
35 flags |= ImageViewFlagBits::Slice;
36 }
37}
38
39ImageViewBase::ImageViewBase(const NullImageParams&) {}
40
41} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
new file mode 100644
index 000000000..73954167e
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -0,0 +1,47 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "video_core/surface.h"
9#include "video_core/texture_cache/types.h"
10
11namespace VideoCommon {
12
13using VideoCore::Surface::PixelFormat;
14
15struct ImageViewInfo;
16struct ImageInfo;
17
18struct NullImageParams {};
19
20enum class ImageViewFlagBits : u16 {
21 PreemtiveDownload = 1 << 0,
22 Strong = 1 << 1,
23 Slice = 1 << 2,
24};
25DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
26
27struct ImageViewBase {
28 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
29 ImageId image_id);
30 explicit ImageViewBase(const NullImageParams&);
31
32 [[nodiscard]] bool IsBuffer() const noexcept {
33 return type == ImageViewType::Buffer;
34 }
35
36 ImageId image_id{};
37 PixelFormat format{};
38 ImageViewType type{};
39 SubresourceRange range;
40 Extent3D size{0, 0, 0};
41 ImageViewFlagBits flags{};
42
43 u64 invalidation_tick = 0;
44 u64 modification_tick = 0;
45};
46
47} // namespace VideoCommon
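
ImageViewBase carries just enough identity (image id, format, type, subresource range) for a backend to instantiate an API-specific view on top. The Slice bit set by the constructor drives one such decision; a hedged sketch of the check a backend might perform:

    // Hypothetical backend check: non-3D views of 3D images are carved per slice.
    if ((view.flags & ImageViewFlagBits::Slice) != ImageViewFlagBits{}) {
        // create a single-depth-slice view at the slice stored in view.range.base.layer
    }
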
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
new file mode 100644
index 000000000..faf5b151f
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -0,0 +1,88 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/assert.h"
8#include "video_core/texture_cache/image_view_info.h"
9#include "video_core/texture_cache/texture_cache.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/textures/texture.h"
12
13namespace VideoCommon {
14
15namespace {
16
17constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
18
19[[nodiscard]] u8 CastSwizzle(SwizzleSource source) {
20 const u8 casted = static_cast<u8>(source);
21 ASSERT(static_cast<SwizzleSource>(casted) == source);
22 return casted;
23}
24
25} // Anonymous namespace
26
27ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept
28 : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)},
29 y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)},
30 w_source{CastSwizzle(config.w_source)} {
31 range.base = SubresourceBase{
32 .level = static_cast<s32>(config.res_min_mip_level),
33 .layer = base_layer,
34 };
35 range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1;
36
37 switch (config.texture_type) {
38 case TextureType::Texture1D:
39 ASSERT(config.Height() == 1);
40 ASSERT(config.Depth() == 1);
41 type = ImageViewType::e1D;
42 break;
43 case TextureType::Texture2D:
44 case TextureType::Texture2DNoMipmap:
45 ASSERT(config.Depth() == 1);
46 type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect;
47 break;
48 case TextureType::Texture3D:
49 type = ImageViewType::e3D;
50 break;
51 case TextureType::TextureCubemap:
52 ASSERT(config.Depth() == 1);
53 type = ImageViewType::Cube;
54 range.extent.layers = 6;
55 break;
56 case TextureType::Texture1DArray:
57 type = ImageViewType::e1DArray;
58 range.extent.layers = config.Depth();
59 break;
60 case TextureType::Texture2DArray:
61 type = ImageViewType::e2DArray;
62 range.extent.layers = config.Depth();
63 break;
64 case TextureType::Texture1DBuffer:
65 type = ImageViewType::Buffer;
66 break;
67 case TextureType::TextureCubeArray:
68 type = ImageViewType::CubeArray;
69 range.extent.layers = config.Depth() * 6;
70 break;
71 default:
72 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
73 break;
74 }
75}
76
77ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_,
78 SubresourceRange range_) noexcept
79 : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE},
80 y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE},
81 w_source{RENDER_TARGET_SWIZZLE} {}
82
83bool ImageViewInfo::IsRenderTarget() const noexcept {
84 return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE &&
85 z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE;
86}
87
88} // namespace VideoCommon
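
Render-target views are encoded purely through the 0xFF swizzle sentinel, a value CastSwizzle can never produce from a valid TICEntry, so IsRenderTarget needs no extra state. A minimal sketch (B8G8R8A8_UNORM is assumed from video_core/surface.h):

    const ImageViewInfo rt_view(ImageViewType::e2D, PixelFormat::B8G8R8A8_UNORM);
    ASSERT(rt_view.IsRenderTarget()); // all four sources hold RENDER_TARGET_SWIZZLE
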
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h
new file mode 100644
index 000000000..0c1f99117
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.h
@@ -0,0 +1,50 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9
10#include "video_core/surface.h"
11#include "video_core/texture_cache/types.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::SwizzleSource;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20/// Properties used to determine an image view
21struct ImageViewInfo {
22 explicit ImageViewInfo() noexcept = default;
23 explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept;
24 explicit ImageViewInfo(ImageViewType type, PixelFormat format,
25 SubresourceRange range = {}) noexcept;
26
27 auto operator<=>(const ImageViewInfo&) const noexcept = default;
28
29 [[nodiscard]] bool IsRenderTarget() const noexcept;
30
31 [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept {
32 return std::array{
33 static_cast<SwizzleSource>(x_source),
34 static_cast<SwizzleSource>(y_source),
35 static_cast<SwizzleSource>(z_source),
36 static_cast<SwizzleSource>(w_source),
37 };
38 }
39
40 ImageViewType type{};
41 PixelFormat format{};
42 SubresourceRange range;
43 u8 x_source = static_cast<u8>(SwizzleSource::R);
44 u8 y_source = static_cast<u8>(SwizzleSource::G);
45 u8 z_source = static_cast<u8>(SwizzleSource::B);
46 u8 w_source = static_cast<u8>(SwizzleSource::A);
47};
48static_assert(std::has_unique_object_representations_v<ImageViewInfo>);
49
50} // namespace VideoCommon
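
The static_assert pins down that ImageViewInfo has no padding bytes and one byte pattern per value, which keeps it safe to compare and hash as raw storage; defaulting operator<=> also implicitly declares operator==, so ImageBase::FindView can scan image_view_infos with std::ranges::find. A small sketch:

    ImageViewInfo a(ImageViewType::e2D, PixelFormat::A8B8G8R8_UNORM);
    ImageViewInfo b = a;
    ASSERT(a == b); // member-wise comparison generated from the defaulted operator<=>
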
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
new file mode 100644
index 000000000..9b9544b07
--- /dev/null
+++ b/src/video_core/texture_cache/render_targets.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <span>
9#include <utility>
10
11#include "common/bit_cast.h"
12#include "video_core/texture_cache/types.h"
13
14namespace VideoCommon {
15
16/// Framebuffer properties used to look up a framebuffer
17struct RenderTargets {
18 constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
19
20 constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept {
21 const auto contains = [elements](ImageViewId item) {
22 return std::ranges::find(elements, item) != elements.end();
23 };
24 return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id);
25 }
26
27 std::array<ImageViewId, NUM_RT> color_buffer_ids;
28 ImageViewId depth_buffer_id;
29 std::array<u8, NUM_RT> draw_buffers{};
30 Extent2D size;
31};
32
33} // namespace VideoCommon
34
35namespace std {
36
37template <>
38struct hash<VideoCommon::RenderTargets> {
39 size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept {
40 using VideoCommon::ImageViewId;
41 size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id);
42 for (const ImageViewId color_buffer_id : rt.color_buffer_ids) {
43 value ^= std::hash<ImageViewId>{}(color_buffer_id);
44 }
45 value ^= Common::BitCast<u64>(rt.draw_buffers);
46 value ^= Common::BitCast<u64>(rt.size);
47 return value;
48 }
49};
50
51} // namespace std
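
The hash folds all attachment ids together and bit-casts the two remaining members; since Common::BitCast requires matching sizes, this presumably relies on NUM_RT being 8, so draw_buffers and a two-u32 Extent2D each occupy exactly one u64. It allows a framebuffer cache to be keyed directly on RenderTargets; a minimal sketch where Framebuffer is a placeholder type:

    std::unordered_map<VideoCommon::RenderTargets, Framebuffer> framebuffer_cache;
    const auto [it, is_new] = framebuffer_cache.try_emplace(render_targets);
    if (is_new) {
        // build the backend framebuffer for this combination of attachments
    }
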
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
new file mode 100644
index 000000000..04539a43c
--- /dev/null
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -0,0 +1,55 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/assert.h"
10#include "video_core/textures/texture.h"
11
12namespace VideoCommon {
13
14[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
15 switch (num_samples) {
16 case 1:
17 return {0, 0};
18 case 2:
19 return {1, 0};
20 case 4:
21 return {1, 1};
22 case 8:
23 return {2, 1};
24 case 16:
25 return {2, 2};
26 }
27 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
28 return {1, 1};
29}
30
31[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
32 using Tegra::Texture::MsaaMode;
33 switch (msaa_mode) {
34 case MsaaMode::Msaa1x1:
35 return 1;
36 case MsaaMode::Msaa2x1:
37 case MsaaMode::Msaa2x1_D3D:
38 return 2;
39 case MsaaMode::Msaa2x2:
40 case MsaaMode::Msaa2x2_VC4:
41 case MsaaMode::Msaa2x2_VC12:
42 return 4;
43 case MsaaMode::Msaa4x2:
44 case MsaaMode::Msaa4x2_D3D:
45 case MsaaMode::Msaa4x2_VC8:
46 case MsaaMode::Msaa4x2_VC24:
47 return 8;
48 case MsaaMode::Msaa4x4:
49 return 16;
50 }
51 UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
52 return 1;
53}
54
55} // namespace VideoCommon
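
Both helpers express MSAA as per-axis powers of two: SamplesLog2 returns {log2(x), log2(y)}, so sample-adjusted dimensions come from shifts. A short worked sketch:

    const auto [x_shift, y_shift] = VideoCommon::SamplesLog2(8); // {2, 1}
    const u32 sample_width = width << x_shift;   // 4 samples along X
    const u32 sample_height = height << y_shift; // 2 samples along Y; 4 * 2 == 8 total
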
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
new file mode 100644
index 000000000..eae3be6ea
--- /dev/null
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -0,0 +1,156 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <concepts>
9#include <numeric>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include "common/assert.h"
15#include "common/common_types.h"
16
17namespace VideoCommon {
18
19struct SlotId {
20 static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
21
22 constexpr auto operator<=>(const SlotId&) const noexcept = default;
23
24 constexpr explicit operator bool() const noexcept {
25 return index != INVALID_INDEX;
26 }
27
28 u32 index = INVALID_INDEX;
29};
30
31template <class T>
32requires std::is_nothrow_move_assignable_v<T>&&
33 std::is_nothrow_move_constructible_v<T> class SlotVector {
34public:
35 ~SlotVector() noexcept {
36 size_t index = 0;
37 for (u64 bits : stored_bitset) {
38 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
39 if ((bits & 1) != 0) {
40 values[index + bit].object.~T();
41 }
42 }
43 index += 64;
44 }
45 delete[] values;
46 }
47
48 [[nodiscard]] T& operator[](SlotId id) noexcept {
49 ValidateIndex(id);
50 return values[id.index].object;
51 }
52
53 [[nodiscard]] const T& operator[](SlotId id) const noexcept {
54 ValidateIndex(id);
55 return values[id.index].object;
56 }
57
58 template <typename... Args>
59 [[nodiscard]] SlotId insert(Args&&... args) noexcept {
60 const u32 index = FreeValueIndex();
61 new (&values[index].object) T(std::forward<Args>(args)...);
62 SetStorageBit(index);
63
64 return SlotId{index};
65 }
66
67 void erase(SlotId id) noexcept {
68 values[id.index].object.~T();
69 free_list.push_back(id.index);
70 ResetStorageBit(id.index);
71 }
72
73private:
74 struct NonTrivialDummy {
75 NonTrivialDummy() noexcept {}
76 };
77
78 union Entry {
79 Entry() noexcept : dummy{} {}
80 ~Entry() noexcept {}
81
82 NonTrivialDummy dummy;
83 T object;
84 };
85
86 void SetStorageBit(u32 index) noexcept {
87 stored_bitset[index / 64] |= u64(1) << (index % 64);
88 }
89
90 void ResetStorageBit(u32 index) noexcept {
91 stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
92 }
93
94 bool ReadStorageBit(u32 index) noexcept {
95 return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
96 }
97
98 void ValidateIndex(SlotId id) const noexcept {
99 DEBUG_ASSERT(id);
100 DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
101 DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
102 }
103
104 [[nodiscard]] u32 FreeValueIndex() noexcept {
105 if (free_list.empty()) {
106 Reserve(values_capacity ? (values_capacity << 1) : 1);
107 }
108 const u32 free_index = free_list.back();
109 free_list.pop_back();
110 return free_index;
111 }
112
113 void Reserve(size_t new_capacity) noexcept {
114 Entry* const new_values = new Entry[new_capacity];
115 size_t index = 0;
116 for (u64 bits : stored_bitset) {
117 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
118 const size_t i = index + bit;
119 if ((bits & 1) == 0) {
120 continue;
121 }
122 T& old_value = values[i].object;
123 new (&new_values[i].object) T(std::move(old_value));
124 old_value.~T();
125 }
126 index += 64;
127 }
128
129 stored_bitset.resize((new_capacity + 63) / 64);
130
131 const size_t old_free_size = free_list.size();
132 free_list.resize(old_free_size + (new_capacity - values_capacity));
133 std::iota(free_list.begin() + old_free_size, free_list.end(),
134 static_cast<u32>(values_capacity));
135
136 delete[] values;
137 values = new_values;
138 values_capacity = new_capacity;
139 }
140
141 Entry* values = nullptr;
142 size_t values_capacity = 0;
143 size_t values_size = 0;
144
145 std::vector<u64> stored_bitset;
146 std::vector<u32> free_list;
147};
148
149} // namespace VideoCommon
150
151template <>
152struct std::hash<VideoCommon::SlotId> {
153 size_t operator()(const VideoCommon::SlotId& id) const noexcept {
154 return std::hash<u32>{}(id.index);
155 }
156};
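
SlotVector trades pointer stability for index stability: an insertion may reallocate and move every entry (hence the nothrow-move requires clause), but a SlotId keeps addressing the same object. A usage sketch with std::string standing in for any stored type:

    VideoCommon::SlotVector<std::string> names;
    const VideoCommon::SlotId id = names.insert("color buffer");
    std::string& name = names[id]; // O(1) lookup; the reference may dangle after the next insert
    names.erase(id);               // destroys the object and recycles the index
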
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
deleted file mode 100644
index b44c09d71..000000000
--- a/src/video_core/texture_cache/surface_base.cpp
+++ /dev/null
@@ -1,298 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/algorithm.h"
6#include "common/assert.h"
7#include "common/common_types.h"
8#include "common/microprofile.h"
9#include "video_core/memory_manager.h"
10#include "video_core/texture_cache/surface_base.h"
11#include "video_core/texture_cache/surface_params.h"
12#include "video_core/textures/convert.h"
13
14namespace VideoCommon {
15
16MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
17MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));
18
19using Tegra::Texture::ConvertFromGuestToHost;
20using VideoCore::MortonSwizzleMode;
21using VideoCore::Surface::IsPixelFormatASTC;
22using VideoCore::Surface::PixelFormat;
23
24StagingCache::StagingCache() = default;
25
26StagingCache::~StagingCache() = default;
27
28SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
29 bool is_astc_supported)
30 : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels),
31 mipmap_offsets(params.num_levels) {
32 is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported;
33 host_memory_size = params.GetHostSizeInBytes(is_converted);
34
35 std::size_t offset = 0;
36 for (u32 level = 0; level < params.num_levels; ++level) {
37 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
38 mipmap_sizes[level] = mipmap_size;
39 mipmap_offsets[level] = offset;
40 offset += mipmap_size;
41 }
42 layer_size = offset;
43 if (params.is_layered) {
44 if (params.is_tiled) {
45 layer_size =
46 SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
47 }
48 guest_memory_size = layer_size * params.depth;
49 } else {
50 guest_memory_size = layer_size;
51 }
52}
53
54MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
55 const u32 src_bpp{params.GetBytesPerPixel()};
56 const u32 dst_bpp{rhs.GetBytesPerPixel()};
57 const bool ib1 = params.IsBuffer();
58 const bool ib2 = rhs.IsBuffer();
59 if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
60 const bool cb1 = params.IsCompressed();
61 const bool cb2 = rhs.IsCompressed();
62 if (cb1 == cb2) {
63 return MatchTopologyResult::FullMatch;
64 }
65 return MatchTopologyResult::CompressUnmatch;
66 }
67 return MatchTopologyResult::None;
68}
69
70MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
71 // Buffer surface Check
72 if (params.IsBuffer()) {
73 const std::size_t wd1 = params.width * params.GetBytesPerPixel();
74 const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
75 if (wd1 == wd2) {
76 return MatchStructureResult::FullMatch;
77 }
78 return MatchStructureResult::None;
79 }
80
81 // Linear Surface check
82 if (!params.is_tiled) {
83 if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
84 if (params.width == rhs.width) {
85 return MatchStructureResult::FullMatch;
86 } else {
87 return MatchStructureResult::SemiMatch;
88 }
89 }
90 return MatchStructureResult::None;
91 }
92
93 // Tiled Surface check
94 if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
95 params.tile_width_spacing, params.num_levels) ==
96 std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
97 rhs.tile_width_spacing, rhs.num_levels)) {
98 if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
99 return MatchStructureResult::FullMatch;
100 }
101 const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
102 rhs.pixel_format);
103 const u32 hs =
104 SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
105 const u32 w1 = params.GetBlockAlignedWidth();
106 if (std::tie(w1, params.height) == std::tie(ws, hs)) {
107 return MatchStructureResult::SemiMatch;
108 }
109 }
110 return MatchStructureResult::None;
111}
112
113std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
114 const GPUVAddr candidate_gpu_addr) const {
115 if (gpu_addr == candidate_gpu_addr) {
116 return {{0, 0}};
117 }
118
119 if (candidate_gpu_addr < gpu_addr) {
120 return std::nullopt;
121 }
122
123 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
124 const auto layer{static_cast<u32>(relative_address / layer_size)};
125 if (layer >= params.depth) {
126 return std::nullopt;
127 }
128
129 const GPUVAddr mipmap_address = relative_address - layer_size * layer;
130 const auto mipmap_it =
131 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
132 if (mipmap_it == mipmap_offsets.end()) {
133 return std::nullopt;
134 }
135
136 const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
137 return std::make_pair(layer, level);
138}
139
140std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
141 const u32 layers{params.depth};
142 const u32 mipmaps{params.num_levels};
143 std::vector<CopyParams> result;
144 result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));
145
146 for (u32 layer = 0; layer < layers; layer++) {
147 for (u32 level = 0; level < mipmaps; level++) {
148 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
149 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
150 result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1);
151 }
152 }
153 return result;
154}
155
156std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
157 const u32 mipmaps{params.num_levels};
158 std::vector<CopyParams> result;
159 result.reserve(mipmaps);
160
161 for (u32 level = 0; level < mipmaps; level++) {
162 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
163 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
164 const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
165 result.emplace_back(width, height, depth, level);
166 }
167 return result;
168}
169
170void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
171 u8* buffer, u32 level) {
172 const u32 width{params.GetMipWidth(level)};
173 const u32 height{params.GetMipHeight(level)};
174 const u32 block_height{params.GetMipBlockHeight(level)};
175 const u32 block_depth{params.GetMipBlockDepth(level)};
176
177 std::size_t guest_offset{mipmap_offsets[level]};
178 if (params.is_layered) {
179 std::size_t host_offset = 0;
180 const std::size_t guest_stride = layer_size;
181 const std::size_t host_stride = params.GetHostLayerSize(level);
182 for (u32 layer = 0; layer < params.depth; ++layer) {
183 MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1,
184 params.tile_width_spacing, buffer + host_offset, memory + guest_offset);
185 guest_offset += guest_stride;
186 host_offset += host_stride;
187 }
188 } else {
189 MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth,
190 params.GetMipDepth(level), params.tile_width_spacing, buffer,
191 memory + guest_offset);
192 }
193}
194
195void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
196 StagingCache& staging_cache) {
197 MICROPROFILE_SCOPE(GPU_Load_Texture);
198 auto& staging_buffer = staging_cache.GetBuffer(0);
199 u8* host_ptr;
200 // Use an extra temporal buffer
201 auto& tmp_buffer = staging_cache.GetBuffer(1);
202 tmp_buffer.resize(guest_memory_size);
203 host_ptr = tmp_buffer.data();
204 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
205
206 if (params.is_tiled) {
207 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
208 params.block_width, static_cast<u32>(params.target));
209 for (u32 level = 0; level < params.num_levels; ++level) {
210 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
211 SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
212 staging_buffer.data() + host_offset, level);
213 }
214 } else {
215 ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
216 const u32 bpp{params.GetBytesPerPixel()};
217 const u32 block_width{params.GetDefaultBlockWidth()};
218 const u32 block_height{params.GetDefaultBlockHeight()};
219 const u32 width{(params.width + block_width - 1) / block_width};
220 const u32 height{(params.height + block_height - 1) / block_height};
221 const u32 copy_size{width * bpp};
222 if (params.pitch == copy_size) {
223 std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
224 } else {
225 const u8* start{host_ptr};
226 u8* write_to{staging_buffer.data()};
227 for (u32 h = height; h > 0; --h) {
228 std::memcpy(write_to, start, copy_size);
229 start += params.pitch;
230 write_to += copy_size;
231 }
232 }
233 }
234
235 if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
236 return;
237 }
238
239 for (u32 level = params.num_levels; level--;) {
240 const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
241 const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
242 u8* const in_buffer = staging_buffer.data() + in_host_offset;
243 u8* const out_buffer = staging_buffer.data() + out_host_offset;
244 ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
245 params.GetMipWidth(level), params.GetMipHeight(level),
246 params.GetMipDepth(level), true, true);
247 }
248}
249
250void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
251 StagingCache& staging_cache) {
252 MICROPROFILE_SCOPE(GPU_Flush_Texture);
253 auto& staging_buffer = staging_cache.GetBuffer(0);
254 u8* host_ptr;
255
256 // Use an extra temporal buffer
257 auto& tmp_buffer = staging_cache.GetBuffer(1);
258 tmp_buffer.resize(guest_memory_size);
259 host_ptr = tmp_buffer.data();
260
261 if (params.target == SurfaceTarget::Texture3D) {
262 // Special case for 3D texture segments
263 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
264 }
265
266 if (params.is_tiled) {
267 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
268 for (u32 level = 0; level < params.num_levels; ++level) {
269 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
270 SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
271 staging_buffer.data() + host_offset, level);
272 }
273 } else if (params.IsBuffer()) {
274 // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
275 // memory.
276 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
277 } else {
278 ASSERT(params.target == SurfaceTarget::Texture2D);
279 ASSERT(params.num_levels == 1);
280
281 const u32 bpp{params.GetBytesPerPixel()};
282 const u32 copy_size{params.width * bpp};
283 if (params.pitch == copy_size) {
284 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
285 } else {
286 u8* start{host_ptr};
287 const u8* read_to{staging_buffer.data()};
288 for (u32 h = params.height; h > 0; --h) {
289 std::memcpy(start, read_to, copy_size);
290 start += params.pitch;
291 read_to += copy_size;
292 }
293 }
294 }
295 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
296}
297
298} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
deleted file mode 100644
index 173f2edba..000000000
--- a/src/video_core/texture_cache/surface_base.h
+++ /dev/null
@@ -1,333 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <tuple>
9#include <unordered_map>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/gpu.h"
14#include "video_core/morton.h"
15#include "video_core/texture_cache/copy_params.h"
16#include "video_core/texture_cache/surface_params.h"
17#include "video_core/texture_cache/surface_view.h"
18
19namespace Tegra {
20class MemoryManager;
21}
22
23namespace VideoCommon {
24
25using VideoCore::MortonSwizzleMode;
26using VideoCore::Surface::SurfaceTarget;
27
28enum class MatchStructureResult : u32 {
29 FullMatch = 0,
30 SemiMatch = 1,
31 None = 2,
32};
33
34enum class MatchTopologyResult : u32 {
35 FullMatch = 0,
36 CompressUnmatch = 1,
37 None = 2,
38};
39
40class StagingCache {
41public:
42 explicit StagingCache();
43 ~StagingCache();
44
45 std::vector<u8>& GetBuffer(std::size_t index) {
46 return staging_buffer[index];
47 }
48
49 const std::vector<u8>& GetBuffer(std::size_t index) const {
50 return staging_buffer[index];
51 }
52
53 void SetSize(std::size_t size) {
54 staging_buffer.resize(size);
55 }
56
57private:
58 std::vector<std::vector<u8>> staging_buffer;
59};
60
61class SurfaceBaseImpl {
62public:
63 void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
64
65 void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
66
67 GPUVAddr GetGpuAddr() const {
68 return gpu_addr;
69 }
70
71 bool Overlaps(const VAddr start, const VAddr end) const {
72 return (cpu_addr < end) && (cpu_addr_end > start);
73 }
74
75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const {
76 const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
77 return gpu_addr <= other_start && other_end <= gpu_addr_end;
78 }
79
80 // Use only when recycling a surface
81 void SetGpuAddr(const GPUVAddr new_addr) {
82 gpu_addr = new_addr;
83 }
84
85 VAddr GetCpuAddr() const {
86 return cpu_addr;
87 }
88
89 VAddr GetCpuAddrEnd() const {
90 return cpu_addr_end;
91 }
92
93 void SetCpuAddr(const VAddr new_addr) {
94 cpu_addr = new_addr;
95 cpu_addr_end = new_addr + guest_memory_size;
96 }
97
98 const SurfaceParams& GetSurfaceParams() const {
99 return params;
100 }
101
102 std::size_t GetSizeInBytes() const {
103 return guest_memory_size;
104 }
105
106 std::size_t GetHostSizeInBytes() const {
107 return host_memory_size;
108 }
109
110 std::size_t GetMipmapSize(const u32 level) const {
111 return mipmap_sizes[level];
112 }
113
114 bool IsLinear() const {
115 return !params.is_tiled;
116 }
117
118 bool IsConverted() const {
119 return is_converted;
120 }
121
122 bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
123 return params.pixel_format == pixel_format;
124 }
125
126 VideoCore::Surface::PixelFormat GetFormat() const {
127 return params.pixel_format;
128 }
129
130 bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
131 return params.target == target;
132 }
133
134 MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;
135
136 MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;
137
138 bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
139 return std::tie(gpu_addr, params.target, params.num_levels) ==
140 std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
141 params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
142 }
143
144 std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;
145
146 std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
147 return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
148 }
149
150protected:
151 explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
152 bool is_astc_supported);
153 ~SurfaceBaseImpl() = default;
154
155 virtual void DecorateSurfaceName() = 0;
156
157 const SurfaceParams params;
158 std::size_t layer_size;
159 std::size_t guest_memory_size;
160 std::size_t host_memory_size;
161 GPUVAddr gpu_addr{};
162 VAddr cpu_addr{};
163 VAddr cpu_addr_end{};
164 bool is_converted{};
165
166 std::vector<std::size_t> mipmap_sizes;
167 std::vector<std::size_t> mipmap_offsets;
168
169private:
170 void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer,
171 u32 level);
172
173 std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;
174
175 std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
176};
177
178template <typename TView>
179class SurfaceBase : public SurfaceBaseImpl {
180public:
181 virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;
182
183 virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
184
185 void MarkAsModified(bool is_modified_, u64 tick) {
186 is_modified = is_modified_ || is_target;
187 modification_tick = tick;
188 }
189
190 void MarkAsRenderTarget(bool is_target_, u32 index_) {
191 is_target = is_target_;
192 index = index_;
193 }
194
195 void SetMemoryMarked(bool is_memory_marked_) {
196 is_memory_marked = is_memory_marked_;
197 }
198
199 bool IsMemoryMarked() const {
200 return is_memory_marked;
201 }
202
203 void SetSyncPending(bool is_sync_pending_) {
204 is_sync_pending = is_sync_pending_;
205 }
206
207 bool IsSyncPending() const {
208 return is_sync_pending;
209 }
210
211 void MarkAsPicked(bool is_picked_) {
212 is_picked = is_picked_;
213 }
214
215 bool IsModified() const {
216 return is_modified;
217 }
218
219 bool IsProtected() const {
220 // Only 3D slices are to be protected
221 return is_target && params.target == SurfaceTarget::Texture3D;
222 }
223
224 bool IsRenderTarget() const {
225 return is_target;
226 }
227
228 u32 GetRenderTarget() const {
229 return index;
230 }
231
232 bool IsRegistered() const {
233 return is_registered;
234 }
235
236 bool IsPicked() const {
237 return is_picked;
238 }
239
240 void MarkAsRegistered(bool is_reg) {
241 is_registered = is_reg;
242 }
243
244 u64 GetModificationTick() const {
245 return modification_tick;
246 }
247
248 TView EmplaceOverview(const SurfaceParams& overview_params) {
249 const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
250 return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
251 }
252
253 TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
254 return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
255 base_level, num_levels));
256 }
257
258 std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
259 const GPUVAddr view_addr,
260 const std::size_t candidate_size, const u32 mipmap,
261 const u32 layer) {
262 const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
263 if (!layer_mipmap) {
264 return {};
265 }
266 const auto [end_layer, end_mipmap] = *layer_mipmap;
267 if (layer != end_layer) {
268 if (mipmap == 0 && end_mipmap == 0) {
269 return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1));
270 }
271 return {};
272 } else {
273 return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap));
274 }
275 }
276
277 std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
278 const std::size_t candidate_size) {
279 if (params.target == SurfaceTarget::Texture3D ||
280 view_params.target == SurfaceTarget::Texture3D ||
281 (params.num_levels == 1 && !params.is_layered)) {
282 return {};
283 }
284 const auto layer_mipmap{GetLayerMipmap(view_addr)};
285 if (!layer_mipmap) {
286 return {};
287 }
288 const auto [layer, mipmap] = *layer_mipmap;
289 if (GetMipmapSize(mipmap) != candidate_size) {
290 return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
291 }
292 return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
293 }
294
295 TView GetMainView() const {
296 return main_view;
297 }
298
299protected:
300 explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
301 bool is_astc_supported)
302 : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {}
303
304 ~SurfaceBase() = default;
305
306 virtual TView CreateView(const ViewParams& view_key) = 0;
307
308 TView main_view;
309 std::unordered_map<ViewParams, TView> views;
310
311private:
312 TView GetView(const ViewParams& key) {
313 const auto [entry, is_cache_miss] = views.try_emplace(key);
314 auto& view{entry->second};
315 if (is_cache_miss) {
316 view = CreateView(key);
317 }
318 return view;
319 }
320
321 static constexpr u32 NO_RT = 0xFFFFFFFF;
322
323 bool is_modified{};
324 bool is_target{};
325 bool is_registered{};
326 bool is_picked{};
327 bool is_memory_marked{};
328 bool is_sync_pending{};
329 u32 index{NO_RT};
330 u64 modification_tick{};
331};
332
333} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
deleted file mode 100644
index 13dd16356..000000000
--- a/src/video_core/texture_cache/surface_params.cpp
+++ /dev/null
@@ -1,445 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "core/core.h"
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/format_lookup_table.h"
15#include "video_core/texture_cache/surface_params.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::PixelFormat;
20using VideoCore::Surface::PixelFormatFromDepthFormat;
21using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
22using VideoCore::Surface::SurfaceTarget;
23using VideoCore::Surface::SurfaceTargetFromTextureType;
24using VideoCore::Surface::SurfaceType;
25
26namespace {
27
28SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
29 switch (type) {
30 case Tegra::Shader::TextureType::Texture1D:
31 return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
32 case Tegra::Shader::TextureType::Texture2D:
33 return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
34 case Tegra::Shader::TextureType::Texture3D:
35 ASSERT(!is_array);
36 return SurfaceTarget::Texture3D;
37 case Tegra::Shader::TextureType::TextureCube:
38 return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
39 default:
40 UNREACHABLE();
41 return SurfaceTarget::Texture2D;
42 }
43}
44
45SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) {
46 switch (type) {
47 case Tegra::Shader::ImageType::Texture1D:
48 return SurfaceTarget::Texture1D;
49 case Tegra::Shader::ImageType::TextureBuffer:
50 return SurfaceTarget::TextureBuffer;
51 case Tegra::Shader::ImageType::Texture1DArray:
52 return SurfaceTarget::Texture1DArray;
53 case Tegra::Shader::ImageType::Texture2D:
54 return SurfaceTarget::Texture2D;
55 case Tegra::Shader::ImageType::Texture2DArray:
56 return SurfaceTarget::Texture2DArray;
57 case Tegra::Shader::ImageType::Texture3D:
58 return SurfaceTarget::Texture3D;
59 default:
60 UNREACHABLE();
61 return SurfaceTarget::Texture2D;
62 }
63}
64
65constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
66 return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
67}
68
69} // Anonymous namespace
70
71SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table,
72 const Tegra::Texture::TICEntry& tic,
73 const VideoCommon::Shader::Sampler& entry) {
74 SurfaceParams params;
75 params.is_tiled = tic.IsTiled();
76 params.srgb_conversion = tic.IsSrgbConversionEnabled();
77 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
78 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
79 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
80 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
81 params.pixel_format = lookup_table.GetPixelFormat(
82 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
83 params.type = GetFormatType(params.pixel_format);
84 if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
85 switch (params.pixel_format) {
86 case PixelFormat::R16_UNORM:
87 case PixelFormat::R16_FLOAT:
88 params.pixel_format = PixelFormat::D16_UNORM;
89 break;
90 case PixelFormat::R32_FLOAT:
91 params.pixel_format = PixelFormat::D32_FLOAT;
92 break;
93 default:
94 UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
95 static_cast<u32>(params.pixel_format));
96 }
97 params.type = GetFormatType(params.pixel_format);
98 }
99    // TODO: on 1DBuffer we should use the TIC info.
100 if (tic.IsBuffer()) {
101 params.target = SurfaceTarget::TextureBuffer;
102 params.width = tic.Width();
103 params.pitch = params.width * params.GetBytesPerPixel();
104 params.height = 1;
105 params.depth = 1;
106 params.num_levels = 1;
107 params.emulated_levels = 1;
108 params.is_layered = false;
109 } else {
110 params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array);
111 params.width = tic.Width();
112 params.height = tic.Height();
113 params.depth = tic.Depth();
114 params.pitch = params.is_tiled ? 0 : tic.Pitch();
115 if (params.target == SurfaceTarget::TextureCubemap ||
116 params.target == SurfaceTarget::TextureCubeArray) {
117 params.depth *= 6;
118 }
119 params.num_levels = tic.max_mip_level + 1;
120 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
121 params.is_layered = params.IsLayered();
122 }
123 return params;
124}
125
126SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table,
127 const Tegra::Texture::TICEntry& tic,
128 const VideoCommon::Shader::Image& entry) {
129 SurfaceParams params;
130 params.is_tiled = tic.IsTiled();
131 params.srgb_conversion = tic.IsSrgbConversionEnabled();
132 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
133 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
134 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
135 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
136 params.pixel_format = lookup_table.GetPixelFormat(
137 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
138 params.type = GetFormatType(params.pixel_format);
139 params.target = ImageTypeToSurfaceTarget(entry.type);
140    // TODO: on 1DBuffer we should use the TIC info.
141 if (tic.IsBuffer()) {
142 params.target = SurfaceTarget::TextureBuffer;
143 params.width = tic.Width();
144 params.pitch = params.width * params.GetBytesPerPixel();
145 params.height = 1;
146 params.depth = 1;
147 params.num_levels = 1;
148 params.emulated_levels = 1;
149 params.is_layered = false;
150 } else {
151 params.width = tic.Width();
152 params.height = tic.Height();
153 params.depth = tic.Depth();
154 params.pitch = params.is_tiled ? 0 : tic.Pitch();
155 if (params.target == SurfaceTarget::TextureCubemap ||
156 params.target == SurfaceTarget::TextureCubeArray) {
157 params.depth *= 6;
158 }
159 params.num_levels = tic.max_mip_level + 1;
160 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
161 params.is_layered = params.IsLayered();
162 }
163 return params;
164}
165
166SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
167 const auto& regs = maxwell3d.regs;
168 const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
169 const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
170 const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
171 return {
172 .is_tiled = regs.zeta.memory_layout.type ==
173 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
174 .srgb_conversion = false,
175 .is_layered = is_layered,
176 .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U),
177 .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U),
178 .block_depth = block_depth,
179 .tile_width_spacing = 1,
180 .width = regs.zeta_width,
181 .height = regs.zeta_height,
182 .depth = is_layered ? regs.zeta_layers.Value() : 1U,
183 .pitch = 0,
184 .num_levels = 1,
185 .emulated_levels = 1,
186 .pixel_format = pixel_format,
187 .type = GetFormatType(pixel_format),
188 .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
189 };
190}
191
192SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
193 std::size_t index) {
194 const auto& config{maxwell3d.regs.rt[index]};
195 SurfaceParams params;
196 params.is_tiled =
197 config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
198 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
199 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
200 params.block_width = config.memory_layout.block_width;
201 params.block_height = config.memory_layout.block_height;
202 params.block_depth = config.memory_layout.block_depth;
203 params.tile_width_spacing = 1;
204 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
205 params.type = GetFormatType(params.pixel_format);
206 if (params.is_tiled) {
207 params.pitch = 0;
208 params.width = config.width;
209 } else {
210 const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
211 params.pitch = config.width;
212 params.width = params.pitch / bpp;
213 }
214 params.height = config.height;
215 params.num_levels = 1;
216 params.emulated_levels = 1;
217
218 if (config.memory_layout.is_3d != 0) {
219 params.depth = config.layers.Value();
220 params.is_layered = false;
221 params.target = SurfaceTarget::Texture3D;
222 } else if (config.layers > 1) {
223 params.depth = config.layers.Value();
224 params.is_layered = true;
225 params.target = SurfaceTarget::Texture2DArray;
226 } else {
227 params.depth = 1;
228 params.is_layered = false;
229 params.target = SurfaceTarget::Texture2D;
230 }
231 return params;
232}
233
234SurfaceParams SurfaceParams::CreateForFermiCopySurface(
235 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
236 const bool is_tiled = !config.linear;
237 const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
238
239 SurfaceParams params{
240 .is_tiled = is_tiled,
241 .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
242 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
243 .is_layered = false,
244 .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
245 .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
246 .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
247 .tile_width_spacing = 1,
248 .width = config.width,
249 .height = config.height,
250 .depth = 1,
251 .pitch = config.pitch,
252 .num_levels = 1,
253 .emulated_levels = 1,
254 .pixel_format = pixel_format,
255 .type = GetFormatType(pixel_format),
256 // TODO(Rodrigo): Try to guess texture arrays from parameters
257 .target = SurfaceTarget::Texture2D,
258 };
259
260 params.is_layered = params.IsLayered();
261 return params;
262}
263
264VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
265 const VideoCommon::Shader::Sampler& entry) {
266 return TextureTypeToSurfaceTarget(entry.type, entry.is_array);
267}
268
269VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
270 const VideoCommon::Shader::Image& entry) {
271 return ImageTypeToSurfaceTarget(entry.type);
272}
273
274bool SurfaceParams::IsLayered() const {
275 switch (target) {
276 case SurfaceTarget::Texture1DArray:
277 case SurfaceTarget::Texture2DArray:
278 case SurfaceTarget::TextureCubemap:
279 case SurfaceTarget::TextureCubeArray:
280 return true;
281 default:
282 return false;
283 }
284}
285
286// Auto block resizing algorithm from:
287// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
288u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
289 if (level == 0) {
290 return this->block_height;
291 }
292
293 const u32 height_new{GetMipHeight(level)};
294 const u32 default_block_height{GetDefaultBlockHeight()};
295 const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
296 const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
297 return std::clamp(block_height_new, 3U, 7U) - 3U;
298}
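// Worked example with hypothetical numbers: for a 1024-px-tall surface in a
// 4x4-block compressed format, GetMipHeight(3) == 128 texels, which packs into
// blocks_in_y == (128 + 3) / 4 == 32 block rows; Log2Ceil32(32) == 5, and
// std::clamp(5, 3, 7) - 3 == 2, i.e. mip 3 uses 2^2 == 4 GOBs of block height.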
299
300u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
301 if (level == 0) {
302 return this->block_depth;
303 }
304 if (is_layered) {
305 return 0;
306 }
307
308 const u32 depth_new{GetMipDepth(level)};
309 const u32 block_depth_new = Common::Log2Ceil32(depth_new);
310 if (block_depth_new > 4) {
311 return 5 - (GetMipBlockHeight(level) >= 2);
312 }
313 return block_depth_new;
314}
315
316std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
317 std::size_t offset = 0;
318 for (u32 i = 0; i < level; i++) {
319 offset += GetInnerMipmapMemorySize(i, false, false);
320 }
321 return offset;
322}
323
324std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
325 std::size_t offset = 0;
326 if (is_converted) {
327 for (u32 i = 0; i < level; ++i) {
328 offset += GetConvertedMipmapSize(i) * GetNumLayers();
329 }
330 } else {
331 for (u32 i = 0; i < level; ++i) {
332 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
333 }
334 }
335 return offset;
336}
337
338std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
339 constexpr std::size_t rgba8_bpp = 4ULL;
340 const std::size_t mip_width = GetMipWidth(level);
341 const std::size_t mip_height = GetMipHeight(level);
342 const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
343 return mip_width * mip_height * mip_depth * rgba8_bpp;
344}
345
346std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
347 std::size_t size = 0;
348 for (u32 level = 0; level < num_levels; ++level) {
349 size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
350 }
351 if (is_tiled && is_layered) {
352 return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
353 }
354 return size;
355}
356
357std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
358 bool uncompressed) const {
359 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
360 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
361 const u32 depth{is_layered ? 1U : GetMipDepth(level)};
362 if (is_tiled) {
363 return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
364 depth, GetMipBlockHeight(level),
365 GetMipBlockDepth(level));
366 } else if (as_host_size || IsBuffer()) {
367 return GetBytesPerPixel() * width * height * depth;
368 } else {
369 // Linear Texture Case
370 return pitch * height * depth;
371 }
372}
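// Worked example with hypothetical values: a linear 256x128 B8G8R8A8 surface
// (4 bytes per pixel) with a padded 1280-byte pitch occupies
// pitch * height * depth == 1280 * 128 * 1 == 163840 bytes in guest memory,
// but only bpp * width * height == 4 * 256 * 128 == 131072 bytes as a tightly
// packed host copy; the two sizes match only when the pitch is unpadded.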
373
374bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
375 return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
376 height, depth, pitch, num_levels, pixel_format, type, target) ==
377 std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
378 rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
379 rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target);
380}
381
382std::string SurfaceParams::TargetName() const {
383 switch (target) {
384 case SurfaceTarget::Texture1D:
385 return "1D";
386 case SurfaceTarget::TextureBuffer:
387 return "TexBuffer";
388 case SurfaceTarget::Texture2D:
389 return "2D";
390 case SurfaceTarget::Texture3D:
391 return "3D";
392 case SurfaceTarget::Texture1DArray:
393 return "1DArray";
394 case SurfaceTarget::Texture2DArray:
395 return "2DArray";
396 case SurfaceTarget::TextureCubemap:
397 return "Cube";
398 case SurfaceTarget::TextureCubeArray:
399 return "CubeArray";
400 default:
401 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
402 UNREACHABLE();
403 return fmt::format("TUK({})", static_cast<u32>(target));
404 }
405}
406
407u32 SurfaceParams::GetBlockSize() const {
408 const u32 x = 64U << block_width;
409 const u32 y = 8U << block_height;
410 const u32 z = 1U << block_depth;
411 return x * y * z;
412}
413
414std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
415 const u32 x_pixels = 64U / GetBytesPerPixel();
416 const u32 x = x_pixels << block_width;
417 const u32 y = 8U << block_height;
418 return {x, y};
419}
420
421std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
422 const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
423 const u32 block_size = GetBlockSize();
424 const u32 block_index = offset / block_size;
425 const u32 gob_offset = offset % block_size;
426 const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
427 const u32 x_gob_pixels = 64U / GetBytesPerPixel();
428 const u32 x_block_pixels = x_gob_pixels << block_width;
429 const u32 y_block_pixels = 8U << block_height;
430 const u32 z_block_pixels = 1U << block_depth;
431 const u32 x_blocks = div_ceil(width, x_block_pixels);
432 const u32 y_blocks = div_ceil(height, y_block_pixels);
433 const u32 z_blocks = div_ceil(depth, z_block_pixels);
434 const u32 base_x = block_index % x_blocks;
435 const u32 base_y = (block_index / x_blocks) % y_blocks;
436 const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
437 u32 x = base_x * x_block_pixels;
438 u32 y = base_y * y_block_pixels;
439 u32 z = base_z * z_block_pixels;
440 z += gob_index >> block_height;
441 y += (gob_index * 8U) % y_block_pixels;
442 return {x, y, z};
443}
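// Worked example with hypothetical parameters: for a 64x32x1 surface with
// 4 bytes per pixel and block_width == 0, block_height == 1, block_depth == 0,
// GetBlockSize() == 64 * 16 * 1 == 1024 bytes. An offset of 1536 then yields
// block_index == 1 and gob_index == 1, so base_x == 1 of x_blocks == 4, and
// the resulting coordinates are x == 16, y == 8, z == 0.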
444
445} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
deleted file mode 100644
index 4466c3c34..000000000
--- a/src/video_core/texture_cache/surface_params.h
+++ /dev/null
@@ -1,294 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "common/cityhash.h"
12#include "common/common_types.h"
13#include "video_core/engines/fermi_2d.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/surface.h"
17#include "video_core/textures/decoders.h"
18
19namespace VideoCommon {
20
21class FormatLookupTable;
22
23class SurfaceParams {
24public:
25    /// Creates SurfaceParams from a texture configuration.
26 static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table,
27 const Tegra::Texture::TICEntry& tic,
28 const VideoCommon::Shader::Sampler& entry);
29
30    /// Creates SurfaceParams from an image configuration.
31 static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table,
32 const Tegra::Texture::TICEntry& tic,
33 const VideoCommon::Shader::Image& entry);
34
35    /// Creates SurfaceParams for a depth buffer configuration.
36 static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
37
38    /// Creates SurfaceParams from a framebuffer configuration.
39 static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
40 std::size_t index);
41
42    /// Creates SurfaceParams from a Fermi2D surface configuration.
43 static SurfaceParams CreateForFermiCopySurface(
44 const Tegra::Engines::Fermi2D::Regs::Surface& config);
45
46 /// Obtains the texture target from a shader's sampler entry.
47 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
48 const VideoCommon::Shader::Sampler& entry);
49
50    /// Obtains the texture target from a shader's image entry.
51 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
52 const VideoCommon::Shader::Image& entry);
53
54 std::size_t Hash() const {
55 return static_cast<std::size_t>(
56 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
57 }
58
59 bool operator==(const SurfaceParams& rhs) const;
60
61 bool operator!=(const SurfaceParams& rhs) const {
62 return !operator==(rhs);
63 }
64
65 std::size_t GetGuestSizeInBytes() const {
66 return GetInnerMemorySize(false, false, false);
67 }
68
69 std::size_t GetHostSizeInBytes(bool is_converted) const {
70 if (!is_converted) {
71 return GetInnerMemorySize(true, false, false);
72 }
73        // ASTC is decompressed in software and emulated as RGBA8
74 std::size_t host_size_in_bytes = 0;
75 for (u32 level = 0; level < num_levels; ++level) {
76 host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
77 }
78 return host_size_in_bytes;
79 }
80
81 u32 GetBlockAlignedWidth() const {
82 return Common::AlignUp(width, 64 / GetBytesPerPixel());
83 }
84
85 /// Returns the width of a given mipmap level.
86 u32 GetMipWidth(u32 level) const {
87 return std::max(1U, width >> level);
88 }
89
90 /// Returns the height of a given mipmap level.
91 u32 GetMipHeight(u32 level) const {
92 return std::max(1U, height >> level);
93 }
94
95 /// Returns the depth of a given mipmap level.
96 u32 GetMipDepth(u32 level) const {
97 return is_layered ? depth : std::max(1U, depth >> level);
98 }
99
100 /// Returns the block height of a given mipmap level.
101 u32 GetMipBlockHeight(u32 level) const;
102
103 /// Returns the block depth of a given mipmap level.
104 u32 GetMipBlockDepth(u32 level) const;
105
106 /// Returns the best possible row/pitch alignment for the surface.
107 u32 GetRowAlignment(u32 level, bool is_converted) const {
108 const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
109 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
110 }
111
112 /// Returns the offset in bytes in guest memory of a given mipmap level.
113 std::size_t GetGuestMipmapLevelOffset(u32 level) const;
114
115 /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
116 std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
117
118 /// Returns the size in bytes in guest memory of a given mipmap level.
119 std::size_t GetGuestMipmapSize(u32 level) const {
120 return GetInnerMipmapMemorySize(level, false, false);
121 }
122
123 /// Returns the size in bytes in host memory (linear) of a given mipmap level.
124 std::size_t GetHostMipmapSize(u32 level) const {
125 return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
126 }
127
128 std::size_t GetConvertedMipmapSize(u32 level) const;
129
130    /// Gets this texture's Tegra block size in the guest memory layout
131 u32 GetBlockSize() const;
132
133    /// Gets the maximum X and Y pixel sizes of a single block.
134 std::pair<u32, u32> GetBlockXY() const;
135
136    /// Gets the x, y, z coordinates corresponding to a memory offset
137 std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
138
139 /// Returns the size of a layer in bytes in guest memory.
140 std::size_t GetGuestLayerSize() const {
141 return GetLayerSize(false, false);
142 }
143
144 /// Returns the size of a layer in bytes in host memory for a given mipmap level.
145 std::size_t GetHostLayerSize(u32 level) const {
146 ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
147 return GetInnerMipmapMemorySize(level, true, false);
148 }
149
150    /// Returns the maximum mipmap count the texture can have on the host GPU
151 u32 MaxPossibleMipmap() const {
152 const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
153 const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
154 const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
155 if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
156 return max_mipmap;
157 return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
158 }
159
160    /// Returns true if the guest surface is a compressed surface.
161 bool IsCompressed() const {
162 return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
163 }
164
165 /// Returns the default block width.
166 u32 GetDefaultBlockWidth() const {
167 return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
168 }
169
170 /// Returns the default block height.
171 u32 GetDefaultBlockHeight() const {
172 return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
173 }
174
175 /// Returns the bits per pixel.
176 u32 GetBitsPerPixel() const {
177 return VideoCore::Surface::GetFormatBpp(pixel_format);
178 }
179
180 /// Returns the bytes per pixel.
181 u32 GetBytesPerPixel() const {
182 return VideoCore::Surface::GetBytesPerPixel(pixel_format);
183 }
184
185 /// Returns true if the pixel format is a depth and/or stencil format.
186 bool IsPixelFormatZeta() const {
187 return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
188 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
189 }
190
191    /// Returns true if the surface is a TextureBuffer type of surface.
192 bool IsBuffer() const {
193 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
194 }
195
196 /// Returns the number of layers in the surface.
197 std::size_t GetNumLayers() const {
198 return is_layered ? depth : 1;
199 }
200
201 /// Returns the debug name of the texture for use in graphic debuggers.
202 std::string TargetName() const;
203
204    // Helper used for out-of-class size calculations
205 static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
206 const u32 block_depth) {
207 return Common::AlignBits(out_size,
208 Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
209 }
210
211    /// Converts a width from one type of surface to another, giving the equivalent
212    /// value between compressed and non-compressed textures.
213 static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
214 VideoCore::Surface::PixelFormat pixel_format_to) {
215 const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
216 const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
217 return (width * bw2 + bw1 - 1) / bw1;
218 }
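// Worked example: with BC1 blocks 4 texels wide and RGBA8 blocks 1 texel wide,
// ConvertWidth(100, BC1, RGBA8) == (100 * 1 + 3) / 4 == 25 and
// ConvertWidth(25, RGBA8, BC1) == (25 * 4 + 0) / 1 == 100, so the round trip
// recovers the original 100-texel width.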
219
220    /// Converts a height from one type of surface to another, giving the equivalent
221    /// value between compressed and non-compressed textures.
222 static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
223 VideoCore::Surface::PixelFormat pixel_format_to) {
224 const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
225 const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
226 return (height * bh2 + bh1 - 1) / bh1;
227 }
228
229    // Finds the maximum possible width between two 2D layers of different formats
230 static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
231 const u32 src_level, const u32 dst_level) {
232 const u32 bw1 = src_params.GetDefaultBlockWidth();
233 const u32 bw2 = dst_params.GetDefaultBlockWidth();
234 const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
235 const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
236 return std::min(t_src_width, t_dst_width);
237 }
238
239    // Finds the maximum possible height between two 2D layers of different formats
240 static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
241 const u32 src_level, const u32 dst_level) {
242 const u32 bh1 = src_params.GetDefaultBlockHeight();
243 const u32 bh2 = dst_params.GetDefaultBlockHeight();
244 const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
245 const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
246 return std::min(t_src_height, t_dst_height);
247 }
248
249 bool is_tiled;
250 bool srgb_conversion;
251 bool is_layered;
252 u32 block_width;
253 u32 block_height;
254 u32 block_depth;
255 u32 tile_width_spacing;
256 u32 width;
257 u32 height;
258 u32 depth;
259 u32 pitch;
260 u32 num_levels;
261 u32 emulated_levels;
262 VideoCore::Surface::PixelFormat pixel_format;
263 VideoCore::Surface::SurfaceType type;
264 VideoCore::Surface::SurfaceTarget target;
265
266private:
267 /// Returns the size of a given mipmap level inside a layer.
268 std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
269
270 /// Returns the size of all mipmap levels and aligns as needed.
271 std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
272 return GetLayerSize(as_host_size, uncompressed) *
273 (layer_only ? 1U : (is_layered ? depth : 1U));
274 }
275
276 /// Returns the size of a layer
277 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
278
279 /// Returns true if these parameters are from a layered surface.
280 bool IsLayered() const;
281};
282
283} // namespace VideoCommon
284
285namespace std {
286
287template <>
288struct hash<VideoCommon::SurfaceParams> {
289 std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
290 return k.Hash();
291 }
292};
293
294} // namespace std
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
deleted file mode 100644
index 6b5f5984b..000000000
--- a/src/video_core/texture_cache/surface_view.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/common_types.h"
8#include "video_core/texture_cache/surface_view.h"
9
10namespace VideoCommon {
11
12std::size_t ViewParams::Hash() const {
13 return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^
14 (static_cast<std::size_t>(base_level) << 24) ^
15 (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
16}
17
18bool ViewParams::operator==(const ViewParams& rhs) const {
19 return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
20 std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
21}
22
23bool ViewParams::operator!=(const ViewParams& rhs) const {
24 return !operator==(rhs);
25}
26
27} // namespace VideoCommon
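The operator== above uses the std::tie idiom: binding members into tuples of references compares them lexicographically without hand-written field-by-field logic. A minimal sketch of the pattern, where Params is a hypothetical type rather than a yuzu one:

#include <tuple>

struct Params {
    int base_layer;
    int num_layers;
    int target;

    bool operator==(const Params& rhs) const {
        // Tuples of references compare element by element, in declaration order.
        return std::tie(base_layer, num_layers, target) ==
               std::tie(rhs.base_layer, rhs.num_layers, rhs.target);
    }
};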
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
deleted file mode 100644
index 90a8bb0ae..000000000
--- a/src/video_core/texture_cache/surface_view.h
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8
9#include "common/common_types.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/surface_params.h"
12
13namespace VideoCommon {
14
15struct ViewParams {
16 constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer,
17 u32 num_layers, u32 base_level, u32 num_levels)
18 : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level},
19 num_levels{num_levels} {}
20
21 std::size_t Hash() const;
22
23 bool operator==(const ViewParams& rhs) const;
24 bool operator!=(const ViewParams& rhs) const;
25
26 bool IsLayered() const {
27 switch (target) {
28 case VideoCore::Surface::SurfaceTarget::Texture1DArray:
29 case VideoCore::Surface::SurfaceTarget::Texture2DArray:
30 case VideoCore::Surface::SurfaceTarget::TextureCubemap:
31 case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
32 return true;
33 default:
34 return false;
35 }
36 }
37
38 VideoCore::Surface::SurfaceTarget target{};
39 u32 base_layer{};
40 u32 num_layers{};
41 u32 base_level{};
42 u32 num_levels{};
43};
44
45class ViewBase {
46public:
47 constexpr explicit ViewBase(const ViewParams& params) : params{params} {}
48
49 constexpr const ViewParams& GetViewParams() const {
50 return params;
51 }
52
53protected:
54 ViewParams params;
55};
56
57} // namespace VideoCommon
58
59namespace std {
60
61template <>
62struct hash<VideoCommon::ViewParams> {
63 std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
64 return k.Hash();
65 }
66};
67
68} // namespace std
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index ea835c59f..968059842 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,1299 +6,1449 @@
6 6
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9#include <list> 9#include <bit>
10#include <memory> 10#include <memory>
11#include <mutex> 11#include <mutex>
12#include <set> 12#include <optional>
13#include <tuple> 13#include <span>
14#include <type_traits>
14#include <unordered_map> 15#include <unordered_map>
16#include <utility>
15#include <vector> 17#include <vector>
16 18
17#include <boost/container/small_vector.hpp> 19#include <boost/container/small_vector.hpp>
18#include <boost/icl/interval_map.hpp>
19#include <boost/range/iterator_range.hpp>
20 20
21#include "common/assert.h" 21#include "common/alignment.h"
22#include "common/common_funcs.h"
22#include "common/common_types.h" 23#include "common/common_types.h"
23#include "common/math_util.h" 24#include "common/logging/log.h"
24#include "core/core.h"
25#include "core/memory.h"
26#include "core/settings.h"
27#include "video_core/compatible_formats.h" 25#include "video_core/compatible_formats.h"
26#include "video_core/delayed_destruction_ring.h"
28#include "video_core/dirty_flags.h" 27#include "video_core/dirty_flags.h"
29#include "video_core/engines/fermi_2d.h" 28#include "video_core/engines/fermi_2d.h"
29#include "video_core/engines/kepler_compute.h"
30#include "video_core/engines/maxwell_3d.h" 30#include "video_core/engines/maxwell_3d.h"
31#include "video_core/gpu.h"
32#include "video_core/memory_manager.h" 31#include "video_core/memory_manager.h"
33#include "video_core/rasterizer_interface.h" 32#include "video_core/rasterizer_interface.h"
34#include "video_core/surface.h" 33#include "video_core/surface.h"
35#include "video_core/texture_cache/copy_params.h" 34#include "video_core/texture_cache/descriptor_table.h"
36#include "video_core/texture_cache/format_lookup_table.h" 35#include "video_core/texture_cache/format_lookup_table.h"
37#include "video_core/texture_cache/surface_base.h" 36#include "video_core/texture_cache/formatter.h"
38#include "video_core/texture_cache/surface_params.h" 37#include "video_core/texture_cache/image_base.h"
39#include "video_core/texture_cache/surface_view.h" 38#include "video_core/texture_cache/image_info.h"
40 39#include "video_core/texture_cache/image_view_base.h"
41namespace Tegra::Texture { 40#include "video_core/texture_cache/image_view_info.h"
42struct FullTextureInfo; 41#include "video_core/texture_cache/render_targets.h"
43} 42#include "video_core/texture_cache/samples_helper.h"
44 43#include "video_core/texture_cache/slot_vector.h"
45namespace VideoCore { 44#include "video_core/texture_cache/types.h"
46class RasterizerInterface; 45#include "video_core/texture_cache/util.h"
47} 46#include "video_core/textures/texture.h"
48 47
49namespace VideoCommon { 48namespace VideoCommon {
50 49
51using VideoCore::Surface::FormatCompatibility; 50using Tegra::Texture::SwizzleSource;
51using Tegra::Texture::TextureType;
52using Tegra::Texture::TICEntry;
53using Tegra::Texture::TSCEntry;
54using VideoCore::Surface::GetFormatType;
55using VideoCore::Surface::IsCopyCompatible;
52using VideoCore::Surface::PixelFormat; 56using VideoCore::Surface::PixelFormat;
53using VideoCore::Surface::SurfaceTarget; 57using VideoCore::Surface::PixelFormatFromDepthFormat;
54using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; 58using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
59using VideoCore::Surface::SurfaceType;
55 60
56template <typename TSurface, typename TView> 61template <class P>
57class TextureCache { 62class TextureCache {
58 using VectorSurface = boost::container::small_vector<TSurface, 1>; 63 /// Address shift for caching images into a hash table
64 static constexpr u64 PAGE_SHIFT = 20;
65
66    /// Enables debugging features in the texture cache
67 static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
68 /// Implement blits as copies between framebuffers
69 static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
70 /// True when some copies have to be emulated
71 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
72
73 /// Image view ID for null descriptors
74 static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
75 /// Sampler ID for bugged sampler ids
76 static constexpr SamplerId NULL_SAMPLER_ID{0};
77
78 using Runtime = typename P::Runtime;
79 using Image = typename P::Image;
80 using ImageAlloc = typename P::ImageAlloc;
81 using ImageView = typename P::ImageView;
82 using Sampler = typename P::Sampler;
83 using Framebuffer = typename P::Framebuffer;
84
85 struct BlitImages {
86 ImageId dst_id;
87 ImageId src_id;
88 PixelFormat dst_format;
89 PixelFormat src_format;
90 };
91
92 template <typename T>
93 struct IdentityHash {
94 [[nodiscard]] size_t operator()(T value) const noexcept {
95 return static_cast<size_t>(value);
96 }
97 };
59 98
60public: 99public:
61 void InvalidateRegion(VAddr addr, std::size_t size) { 100 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
62 std::lock_guard lock{mutex}; 101 Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
63 102
64 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 103 /// Notify the cache that a new frame has been queued
65 Unregister(surface); 104 void TickFrame();
66 }
67 }
68 105
69    void OnCPUWrite(VAddr addr, std::size_t size) { 106    /// Return a unique mutually exclusive lock for the cache
70 std::lock_guard lock{mutex}; 107 [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
71 108
72 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 109 /// Return a constant reference to the given image view id
73 if (surface->IsMemoryMarked()) { 110 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
74 UnmarkMemory(surface);
75 surface->SetSyncPending(true);
76 marked_for_unregister.emplace_back(surface);
77 }
78 }
79 }
80 111
81 void SyncGuestHost() { 112 /// Return a reference to the given image view id
82 std::lock_guard lock{mutex}; 113 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
83 114
84 for (const auto& surface : marked_for_unregister) { 115 /// Fill image_view_ids with the graphics images in indices
85 if (surface->IsRegistered()) { 116 void FillGraphicsImageViews(std::span<const u32> indices,
86 surface->SetSyncPending(false); 117 std::span<ImageViewId> image_view_ids);
87 Unregister(surface);
88 }
89 }
90 marked_for_unregister.clear();
91 }
92 118
93 /** 119 /// Fill image_view_ids with the compute images in indices
94     * Guarantees that render targets don't unregister themselves if they 120    void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
95 * collide. Protection is currently only done on 3D slices.
96 */
97 void GuardRenderTargets(bool new_guard) {
98 guard_render_targets = new_guard;
99 }
100 121
101 void GuardSamplers(bool new_guard) { 122 /// Get the sampler from the graphics descriptor table in the specified index
102 guard_samplers = new_guard; 123 Sampler* GetGraphicsSampler(u32 index);
103 }
104 124
105 void FlushRegion(VAddr addr, std::size_t size) { 125 /// Get the sampler from the compute descriptor table in the specified index
106 std::lock_guard lock{mutex}; 126 Sampler* GetComputeSampler(u32 index);
107 127
108 auto surfaces = GetSurfacesInRegion(addr, size); 128 /// Refresh the state for graphics image view and sampler descriptors
109 if (surfaces.empty()) { 129 void SynchronizeGraphicsDescriptors();
110 return;
111 }
112 std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
113 return a->GetModificationTick() < b->GetModificationTick();
114 });
115 for (const auto& surface : surfaces) {
116 mutex.unlock();
117 FlushSurface(surface);
118 mutex.lock();
119 }
120 }
121 130
122 bool MustFlushRegion(VAddr addr, std::size_t size) { 131 /// Refresh the state for compute image view and sampler descriptors
123 std::lock_guard lock{mutex}; 132 void SynchronizeComputeDescriptors();
124 133
125 const auto surfaces = GetSurfacesInRegion(addr, size); 134 /// Update bound render targets and upload memory if necessary
126 return std::any_of(surfaces.cbegin(), surfaces.cend(), 135 /// @param is_clear True when the render targets are being used for clears
127 [](const TSurface& surface) { return surface->IsModified(); }); 136 void UpdateRenderTargets(bool is_clear);
128 }
129 137
130 TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, 138 /// Find a framebuffer with the currently bound render targets
131 const VideoCommon::Shader::Sampler& entry) { 139 /// UpdateRenderTargets should be called before this
132 std::lock_guard lock{mutex}; 140 Framebuffer* GetFramebuffer();
133 const auto gpu_addr{tic.Address()};
134 if (!gpu_addr) {
135 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
136 }
137 141
138 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 142 /// Mark images in a range as modified from the CPU
139 if (!cpu_addr) { 143 void WriteMemory(VAddr cpu_addr, size_t size);
140 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
141 }
142 144
143 if (!IsTypeCompatible(tic.texture_type, entry)) { 145 /// Download contents of host images to guest memory in a region
144 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 146 void DownloadMemory(VAddr cpu_addr, size_t size);
145 }
146 147
147 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 148 /// Remove images in a region
148 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); 149 void UnmapMemory(VAddr cpu_addr, size_t size);
149 if (guard_samplers) {
150 sampled_textures.push_back(surface);
151 }
152 return view;
153 }
154 150
155 TView GetImageSurface(const Tegra::Texture::TICEntry& tic, 151 /// Blit an image with the given parameters
156 const VideoCommon::Shader::Image& entry) { 152 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
157 std::lock_guard lock{mutex}; 153 const Tegra::Engines::Fermi2D::Surface& src,
158 const auto gpu_addr{tic.Address()}; 154 const Tegra::Engines::Fermi2D::Config& copy);
159 if (!gpu_addr) {
160 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
161 }
162 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
163 if (!cpu_addr) {
164 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
165 }
166 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
167 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
168 if (guard_samplers) {
169 sampled_textures.push_back(surface);
170 }
171 return view;
172 }
173 155
174 bool TextureBarrier() { 156 /// Invalidate the contents of the color buffer index
175        const bool any_rt = 157    /// These contents become unspecified, so the cache can assume aggressive optimizations.
176 std::any_of(sampled_textures.begin(), sampled_textures.end(), 158 void InvalidateColorBuffer(size_t index);
177 [](const auto& surface) { return surface->IsRenderTarget(); });
178 sampled_textures.clear();
179 return any_rt;
180 }
181 159
182 TView GetDepthBufferSurface(bool preserve_contents) { 160 /// Invalidate the contents of the depth buffer
183        std::lock_guard lock{mutex}; 161    /// These contents become unspecified, so the cache can assume aggressive optimizations.
184 auto& dirty = maxwell3d.dirty; 162 void InvalidateDepthBuffer();
185 if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
186 return depth_buffer.view;
187 }
188 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
189 163
190 const auto& regs{maxwell3d.regs}; 164 /// Try to find a cached image view in the given CPU address
191 const auto gpu_addr{regs.zeta.Address()}; 165 [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
192 if (!gpu_addr || !regs.zeta_enable) {
193 SetEmptyDepthBuffer();
194 return {};
195 }
196 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
197 if (!cpu_addr) {
198 SetEmptyDepthBuffer();
199 return {};
200 }
201 const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
202 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
203 if (depth_buffer.target)
204 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
205 depth_buffer.target = surface_view.first;
206 depth_buffer.view = surface_view.second;
207 if (depth_buffer.target)
208 depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
209 return surface_view.second;
210 }
211
212 TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
213 std::lock_guard lock{mutex};
214 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
215 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
216 return render_targets[index].view;
217 }
218 maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
219 166
220 const auto& regs{maxwell3d.regs}; 167 /// Return true when there are uncommitted images to be downloaded
221 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 168 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
222 regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
223 SetEmptyColorBuffer(index);
224 return {};
225 }
226 169
227 const auto& config{regs.rt[index]}; 170 /// Return true when the caller should wait for async downloads
228 const auto gpu_addr{config.Address()}; 171 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
229 if (!gpu_addr) {
230 SetEmptyColorBuffer(index);
231 return {};
232 }
233 172
234 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 173 /// Commit asynchronous downloads
235 if (!cpu_addr) { 174 void CommitAsyncFlushes();
236 SetEmptyColorBuffer(index); 175
237 return {}; 176 /// Pop asynchronous downloads
238 } 177 void PopAsyncFlushes();
178
179 /// Return true when a CPU region is modified from the GPU
180 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
239 181
240 auto surface_view = 182private:
241 GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), 183 /// Iterate over all page indices in a range
242 preserve_contents, true); 184 template <typename Func>
243 if (render_targets[index].target) { 185 static void ForEachPage(VAddr addr, size_t size, Func&& func) {
244 auto& surface = render_targets[index].target; 186 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
245 surface->MarkAsRenderTarget(false, NO_RT); 187 const u64 page_end = (addr + size - 1) >> PAGE_SHIFT;
246 const auto& cr_params = surface->GetSurfaceParams(); 188 for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
247 if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 189 if constexpr (RETURNS_BOOL) {
248 AsyncFlushSurface(surface); 190 if (func(page)) {
191 break;
192 }
193 } else {
194 func(page);
249 } 195 }
250 } 196 }
251 render_targets[index].target = surface_view.first;
252 render_targets[index].view = surface_view.second;
253 if (render_targets[index].target)
254 render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
255 return surface_view.second;
256 } 197 }
257 198
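// Worked example for ForEachPage above, with hypothetical values: given
// PAGE_SHIFT == 20 (1 MiB pages), a write at addr == 0x1FFFF0 with
// size == 0x20 spans pages (0x1FFFF0 >> 20) == 1 through
// ((0x1FFFF0 + 0x20 - 1) >> 20) == 2, so func is invoked for pages 1 and 2.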
258    void MarkColorBufferInUse(std::size_t index) { 199    /// Fills image_view_ids with the image views in indices
259 if (auto& render_target = render_targets[index].target) { 200 void FillImageViews(DescriptorTable<TICEntry>& table,
260 render_target->MarkAsModified(true, Tick()); 201 std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
261 } 202 std::span<ImageViewId> image_view_ids);
262 }
263 203
264 void MarkDepthBufferInUse() { 204 /// Find or create an image view in the guest descriptor table
265 if (depth_buffer.target) { 205 ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
266 depth_buffer.target->MarkAsModified(true, Tick()); 206 std::span<ImageViewId> cached_image_view_ids, u32 index);
267 }
268 }
269 207
270 void SetEmptyDepthBuffer() { 208 /// Find or create a framebuffer with the given render target parameters
271 if (depth_buffer.target == nullptr) { 209 FramebufferId GetFramebufferId(const RenderTargets& key);
272 return;
273 }
274 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
275 depth_buffer.target = nullptr;
276 depth_buffer.view = nullptr;
277 }
278 210
279 void SetEmptyColorBuffer(std::size_t index) { 211 /// Refresh the contents (pixel data) of an image
280 if (render_targets[index].target == nullptr) { 212 void RefreshContents(Image& image);
281 return;
282 }
283 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
284 render_targets[index].target = nullptr;
285 render_targets[index].view = nullptr;
286 }
287
288 void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
289 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
290 const Tegra::Engines::Fermi2D::Config& copy_config) {
291 std::lock_guard lock{mutex};
292 SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
293 SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
294 const GPUVAddr src_gpu_addr = src_config.Address();
295 const GPUVAddr dst_gpu_addr = dst_config.Address();
296 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
297
298 const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
299 const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
300 std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
301 TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
302 ImageBlit(src_surface, dst_surface.second, copy_config);
303 dst_surface.first->MarkAsModified(true, Tick());
304 }
305
306 TSurface TryFindFramebufferSurface(VAddr addr) const {
307 if (!addr) {
308 return nullptr;
309 }
310 const VAddr page = addr >> registry_page_bits;
311 const auto it = registry.find(page);
312 if (it == registry.end()) {
313 return nullptr;
314 }
315 const auto& list = it->second;
316 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
317 return surface->GetCpuAddr() == addr;
318 });
319 return found != list.end() ? *found : nullptr;
320 }
321 213
322 u64 Tick() { 214 /// Upload data from guest to an image
323 return ++ticks; 215 template <typename MapBuffer>
324 } 216 void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);
325 217
326 void CommitAsyncFlushes() { 218 /// Find or create an image view from a guest descriptor
327 committed_flushes.push_back(uncommitted_flushes); 219 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
328 uncommitted_flushes.reset();
329 }
330 220
331 bool HasUncommittedFlushes() const { 221 /// Create a new image view from a guest descriptor
332 return uncommitted_flushes != nullptr; 222 [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
333 }
334 223
335 bool ShouldWaitAsyncFlushes() const { 224 /// Find or create an image from the given parameters
336 return !committed_flushes.empty() && committed_flushes.front() != nullptr; 225 [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
337 } 226 RelaxedOptions options = RelaxedOptions{});
338 227
339 void PopAsyncFlushes() { 228 /// Find an image from the given parameters
340 if (committed_flushes.empty()) { 229 [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
341 return; 230 RelaxedOptions options);
342 }
343 auto& flush_list = committed_flushes.front();
344 if (!flush_list) {
345 committed_flushes.pop_front();
346 return;
347 }
348 for (TSurface& surface : *flush_list) {
349 FlushSurface(surface);
350 }
351 committed_flushes.pop_front();
352 }
353 231
354protected: 232 /// Create an image from the given parameters
355 explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, 233 [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
356 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, 234 RelaxedOptions options);
357 bool is_astc_supported_)
358 : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
359 gpu_memory{gpu_memory_} {
360 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
361 SetEmptyColorBuffer(i);
362 }
363 235
364 SetEmptyDepthBuffer(); 236 /// Create a new image and join perfectly matching existing images
365 staging_cache.SetSize(2); 237 /// Remove joined images from the cache
238 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
366 239
367 const auto make_siblings = [this](PixelFormat a, PixelFormat b) { 240 /// Return a blit image pair from the given guest blit parameters
368 siblings_table[static_cast<std::size_t>(a)] = b; 241 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
369 siblings_table[static_cast<std::size_t>(b)] = a; 242 const Tegra::Engines::Fermi2D::Surface& src);
370 };
371 std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
372 make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
373 make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
374 make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
375 243
376 sampled_textures.reserve(64); 244 /// Find or create a sampler from a guest descriptor sampler
377 } 245 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
378 246
379 ~TextureCache() = default; 247 /// Find or create an image view for the given color buffer index
248 [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
380 249
381 virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; 250 /// Find or create an image view for the depth buffer
251 [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
382 252
383 virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, 253 /// Find or create a view for a render target with the given image parameters
384 const CopyParams& copy_params) = 0; 254 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
255 bool is_clear);
385 256
386 virtual void ImageBlit(TView& src_view, TView& dst_view, 257 /// Iterates over all the images in a region calling func
387 const Tegra::Engines::Fermi2D::Config& copy_config) = 0; 258 template <typename Func>
259 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
388 260
389 // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture 261 /// Find or create an image view in the given image with the passed parameters
390 // and reading it from a separate buffer. 262 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
391 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
392 263
393 void ManageRenderTargetUnregister(TSurface& surface) { 264 /// Register image in the page table
394 auto& dirty = maxwell3d.dirty; 265 void RegisterImage(ImageId image);
395 const u32 index = surface->GetRenderTarget(); 266
396 if (index == DEPTH_RT) { 267 /// Unregister image from the page table
397 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; 268 void UnregisterImage(ImageId image);
398 } else { 269
399 dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; 270 /// Track CPU reads and writes for image
400 } 271 void TrackImage(ImageBase& image);
401 dirty.flags[VideoCommon::Dirty::RenderTargets] = true; 272
273 /// Stop tracking CPU reads and writes for image
274 void UntrackImage(ImageBase& image);
275
276 /// Delete image from the cache
277 void DeleteImage(ImageId image);
278
279    /// Remove image view references from the cache
280 void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
281
282 /// Remove framebuffers using the given image views from the cache
283 void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
284
285 /// Mark an image as modified from the GPU
286 void MarkModification(ImageBase& image) noexcept;
287
288 /// Synchronize image aliases, copying data if needed
289 void SynchronizeAliases(ImageId image_id);
290
291 /// Prepare an image to be used
292 void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
293
294 /// Prepare an image view to be used
295 void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
296
297 /// Execute copies from one image to the other, even if they are incompatible
298 void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
299
300    /// Bind an image view as render target, downloading resources preemptively if needed
301 void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
302
303 /// Create a render target from a given image and image view parameters
304 [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
305 ImageId, const ImageViewInfo& view_info);
306
307 /// Returns true if the current clear parameters clear the whole image of a given image view
308 [[nodiscard]] bool IsFullClear(ImageViewId id);
309
310 Runtime& runtime;
311 VideoCore::RasterizerInterface& rasterizer;
312 Tegra::Engines::Maxwell3D& maxwell3d;
313 Tegra::Engines::KeplerCompute& kepler_compute;
314 Tegra::MemoryManager& gpu_memory;
315
316 DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
317 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
318 std::vector<SamplerId> graphics_sampler_ids;
319 std::vector<ImageViewId> graphics_image_view_ids;
320
321 DescriptorTable<TICEntry> compute_image_table{gpu_memory};
322 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
323 std::vector<SamplerId> compute_sampler_ids;
324 std::vector<ImageViewId> compute_image_view_ids;
325
326 RenderTargets render_targets;
327
328 std::mutex mutex;
329
330 std::unordered_map<TICEntry, ImageViewId> image_views;
331 std::unordered_map<TSCEntry, SamplerId> samplers;
332 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
333
334 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
335
336 bool has_deleted_images = false;
337
338 SlotVector<Image> slot_images;
339 SlotVector<ImageView> slot_image_views;
340 SlotVector<ImageAlloc> slot_image_allocs;
341 SlotVector<Sampler> slot_samplers;
342 SlotVector<Framebuffer> slot_framebuffers;
343
344 // TODO: This data structure is not optimal and it should be reworked
345 std::vector<ImageId> uncommitted_downloads;
346 std::queue<std::vector<ImageId>> committed_downloads;
347
348 static constexpr size_t TICKS_TO_DESTROY = 6;
349 DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
350 DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
351 DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
352
353 std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
354
355 u64 modification_tick = 0;
356 u64 frame_tick = 0;
357};
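The `sentenced_*` members above, together with TICKS_TO_DESTROY, implement deferred destruction: host objects the cache no longer wants are kept alive for a fixed number of frame ticks so in-flight GPU work can still reference them. The real container is defined elsewhere in the tree; the following is a minimal sketch of the idea, assuming only the usage visible in this diff:

#include <array>
#include <cstddef>
#include <utility>
#include <vector>

// Minimal sketch: keep objects alive for TICKS calls to Tick(), then drop them.
template <typename T, std::size_t TICKS>
class DelayedDestructionRing {
public:
    // Advance one frame: everything pushed TICKS ticks ago is destroyed here.
    void Tick() {
        index = (index + 1) % TICKS;
        elements[index].clear();
    }

    // Take ownership of an object that may still be in use by the GPU.
    void Push(T&& object) {
        elements[index].push_back(std::move(object));
    }

private:
    std::array<std::vector<T>, TICKS> elements;
    std::size_t index = 0;
};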
+
+template <class P>
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
+                              Tegra::Engines::Maxwell3D& maxwell3d_,
+                              Tegra::Engines::KeplerCompute& kepler_compute_,
+                              Tegra::MemoryManager& gpu_memory_)
+    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+    // Configure null sampler
+    TSCEntry sampler_descriptor{};
+    sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
+    sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
+    sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
+    sampler_descriptor.cubemap_anisotropy.Assign(1);
+
+    // Make sure the first index is reserved for the null resources
+    // This way the null resource becomes a compile time constant
+    void(slot_image_views.insert(runtime, NullImageParams{}));
+    void(slot_samplers.insert(runtime, sampler_descriptor));
+}
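The two `void(...insert(...))` calls rely on SlotVector assigning index 0 to the first insertion, which is what lets the null image view and null sampler be addressed through compile-time constants. A self-contained toy showing the invariant (ToySlotVector and NULL_ID are illustrative stand-ins, not the real types):

#include <cassert>
#include <cstddef>
#include <vector>

// Toy stand-in for SlotVector: sequential insertion starting at index 0.
struct ToySlotVector {
    std::vector<int> storage;
    std::size_t insert(int value) {
        storage.push_back(value);
        return storage.size() - 1;
    }
};

int main() {
    constexpr std::size_t NULL_ID = 0; // usable as a compile-time constant
    ToySlotVector slot_image_views;
    // The first insertion is guaranteed to land on index 0.
    assert(slot_image_views.insert(0) == NULL_ID);
}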
-
-    void Register(TSurface surface) {
-        const GPUVAddr gpu_addr = surface->GetGpuAddr();
-        const std::size_t size = surface->GetSizeInBytes();
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        if (!cpu_addr) {
-            LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
-                         gpu_addr);
-            return;
-        }
-        surface->SetCpuAddr(*cpu_addr);
-        RegisterInnerCache(surface);
-        surface->MarkAsRegistered(true);
-        surface->SetMemoryMarked(true);
-        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
-    }
+
+template <class P>
+void TextureCache<P>::TickFrame() {
+    // Tick sentenced resources in this order to ensure they are destroyed in the right order
+    sentenced_images.Tick();
+    sentenced_framebuffers.Tick();
+    sentenced_image_view.Tick();
+    ++frame_tick;
+}
+
+template <class P>
+std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
+    return std::unique_lock{mutex};
+}
+
+template <class P>
+const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
+    return slot_image_views[id];
+}
+
+template <class P>
+typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
+    return slot_image_views[id];
+}
+
+template <class P>
+void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
+                                             std::span<ImageViewId> image_view_ids) {
+    FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
+}
+
+template <class P>
+void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
+                                            std::span<ImageViewId> image_view_ids) {
+    FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
+}
+
+template <class P>
+typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
+    [[unlikely]] if (index > graphics_sampler_table.Limit()) {
+        LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+        return &slot_samplers[NULL_SAMPLER_ID];
+    }
+    const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
+    SamplerId& id = graphics_sampler_ids[index];
+    [[unlikely]] if (is_new) {
+        id = FindSampler(descriptor);
+    }
+    return &slot_samplers[id];
+}
+
+template <class P>
+typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
+    [[unlikely]] if (index > compute_sampler_table.Limit()) {
+        LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+        return &slot_samplers[NULL_SAMPLER_ID];
+    }
+    const auto [descriptor, is_new] = compute_sampler_table.Read(index);
+    SamplerId& id = compute_sampler_ids[index];
+    [[unlikely]] if (is_new) {
+        id = FindSampler(descriptor);
+    }
+    return &slot_samplers[id];
+}
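Both sampler getters are read-through caches: `Read(index)` reports whether the descriptor at that index changed since the last read, and only then is the comparatively expensive FindSampler lookup redone. A hedged, self-contained sketch of the pattern; the toy table is fed the current descriptor explicitly, unlike the real DescriptorTable, which reads it from guest memory:

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

using Descriptor = std::uint64_t;
using ToySamplerId = std::size_t;

// Toy table: reports whether the descriptor at an index changed since the
// last read, standing in for DescriptorTable::Read.
struct ToyTable {
    std::pair<Descriptor, bool> Read(std::size_t index, Descriptor current) {
        if (last_seen.size() <= index) {
            last_seen.resize(index + 1, ~Descriptor{0});
        }
        const bool is_new = last_seen[index] != current;
        last_seen[index] = current;
        return {current, is_new};
    }
    std::vector<Descriptor> last_seen;
};

ToySamplerId ExpensiveFind(Descriptor) { return 42; } // stands in for FindSampler

ToySamplerId GetSampler(ToyTable& table, std::vector<ToySamplerId>& ids,
                        std::size_t index, Descriptor current) {
    const auto [descriptor, is_new] = table.Read(index, current);
    if (ids.size() <= index) {
        ids.resize(index + 1);
    }
    if (is_new) {
        ids[index] = ExpensiveFind(descriptor); // redone only when it changed
    }
    return ids[index];
}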
-
-    void UnmarkMemory(TSurface surface) {
-        if (!surface->IsMemoryMarked()) {
-            return;
-        }
-        const std::size_t size = surface->GetSizeInBytes();
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
-        surface->SetMemoryMarked(false);
-    }
-
-    void Unregister(TSurface surface) {
-        if (guard_render_targets && surface->IsProtected()) {
-            return;
-        }
-        if (!guard_render_targets && surface->IsRenderTarget()) {
-            ManageRenderTargetUnregister(surface);
-        }
-        UnmarkMemory(surface);
-        if (surface->IsSyncPending()) {
-            marked_for_unregister.remove(surface);
-            surface->SetSyncPending(false);
-        }
-        UnregisterInnerCache(surface);
-        surface->MarkAsRegistered(false);
-        ReserveSurface(surface->GetSurfaceParams(), surface);
-    }
+
+template <class P>
+void TextureCache<P>::SynchronizeGraphicsDescriptors() {
+    using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
+    const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+    const u32 tic_limit = maxwell3d.regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
+    if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
+        graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    }
+    if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
+        graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    }
+}
+
+template <class P>
+void TextureCache<P>::SynchronizeComputeDescriptors() {
+    const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
+    const u32 tic_limit = kepler_compute.regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
+    const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
+    if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+        compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    }
+    if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
+        compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    }
+}
-
-    TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
-        if (const auto surface = TryGetReservedSurface(params); surface) {
-            surface->SetGpuAddr(gpu_addr);
-            return surface;
-        }
-        // No reserved surface available, create a new one and reserve it
-        auto new_surface{CreateSurface(gpu_addr, params)};
-        return new_surface;
-    }
-
-    const bool is_astc_supported;
-
-private:
-    enum class RecycleStrategy : u32 {
-        Ignore = 0,
-        Flush = 1,
-        BufferCopy = 3,
-    };
-
-    enum class DeductionType : u32 {
-        DeductionComplete,
-        DeductionIncomplete,
-        DeductionFailed,
-    };
-
-    struct Deduction {
-        DeductionType type{DeductionType::DeductionFailed};
-        TSurface surface{};
-
-        bool Failed() const {
-            return type == DeductionType::DeductionFailed;
-        }
-
-        bool Incomplete() const {
-            return type == DeductionType::DeductionIncomplete;
-        }
-
-        bool IsDepth() const {
-            return surface->GetSurfaceParams().IsPixelFormatZeta();
-        }
-    };
+
+template <class P>
+void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
+    using namespace VideoCommon::Dirty;
+    auto& flags = maxwell3d.dirty.flags;
+    if (!flags[Dirty::RenderTargets]) {
+        return;
+    }
+    flags[Dirty::RenderTargets] = false;
+
+    // Render target control is used on all render targets, so force lookups when it is dirty
+    const bool force = flags[Dirty::RenderTargetControl];
+    flags[Dirty::RenderTargetControl] = false;
+
+    for (size_t index = 0; index < NUM_RT; ++index) {
+        ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+        if (flags[Dirty::ColorBuffer0 + index] || force) {
+            flags[Dirty::ColorBuffer0 + index] = false;
+            BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+        }
+        PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
+    }
+    if (flags[Dirty::ZetaBuffer] || force) {
+        flags[Dirty::ZetaBuffer] = false;
+        BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+    }
+    const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
+    PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
+
+    for (size_t index = 0; index < NUM_RT; ++index) {
+        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+    }
+    render_targets.size = Extent2D{
+        maxwell3d.regs.render_area.width,
+        maxwell3d.regs.render_area.height,
+    };
+}
+
+template <class P>
+typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
+    return &slot_framebuffers[GetFramebufferId(render_targets)];
+}
+
+template <class P>
+void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
+                                     std::span<ImageViewId> cached_image_view_ids,
+                                     std::span<const u32> indices,
+                                     std::span<ImageViewId> image_view_ids) {
+    ASSERT(indices.size() <= image_view_ids.size());
+    do {
+        has_deleted_images = false;
+        std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
+            return VisitImageView(table, cached_image_view_ids, index);
+        });
+    } while (has_deleted_images);
+}
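The do/while in FillImageViews is a retry loop: visiting an image view can create images, creating an image can delete overlapping images and set `has_deleted_images`, and that invalidates IDs already written to the output span, so the whole pass is replayed until it completes without deletions. The shape of the pattern, as a sketch with toy types:

#include <cstddef>
#include <functional>
#include <vector>

// Retry pattern: replay the whole pass whenever a visit invalidated results
// that earlier iterations may have already written out.
void FillAll(std::vector<int>& out, const std::vector<int>& indices,
             const std::function<bool(int, int&)>& visit) {
    bool deleted_something;
    do {
        deleted_something = false;
        for (std::size_t i = 0; i < indices.size(); ++i) {
            // visit() returns true when it deleted shared state, playing the
            // role of has_deleted_images above.
            deleted_something |= visit(indices[i], out[i]);
        }
    } while (deleted_something);
}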
+
+template <class P>
+ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
+                                            std::span<ImageViewId> cached_image_view_ids,
+                                            u32 index) {
+    if (index > table.Limit()) {
+        LOG_ERROR(HW_GPU, "Invalid image view index={}", index);
+        return NULL_IMAGE_VIEW_ID;
+    }
+    const auto [descriptor, is_new] = table.Read(index);
+    ImageViewId& image_view_id = cached_image_view_ids[index];
+    if (is_new) {
+        image_view_id = FindImageView(descriptor);
+    }
+    if (image_view_id != NULL_IMAGE_VIEW_ID) {
+        PrepareImageView(image_view_id, false, false);
+    }
+    return image_view_id;
+}
+
+template <class P>
+FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
+    const auto [pair, is_new] = framebuffers.try_emplace(key);
+    FramebufferId& framebuffer_id = pair->second;
+    if (!is_new) {
+        return framebuffer_id;
+    }
+    std::array<ImageView*, NUM_RT> color_buffers;
+    std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
+                           [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
+    ImageView* const depth_buffer =
+        key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
+    framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
+    return framebuffer_id;
+}
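GetFramebufferId uses the same try_emplace idiom that FindSampler and FindImageView use further down: one hash lookup that inserts a default value, followed by filling it in only when the key was new. A standalone example of the idiom:

#include <string>
#include <unordered_map>

int BuildExpensiveValue(const std::string&) { return 7; }

int GetOrCreate(std::unordered_map<std::string, int>& cache, const std::string& key) {
    // Single hash lookup: inserts a default-constructed value if missing.
    const auto [it, is_new] = cache.try_emplace(key);
    if (is_new) {
        it->second = BuildExpensiveValue(key); // fill in only on first use
    }
    return it->second;
}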
-
-    /**
-     * Takes care of selecting a proper strategy to deal with a texture recycle.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param untopological Indicates to the recycler that the texture has no way
-     *                      to match the overlaps due to topological reasons.
-     **/
-    RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
-                                 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
-        if (Settings::IsGPULevelExtreme()) {
-            return RecycleStrategy::Flush;
-        }
-        // 3D Textures decision
-        if (params.target == SurfaceTarget::Texture3D) {
-            return RecycleStrategy::Flush;
-        }
-        for (const auto& s : overlaps) {
-            const auto& s_params = s->GetSurfaceParams();
-            if (s_params.target == SurfaceTarget::Texture3D) {
-                return RecycleStrategy::Flush;
-            }
-        }
-        // Untopological decision
-        if (untopological == MatchTopologyResult::CompressUnmatch) {
-            return RecycleStrategy::Flush;
-        }
-        if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
-            return RecycleStrategy::Flush;
-        }
-        return RecycleStrategy::Ignore;
-    }
-
-    /**
-     * Used to decide what to do with textures we can't resolve in the cache. It has two
-     * implemented strategies: Ignore and Flush.
-     *
-     * - Ignore: Just unregisters all the overlaps and loads the new texture.
-     * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters for the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or left
-     *                          blank.
-     * @param untopological Indicates to the recycler that the texture has no way to match the
-     *                      overlaps due to topological reasons.
-     **/
-    std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
-                                              const GPUVAddr gpu_addr, const bool preserve_contents,
-                                              const MatchTopologyResult untopological) {
-        const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
-        for (auto& surface : overlaps) {
-            Unregister(surface);
-        }
-        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
-        case RecycleStrategy::Ignore: {
-            return InitializeSurface(gpu_addr, params, do_load);
-        }
-        case RecycleStrategy::Flush: {
-            std::sort(overlaps.begin(), overlaps.end(),
-                      [](const TSurface& a, const TSurface& b) -> bool {
-                          return a->GetModificationTick() < b->GetModificationTick();
-                      });
-            for (auto& surface : overlaps) {
-                FlushSurface(surface);
-            }
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
-        case RecycleStrategy::BufferCopy: {
-            auto new_surface = GetUncachedSurface(gpu_addr, params);
-            BufferCopy(overlaps[0], new_surface);
-            return {new_surface, new_surface->GetMainView()};
-        }
-        default: {
-            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
-            return InitializeSurface(gpu_addr, params, do_load);
-        }
-        }
-    }
+
+template <class P>
+void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
+    ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
+        if (True(image.flags & ImageFlagBits::CpuModified)) {
+            return;
+        }
+        image.flags |= ImageFlagBits::CpuModified;
+        UntrackImage(image);
+    });
+}
+
+template <class P>
+void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
+    std::vector<ImageId> images;
+    ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
+        // Skip images that were not modified from the GPU
+        if (False(image.flags & ImageFlagBits::GpuModified)) {
+            return;
+        }
+        // Skip images that are modified from the CPU
+        // We don't want to write sensitive data from the guest
+        if (True(image.flags & ImageFlagBits::CpuModified)) {
+            return;
+        }
+        if (image.info.num_samples > 1) {
+            LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+            return;
+        }
+        image.flags &= ~ImageFlagBits::GpuModified;
+        images.push_back(image_id);
+    });
+    if (images.empty()) {
+        return;
+    }
+    std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
+        return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
+    });
+    for (const ImageId image_id : images) {
+        Image& image = slot_images[image_id];
+        auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
+        const auto copies = FullDownloadCopies(image.info);
+        image.DownloadMemory(map, 0, copies);
+        runtime.Finish();
+        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
+    }
+}
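WriteMemory and DownloadMemory above are two halves of a dirty-flag protocol: guest writes mark CpuModified and stop write tracking, GPU writes mark GpuModified, and only GPU-newer images are ever flushed back. A schematic of that reading; the three-state summary is an editor's assumption, while the flag names come from the diff:

#include <cstdint>

// Editor's summary of the flag protocol implied by the functions above.
enum class Sync : std::uint8_t {
    Clean,        // host image and guest memory agree
    CpuModified,  // guest wrote: upload before the GPU uses the image again
    GpuModified,  // GPU wrote: download before the guest reads the region
};

// DownloadMemory only flushes GPU-newer images; flushing a CPU-newer image
// would overwrite fresh guest data with stale GPU contents.
constexpr bool ShouldDownload(Sync state) {
    return state == Sync::GpuModified;
}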
-
-    /**
-     * Takes a single surface and recreates it into another that may differ in
-     * format, target or width alignment.
-     *
-     * @param current_surface The registered surface in the cache which we want to convert.
-     * @param params The new surface params which we'll use to recreate the surface.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
-                                              bool is_render) {
-        const auto gpu_addr = current_surface->GetGpuAddr();
-        const auto& cr_params = current_surface->GetSurfaceParams();
-        TSurface new_surface;
-        if (cr_params.pixel_format != params.pixel_format && !is_render &&
-            GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
-            SurfaceParams new_params = params;
-            new_params.pixel_format = cr_params.pixel_format;
-            new_params.type = cr_params.type;
-            new_surface = GetUncachedSurface(gpu_addr, new_params);
-        } else {
-            new_surface = GetUncachedSurface(gpu_addr, params);
-        }
-        const SurfaceParams& final_params = new_surface->GetSurfaceParams();
-        if (cr_params.type != final_params.type) {
-            if (Settings::IsGPULevelExtreme()) {
-                BufferCopy(current_surface, new_surface);
-            }
-        } else {
-            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
-            for (auto& brick : bricks) {
-                TryCopyImage(current_surface, new_surface, brick);
-            }
-        }
-        Unregister(current_surface);
-        Register(new_surface);
-        new_surface->MarkAsModified(current_surface->IsModified(), Tick());
-        return {new_surface, new_surface->GetMainView()};
-    }
-
-    /**
-     * Takes a single surface and checks it against the new surface's params. If it's an exact
-     * match, we return the main view of the registered surface. If the formats don't match, we
-     * rebuild the surface; we call this last method a `Mirage`. If the formats match but the
-     * targets don't, we create an overview View of the registered surface.
-     *
-     * @param current_surface The registered surface in the cache which we want to convert.
-     * @param params The new surface params which we want to check.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
-                                                     const SurfaceParams& params, bool is_render) {
-        const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
-        const bool matches_target = current_surface->MatchTarget(params.target);
-        const auto match_check = [&]() -> std::pair<TSurface, TView> {
-            if (matches_target) {
-                return {current_surface, current_surface->GetMainView()};
-            }
-            return {current_surface, current_surface->EmplaceOverview(params)};
-        };
-        if (!is_mirage) {
-            return match_check();
-        }
-        if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
-            return match_check();
-        }
-        return RebuildSurface(current_surface, params, is_render);
-    }
-
-    /**
-     * Unlike RebuildSurface, where we know whether or not registered surfaces match the candidate
-     * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
-     * of the new surface; if they all match, we end up recreating a surface for them,
-     * else we return nothing.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     **/
-    std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
-                                                                    const SurfaceParams& params,
-                                                                    GPUVAddr gpu_addr) {
-        if (params.target == SurfaceTarget::Texture3D) {
-            return std::nullopt;
-        }
-        const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
-        TSurface new_surface = GetUncachedSurface(gpu_addr, params);
-
-        if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
-            LoadSurface(new_surface);
-            for (const auto& surface : overlaps) {
-                Unregister(surface);
-            }
-            Register(new_surface);
-            return {{new_surface, new_surface->GetMainView()}};
-        }
-
-        std::size_t passed_tests = 0;
-        for (auto& surface : overlaps) {
-            const SurfaceParams& src_params = surface->GetSurfaceParams();
-            const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
-            if (!mipmap_layer) {
-                continue;
-            }
-            const auto [base_layer, base_mipmap] = *mipmap_layer;
-            if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
-                continue;
-            }
-            ++passed_tests;
-
-            // Copy all mipmaps and layers
-            const u32 block_width = params.GetDefaultBlockWidth();
-            const u32 block_height = params.GetDefaultBlockHeight();
-            for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
-                const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
-                const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
-                if (width < block_width || height < block_height) {
-                    // Current APIs forbid copying small compressed textures, avoid errors
-                    break;
-                }
-                const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
-                                             src_params.depth);
-                TryCopyImage(surface, new_surface, copy_params);
-            }
-        }
-        if (passed_tests == 0) {
-            return std::nullopt;
-        }
-        if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
-            // In Accurate GPU all tests should pass, else we recycle
-            return std::nullopt;
-        }
-
-        const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
-        for (const auto& surface : overlaps) {
-            Unregister(surface);
-        }
-
-        new_surface->MarkAsModified(modified, Tick());
-        Register(new_surface);
-        return {{new_surface, new_surface->GetMainView()}};
-    }
+
+template <class P>
+void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
+    std::vector<ImageId> deleted_images;
+    ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
+    for (const ImageId id : deleted_images) {
+        Image& image = slot_images[id];
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image);
+        }
+        UnregisterImage(id);
+        DeleteImage(id);
+    }
+}
+
+template <class P>
+void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+                                const Tegra::Engines::Fermi2D::Surface& src,
+                                const Tegra::Engines::Fermi2D::Config& copy) {
+    const BlitImages images = GetBlitImages(dst, src);
+    const ImageId dst_id = images.dst_id;
+    const ImageId src_id = images.src_id;
+    PrepareImage(src_id, false, false);
+    PrepareImage(dst_id, true, false);
+
+    ImageBase& dst_image = slot_images[dst_id];
+    const ImageBase& src_image = slot_images[src_id];
+
+    // TODO: Deduplicate
+    const std::optional dst_base = dst_image.TryFindBase(dst.Address());
+    const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
+    const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
+    const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+    const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
+    const std::array src_region{
+        Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
+        Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+    };
+
+    const std::optional src_base = src_image.TryFindBase(src.Address());
+    const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
+    const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
+    const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
+    const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+    const std::array dst_region{
+        Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
+        Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
+    };
+
+    // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
+    Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
+    if constexpr (FRAMEBUFFER_BLITS) {
+        // OpenGL blits from framebuffers, not images
+        Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
+        runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
+                                copy.filter, copy.operation);
+    } else {
+        // Vulkan can blit images, but it lacks format reinterpretations
+        // Provide a framebuffer in case it's necessary
+        ImageView& dst_view = slot_image_views[dst_view_id];
+        ImageView& src_view = slot_image_views[src_view_id];
+        runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
+                          copy.operation);
+    }
+}
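`FRAMEBUFFER_BLITS` is a compile-time constant of the backend parameter P, so the OpenGL and Vulkan paths are chosen with `if constexpr` and the untaken branch is discarded during instantiation. A self-contained sketch of this dispatch style (the trait names here are illustrative):

#include <iostream>

struct OpenGLParams { static constexpr bool FRAMEBUFFER_BLITS = true; };
struct VulkanParams { static constexpr bool FRAMEBUFFER_BLITS = false; };

template <class P>
void Blit() {
    if constexpr (P::FRAMEBUFFER_BLITS) {
        std::cout << "blit through framebuffers (GL-style)\n";
    } else {
        std::cout << "blit images directly (Vulkan-style)\n";
    }
}

int main() {
    Blit<OpenGLParams>();
    Blit<VulkanParams>();
}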
-
-    /**
-     * Takes care of managing 3D textures and their slices. Uses HLE methods for reconstructing
-     * the 3D textures within the GPU if possible. Falls back to LLE when it isn't possible to use
-     * any of the HLE methods.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param cpu_addr The starting address of the new surface on physical memory.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or
-     *                          left blank.
-     */
-    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
-                                                               const SurfaceParams& params,
-                                                               GPUVAddr gpu_addr, VAddr cpu_addr,
-                                                               bool preserve_contents) {
-        if (params.target != SurfaceTarget::Texture3D) {
-            for (const auto& surface : overlaps) {
-                if (!surface->MatchTarget(params.target)) {
-                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
-                        if (Settings::IsGPULevelExtreme()) {
-                            return std::nullopt;
-                        }
-                        Unregister(surface);
-                        return InitializeSurface(gpu_addr, params, preserve_contents);
-                    }
-                    return std::nullopt;
-                }
-                if (surface->GetCpuAddr() != cpu_addr) {
-                    continue;
-                }
-                if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
-                    return std::make_pair(surface, surface->GetMainView());
-                }
-            }
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
-
-        if (params.num_levels > 1) {
-            // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
-            return std::nullopt;
-        }
-
-        if (overlaps.size() == 1) {
-            const auto& surface = overlaps[0];
-            const SurfaceParams& overlap_params = surface->GetSurfaceParams();
-            // Don't attempt to render to textures with more than one level for now
-            // The texture has to be at or to the right of the sampled address if we want to
-            // render to it
-            if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
-                const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
-                const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
-                if (slice < overlap_params.depth) {
-                    auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
-                    return std::make_pair(std::move(surface), std::move(view));
-                }
-            }
-        }
-
-        TSurface new_surface = GetUncachedSurface(gpu_addr, params);
-        bool modified = false;
-
-        for (auto& surface : overlaps) {
-            const SurfaceParams& src_params = surface->GetSurfaceParams();
-            if (src_params.target != SurfaceTarget::Texture2D ||
-                src_params.height != params.height ||
-                src_params.block_depth != params.block_depth ||
-                src_params.block_height != params.block_height) {
-                return std::nullopt;
-            }
-            modified |= surface->IsModified();
-
-            const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
-            const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
-            const u32 width = params.width;
-            const u32 height = params.height;
-            const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
-            TryCopyImage(surface, new_surface, copy_params);
-        }
-        for (const auto& surface : overlaps) {
-            Unregister(surface);
-        }
-        new_surface->MarkAsModified(modified, Tick());
-        Register(new_surface);
-
-        TView view = new_surface->GetMainView();
-        return std::make_pair(std::move(new_surface), std::move(view));
-    }
+
+template <class P>
+void TextureCache<P>::InvalidateColorBuffer(size_t index) {
+    ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+    color_buffer_id = FindColorBuffer(index, false);
+    if (!color_buffer_id) {
+        LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
+        return;
+    }
+    // When invalidating a color buffer, the old contents are no longer relevant
+    ImageView& color_buffer = slot_image_views[color_buffer_id];
+    Image& image = slot_images[color_buffer.image_id];
+    image.flags &= ~ImageFlagBits::CpuModified;
+    image.flags &= ~ImageFlagBits::GpuModified;
+
+    runtime.InvalidateColorBuffer(color_buffer, index);
+}
+
+template <class P>
+void TextureCache<P>::InvalidateDepthBuffer() {
+    ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
+    depth_buffer_id = FindDepthBuffer(false);
+    if (!depth_buffer_id) {
+        LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
+        return;
+    }
+    // When invalidating the depth buffer, the old contents are no longer relevant
+    ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
+    image.flags &= ~ImageFlagBits::CpuModified;
+    image.flags &= ~ImageFlagBits::GpuModified;
+
+    ImageView& depth_buffer = slot_image_views[depth_buffer_id];
+    runtime.InvalidateDepthBuffer(depth_buffer);
+}
+
+template <class P>
+typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
+    // TODO: Properly implement this
+    const auto it = page_table.find(cpu_addr >> PAGE_SHIFT);
+    if (it == page_table.end()) {
+        return nullptr;
+    }
+    const auto& image_ids = it->second;
+    for (const ImageId image_id : image_ids) {
+        const ImageBase& image = slot_images[image_id];
+        if (image.cpu_addr != cpu_addr) {
+            continue;
+        }
+        if (image.image_view_ids.empty()) {
+            continue;
+        }
+        return &slot_image_views[image.image_view_ids.at(0)];
+    }
+    return nullptr;
+}
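TryFindFramebufferImageView shows the page table in action: images are bucketed by `cpu_addr >> PAGE_SHIFT`, so an address query only scans the images registered on its page. A toy version of the lookup; the PAGE_SHIFT value is an assumption, as it is not visible in this hunk:

#include <cstdint>
#include <unordered_map>
#include <vector>

using ToyImageId = std::uint32_t;

constexpr std::uint64_t PAGE_SHIFT = 20; // assumed granularity; value not shown here

// Only the bucket for the address's page has to be scanned.
const std::vector<ToyImageId>* ImagesOnPage(
    const std::unordered_map<std::uint64_t, std::vector<ToyImageId>>& page_table,
    std::uint64_t cpu_addr) {
    const auto it = page_table.find(cpu_addr >> PAGE_SHIFT);
    return it == page_table.end() ? nullptr : &it->second;
}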
-
-    /**
-     * Gets the starting address and parameters of a candidate surface and tries
-     * to find a matching surface within the cache. This is done in 3 big steps:
-     *
-     * 1. Check the 1st Level Cache in order to find an exact match; if we fail, we move to step 2.
-     *
-     * 2. Check if there are any overlaps at all; if there are none, we just load the texture from
-     *    memory, else we move to step 3.
-     *
-     * 3. Consists of figuring out the relationship between the candidate texture and the
-     *    overlaps. We divide the scenarios depending on whether there's 1 or many overlaps. If
-     *    there's many, we just try to reconstruct a new surface out of them based on the
-     *    candidate's parameters; if we fail, we recycle. When there's only 1 overlap then we
-     *    have to check if the candidate is a view (layer/mipmap) of the overlap or if the
-     *    registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
-     *    a new surface.
-     *
-     * @param gpu_addr The starting address of the candidate surface.
-     * @param params The parameters on the candidate surface.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or
-     *                          left blank.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
-                                          const SurfaceParams& params, bool preserve_contents,
-                                          bool is_render) {
-        // Step 1
-        // Check Level 1 Cache for a fast structural match. If the candidate surface
-        // matches at a certain level, we are pretty much done.
-        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
-            TSurface& current_surface = iter->second;
-            const auto topological_result = current_surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                VectorSurface overlaps{current_surface};
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      topological_result);
-            }
-
-            const auto struct_result = current_surface->MatchesStructure(params);
-            if (struct_result != MatchStructureResult::None) {
-                const auto& old_params = current_surface->GetSurfaceParams();
-                const bool not_3d = params.target != SurfaceTarget::Texture3D &&
-                                    old_params.target != SurfaceTarget::Texture3D;
-                if (not_3d || current_surface->MatchTarget(params.target)) {
-                    if (struct_result == MatchStructureResult::FullMatch) {
-                        return ManageStructuralMatch(current_surface, params, is_render);
-                    } else {
-                        return RebuildSurface(current_surface, params, is_render);
-                    }
-                }
-            }
-        }
-
-        // Step 2
-        // Obtain all possible overlaps in the memory region
-        const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
-
-        // If none are found, we are done; we just load the surface and create it.
-        if (overlaps.empty()) {
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
-
-        // Step 3
-        // Now we need to figure out the relationship between the texture and its overlaps.
-        // We do a topological test to ensure we can find some relationship. If it fails,
-        // immediately recycle the texture.
-        for (const auto& surface : overlaps) {
-            const auto topological_result = surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      topological_result);
-            }
-        }
+
+template <class P>
+bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
+    return !uncommitted_downloads.empty();
+}
+
+template <class P>
+bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+    return !committed_downloads.empty() && !committed_downloads.front().empty();
+}
+
+template <class P>
+void TextureCache<P>::CommitAsyncFlushes() {
+    // This is intentionally passing the value by copy
+    committed_downloads.push(uncommitted_downloads);
+    uncommitted_downloads.clear();
+}
+
+template <class P>
+void TextureCache<P>::PopAsyncFlushes() {
+    if (committed_downloads.empty()) {
+        return;
+    }
+    const std::span<const ImageId> download_ids = committed_downloads.front();
+    if (download_ids.empty()) {
+        committed_downloads.pop();
+        return;
+    }
+    size_t total_size_bytes = 0;
+    for (const ImageId image_id : download_ids) {
+        total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
+    }
+    auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
+    size_t buffer_offset = 0;
+    for (const ImageId image_id : download_ids) {
+        Image& image = slot_images[image_id];
+        const auto copies = FullDownloadCopies(image.info);
+        image.DownloadMemory(download_map, buffer_offset, copies);
+        buffer_offset += image.unswizzled_size_bytes;
+    }
+    // Wait for downloads to finish
+    runtime.Finish();
+
+    buffer_offset = 0;
+    const std::span<u8> download_span = download_map.Span();
+    for (const ImageId image_id : download_ids) {
+        const ImageBase& image = slot_images[image_id];
+        const auto copies = FullDownloadCopies(image.info);
+        const std::span<u8> image_download_span = download_span.subspan(buffer_offset);
+        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span);
+        buffer_offset += image.unswizzled_size_bytes;
+    }
+    committed_downloads.pop();
+}
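The four functions above form the asynchronous flush protocol: GPU-modified images accumulate in `uncommitted_downloads`, CommitAsyncFlushes snapshots them into a queue, and PopAsyncFlushes later maps one staging buffer for the whole batch (sizes are summed first), downloads every image at its offset, waits once, and deswizzles back into guest memory. The queue discipline condensed into a sketch with toy types:

#include <queue>
#include <vector>

using ToyImageId = unsigned;

struct AsyncFlushQueue {
    std::vector<ToyImageId> uncommitted;
    std::queue<std::vector<ToyImageId>> committed;

    void Commit() {
        committed.push(uncommitted); // snapshot by copy, as in the diff
        uncommitted.clear();
    }

    void Pop(void (*download_batch)(const std::vector<ToyImageId>&)) {
        if (committed.empty()) {
            return;
        }
        if (!committed.front().empty()) {
            download_batch(committed.front()); // one batched download per pop
        }
        committed.pop();
    }
};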
-
-        // Manage 3D textures
-        if (params.block_depth > 0) {
-            auto surface =
-                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
-            if (surface) {
-                return *surface;
-            }
-        }
-
-        // Split cases between 1 overlap or many.
-        if (overlaps.size() == 1) {
-            TSurface current_surface = overlaps[0];
-            // First check if the surface is within the overlap. If not, it means one of two
-            // things: either the candidate surface is a supertexture of the overlap, or they
-            // don't match in any known way.
-            if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
-                const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
-                if (view) {
-                    return *view;
-                }
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      MatchTopologyResult::FullMatch);
-            }
-            // Now we check if the candidate is a mipmap/layer of the overlap
-            std::optional<TView> view =
-                current_surface->EmplaceView(params, gpu_addr, candidate_size);
-            if (view) {
-                const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
-                if (is_mirage) {
-                    // On a mirage view, we need to recreate the surface under this new view
-                    // and then obtain a view again.
-                    SurfaceParams new_params = current_surface->GetSurfaceParams();
-                    const u32 wh = SurfaceParams::ConvertWidth(
-                        new_params.width, new_params.pixel_format, params.pixel_format);
-                    const u32 hh = SurfaceParams::ConvertHeight(
-                        new_params.height, new_params.pixel_format, params.pixel_format);
-                    new_params.width = wh;
-                    new_params.height = hh;
-                    new_params.pixel_format = params.pixel_format;
-                    std::pair<TSurface, TView> pair =
-                        RebuildSurface(current_surface, new_params, is_render);
-                    std::optional<TView> mirage_view =
-                        pair.first->EmplaceView(params, gpu_addr, candidate_size);
-                    if (mirage_view)
-                        return {pair.first, *mirage_view};
-                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                          MatchTopologyResult::FullMatch);
-                }
-                return {current_surface, *view};
-            }
-        } else {
-            // If there are many overlaps, odds are they are subtextures of the candidate
-            // surface. We try to construct a new surface based on the candidate parameters,
-            // using the overlaps. If a single overlap fails, this will fail.
-            std::optional<std::pair<TSurface, TView>> view =
-                TryReconstructSurface(overlaps, params, gpu_addr);
-            if (view) {
-                return *view;
-            }
-        }
-        // We failed all the tests, recycle the overlaps into a new texture.
-        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                              MatchTopologyResult::FullMatch);
-    }
-
-    /**
-     * Gets the starting address and parameters of a candidate surface and tries to find a
-     * matching surface within the cache that's similar to it. If there are many textures
-     * or the texture found is entirely incompatible, it will fail. If no texture is found, the
-     * blit will be unsuccessful.
-     *
-     * @param gpu_addr The starting address of the candidate surface.
-     * @param params The parameters on the candidate surface.
-     **/
-    Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-
-        if (!cpu_addr) {
-            Deduction result{};
-            result.type = DeductionType::DeductionFailed;
-            return result;
-        }
-
-        if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
-            TSurface& current_surface = iter->second;
-            const auto topological_result = current_surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                Deduction result{};
-                result.type = DeductionType::DeductionFailed;
-                return result;
-            }
-            const auto struct_result = current_surface->MatchesStructure(params);
-            if (struct_result != MatchStructureResult::None &&
-                current_surface->MatchTarget(params.target)) {
-                Deduction result{};
-                result.type = DeductionType::DeductionComplete;
-                result.surface = current_surface;
-                return result;
-            }
-        }
-
-        const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
+
+template <class P>
+bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+    bool is_modified = false;
+    ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
+        if (False(image.flags & ImageFlagBits::GpuModified)) {
+            return false;
+        }
+        is_modified = true;
+        return true;
+    });
+    return is_modified;
+}
+
+template <class P>
+void TextureCache<P>::RefreshContents(Image& image) {
+    if (False(image.flags & ImageFlagBits::CpuModified)) {
+        // Only upload modified images
+        return;
+    }
+    image.flags &= ~ImageFlagBits::CpuModified;
+    TrackImage(image);
+
+    if (image.info.num_samples > 1) {
+        LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
+        return;
+    }
+    auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
+    UploadImageContents(image, map, 0);
+    runtime.InsertUploadMemoryBarrier();
+}
+
+template <class P>
+template <typename MapBuffer>
+void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
+    const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
+    const GPUVAddr gpu_addr = image.gpu_addr;
+
+    if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
+        gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+        const auto uploads = FullUploadSwizzles(image.info);
+        runtime.AccelerateImageUpload(image, map, buffer_offset, uploads);
+    } else if (True(image.flags & ImageFlagBits::Converted)) {
+        std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
+        auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+        ConvertImage(unswizzled_data, image.info, mapped_span, copies);
+        image.UploadMemory(map, buffer_offset, copies);
+    } else if (image.info.type == ImageType::Buffer) {
+        const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
+        image.UploadMemory(map, buffer_offset, copies);
+    } else {
+        const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+        image.UploadMemory(map, buffer_offset, copies);
+    }
+}
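UploadImageContents picks one of four upload paths per image, and the order of the ladder matters: GPU-accelerated swizzling wins over format conversion, which wins over the linear-buffer special case, which wins over the plain deswizzle. Summarized as a sketch:

enum class UploadPath { Accelerated, Converted, Buffer, Default };

// Mirrors the if/else ladder above: the first matching condition wins.
constexpr UploadPath Pick(bool accelerated_upload, bool converted, bool is_buffer) {
    if (accelerated_upload) {
        return UploadPath::Accelerated; // GPU-assisted swizzle of raw guest data
    }
    if (converted) {
        return UploadPath::Converted;   // CPU deswizzle + pixel-format conversion
    }
    if (is_buffer) {
        return UploadPath::Buffer;      // single linear copy
    }
    return UploadPath::Default;         // CPU deswizzle straight into staging
}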
-
-        if (overlaps.empty()) {
-            Deduction result{};
-            result.type = DeductionType::DeductionIncomplete;
-            return result;
-        }
-
-        if (overlaps.size() > 1) {
-            Deduction result{};
-            result.type = DeductionType::DeductionFailed;
-            return result;
-        } else {
-            Deduction result{};
-            result.type = DeductionType::DeductionComplete;
-            result.surface = overlaps[0];
-            return result;
-        }
-    }
-
-    /**
-     * Gets a null surface based on a target texture.
-     * @param target The target of the null surface.
-     */
-    TView GetNullSurface(SurfaceTarget target) {
-        const u32 i_target = static_cast<u32>(target);
-        if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) {
-            return it->second->GetMainView();
-        }
-        SurfaceParams params{};
-        params.target = target;
-        params.is_tiled = false;
-        params.srgb_conversion = false;
-        params.is_layered =
-            target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
-            target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
-        params.block_width = 0;
-        params.block_height = 0;
-        params.block_depth = 0;
-        params.tile_width_spacing = 1;
-        params.width = 1;
-        params.height = 1;
-        params.depth = 1;
-        if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
-            params.depth = 6;
-        }
-        params.pitch = 4;
-        params.num_levels = 1;
-        params.emulated_levels = 1;
-        params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
-        params.type = VideoCore::Surface::SurfaceType::ColorTexture;
-        auto surface = CreateSurface(0ULL, params);
-        invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
-        surface->UploadTexture(invalid_memory);
-        surface->MarkAsModified(false, Tick());
-        invalid_cache.emplace(i_target, surface);
-        return surface->GetMainView();
-    }
-
-    /**
-     * Gets a source and destination starting address and parameters,
-     * and tries to deduce if they are supposed to be depth textures. If so, their
-     * parameters are adjusted accordingly.
-     *
-     * @param src_params The parameters of the candidate surface.
-     * @param dst_params The parameters of the destination surface.
-     * @param src_gpu_addr The starting address of the candidate surface.
-     * @param dst_gpu_addr The starting address of the destination surface.
-     **/
-    void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
-                        const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
-        auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
-        auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
-        if (deduced_src.Failed() || deduced_dst.Failed()) {
-            return;
-        }
+
+template <class P>
+ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
+    if (!IsValidAddress(gpu_memory, config)) {
+        return NULL_IMAGE_VIEW_ID;
+    }
+    const auto [pair, is_new] = image_views.try_emplace(config);
+    ImageViewId& image_view_id = pair->second;
+    if (is_new) {
+        image_view_id = CreateImageView(config);
+    }
+    return image_view_id;
+}
+
+template <class P>
+ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
+    const ImageInfo info(config);
+    const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride;
+    const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
+    if (!image_id) {
+        return NULL_IMAGE_VIEW_ID;
+    }
+    ImageBase& image = slot_images[image_id];
+    const SubresourceBase base = image.TryFindBase(config.Address()).value();
+    ASSERT(base.level == 0);
+    const ImageViewInfo view_info(config, base.layer);
+    const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
+    ImageViewBase& image_view = slot_image_views[image_view_id];
+    image_view.flags |= ImageViewFlagBits::Strong;
+    image.flags |= ImageFlagBits::Strong;
+    return image_view_id;
+}
+
+template <class P>
+ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                           RelaxedOptions options) {
+    if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
+        return image_id;
+    }
+    return InsertImage(info, gpu_addr, options);
+}
+
+template <class P>
+ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                   RelaxedOptions options) {
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr) {
+        return ImageId{};
+    }
+    ImageId image_id;
+    const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+        if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
+            const bool strict_size = False(options & RelaxedOptions::Size) &&
+                                     True(existing_image.flags & ImageFlagBits::Strong);
+            const ImageInfo& existing = existing_image.info;
+            if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
+                existing.pitch == info.pitch &&
+                IsPitchLinearSameSize(existing, info, strict_size) &&
+                IsViewCompatible(existing.format, info.format)) {
+                image_id = existing_image_id;
+                return true;
+            }
+        } else if (IsSubresource(info, existing_image, gpu_addr, options)) {
+            image_id = existing_image_id;
+            return true;
+        }
+        return false;
+    };
+    ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
+    return image_id;
+}
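`RelaxedOptions` is a bit-flag set that loosens individual match criteria (size, format, samples) in FindImage, and `True`/`False` are readability helpers over bitwise AND. A minimal sketch of such a flag set; the actual enumerator values and helper definitions live outside this hunk and are assumptions here:

#include <cstdint>

enum class ToyRelaxedOptions : std::uint32_t {
    Size = 1 << 0,    // assumed bit assignments
    Format = 1 << 1,
    Samples = 1 << 2,
};

constexpr ToyRelaxedOptions operator|(ToyRelaxedOptions a, ToyRelaxedOptions b) {
    return static_cast<ToyRelaxedOptions>(static_cast<std::uint32_t>(a) |
                                          static_cast<std::uint32_t>(b));
}
constexpr ToyRelaxedOptions operator&(ToyRelaxedOptions a, ToyRelaxedOptions b) {
    return static_cast<ToyRelaxedOptions>(static_cast<std::uint32_t>(a) &
                                          static_cast<std::uint32_t>(b));
}
// Readability helpers in the style used throughout the new cache.
constexpr bool True(ToyRelaxedOptions f) { return static_cast<std::uint32_t>(f) != 0; }
constexpr bool False(ToyRelaxedOptions f) { return static_cast<std::uint32_t>(f) == 0; }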
-
-        const bool incomplete_src = deduced_src.Incomplete();
-        const bool incomplete_dst = deduced_dst.Incomplete();
-
-        if (incomplete_src && incomplete_dst) {
-            return;
-        }
-
-        const bool any_incomplete = incomplete_src || incomplete_dst;
-
-        if (!any_incomplete) {
-            if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
-                return;
-            }
-        } else {
-            if (incomplete_src && !(deduced_dst.IsDepth())) {
-                return;
-            }
-
-            if (incomplete_dst && !(deduced_src.IsDepth())) {
-                return;
-            }
-        }
-
-        const auto inherit_format = [](SurfaceParams& to, TSurface from) {
-            const SurfaceParams& params = from->GetSurfaceParams();
-            to.pixel_format = params.pixel_format;
-            to.type = params.type;
-        };
-        // Now we handle the cases where one or both surfaces are depth and the other is not known
-        if (!incomplete_src) {
-            inherit_format(src_params, deduced_src.surface);
-        } else {
-            inherit_format(src_params, deduced_dst.surface);
-        }
-        if (!incomplete_dst) {
-            inherit_format(dst_params, deduced_dst.surface);
-        } else {
-            inherit_format(dst_params, deduced_src.surface);
-        }
-    }
+
+template <class P>
+ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                     RelaxedOptions options) {
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
+    const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
+    const Image& image = slot_images[image_id];
+    // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
+    const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
+    if (is_new) {
+        it->second = slot_image_allocs.insert();
+    }
+    slot_image_allocs[it->second].images.push_back(image_id);
+    return image_id;
+}
+
+template <class P>
+ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
+    ImageInfo new_info = info;
+    const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
+    std::vector<ImageId> overlap_ids;
+    std::vector<ImageId> left_aliased_ids;
+    std::vector<ImageId> right_aliased_ids;
+    ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
+        if (info.type != overlap.info.type) {
+            return;
+        }
+        if (info.type == ImageType::Linear) {
+            if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
+                // Alias linear images with the same pitch
+                left_aliased_ids.push_back(overlap_id);
+            }
+            return;
+        }
+        const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true);
+        if (solution) {
+            gpu_addr = solution->gpu_addr;
+            cpu_addr = solution->cpu_addr;
+            new_info.resources = solution->resources;
+            overlap_ids.push_back(overlap_id);
+            return;
+        }
+        static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
+        const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
+        if (IsSubresource(new_info, overlap, gpu_addr, options)) {
+            left_aliased_ids.push_back(overlap_id);
+        } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) {
+            right_aliased_ids.push_back(overlap_id);
+        }
+    });
+    const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
+    Image& new_image = slot_images[new_image_id];
+
+    // TODO: Only upload what we need
+    RefreshContents(new_image);
+
+    for (const ImageId overlap_id : overlap_ids) {
+        Image& overlap = slot_images[overlap_id];
+        if (overlap.info.num_samples != new_image.info.num_samples) {
+            LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
+        } else {
+            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
+            const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
+            runtime.CopyImage(new_image, overlap, copies);
+        }
+        if (True(overlap.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(overlap);
+        }
+        UnregisterImage(overlap_id);
+        DeleteImage(overlap_id);
+    }
+    ImageBase& new_image_base = new_image;
+    for (const ImageId aliased_id : right_aliased_ids) {
+        ImageBase& aliased = slot_images[aliased_id];
+        AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
+    }
+    for (const ImageId aliased_id : left_aliased_ids) {
+        ImageBase& aliased = slot_images[aliased_id];
+        AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
+    }
+    RegisterImage(new_image_id);
+    return new_image_id;
+}
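JoinImages grows the candidate image while scanning overlaps: every ResolveOverlap success may move `gpu_addr`/`cpu_addr` down and extend `new_info.resources`, so the final image covers all absorbed overlaps; everything else is classified as a left or right alias. The growth step reduced to a toy range union; the classification logic is not modeled here, and the addresses are illustrative:

#include <algorithm>
#include <cstdint>
#include <vector>

struct Range { std::uint64_t begin, end; };

// Toy version of the growth step: absorbing an overlapping range can only
// extend the candidate, mirroring how a ResolveOverlap solution may lower the
// base address and raise the resource count.
Range Absorb(Range image, Range overlap) {
    return Range{std::min(image.begin, overlap.begin),
                 std::max(image.end, overlap.end)};
}

int main() {
    Range image{0x2000, 0x3000};
    const std::vector<Range> overlaps{{0x1000, 0x2800}, {0x2800, 0x3800}};
    for (const Range& o : overlaps) {
        image = Absorb(image, o);
    }
    // image now spans 0x1000-0x3800 and covers both overlaps.
    return image.begin == 0x1000 && image.end == 0x3800 ? 0 : 1;
}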
-
-    std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
-                                                 bool preserve_contents) {
-        auto new_surface{GetUncachedSurface(gpu_addr, params)};
-        Register(new_surface);
-        if (preserve_contents) {
-            LoadSurface(new_surface);
-        }
-        return {new_surface, new_surface->GetMainView()};
-    }
-
-    void LoadSurface(const TSurface& surface) {
-        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
-        surface->LoadBuffer(gpu_memory, staging_cache);
-        surface->UploadTexture(staging_cache.GetBuffer(0));
-        surface->MarkAsModified(false, Tick());
-    }
-
-    void FlushSurface(const TSurface& surface) {
-        if (!surface->IsModified()) {
-            return;
-        }
-        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
-        surface->DownloadTexture(staging_cache.GetBuffer(0));
-        surface->FlushBuffer(gpu_memory, staging_cache);
-        surface->MarkAsModified(false, Tick());
-    }
-
-    void RegisterInnerCache(TSurface& surface) {
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        VAddr start = cpu_addr >> registry_page_bits;
-        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
-        l1_cache[cpu_addr] = surface;
-        while (start <= end) {
-            registry[start].push_back(surface);
-            start++;
-        }
-    }
-
-    void UnregisterInnerCache(TSurface& surface) {
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        VAddr start = cpu_addr >> registry_page_bits;
-        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
-        l1_cache.erase(cpu_addr);
-        while (start <= end) {
-            auto& reg{registry[start]};
-            reg.erase(std::find(reg.begin(), reg.end(), surface));
-            start++;
-        }
-    }
+
+template <class P>
+typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
+    const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
+    static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
+    const GPUVAddr dst_addr = dst.Address();
+    const GPUVAddr src_addr = src.Address();
+    ImageInfo dst_info(dst);
+    ImageInfo src_info(src);
+    ImageId dst_id;
+    ImageId src_id;
+    do {
+        has_deleted_images = false;
+        dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
+        src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
+        const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
+        const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
+        DeduceBlitImages(dst_info, src_info, dst_image, src_image);
+        if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
+            continue;
+        }
+        if (!dst_id) {
+            dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
+        }
+        if (!src_id) {
+            src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
+        }
+    } while (has_deleted_images);
+    return BlitImages{
+        .dst_id = dst_id,
+        .src_id = src_id,
+        .dst_format = dst_info.format,
+        .src_format = src_info.format,
+    };
+}
+
+template <class P>
+SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
+    if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
+        return NULL_SAMPLER_ID;
+    }
+    const auto [pair, is_new] = samplers.try_emplace(config);
+    if (is_new) {
+        pair->second = slot_samplers.insert(runtime, config);
+    }
+    return pair->second;
+}
+
+template <class P>
+ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
+    const auto& regs = maxwell3d.regs;
+    if (index >= regs.rt_control.count) {
+        return ImageViewId{};
+    }
+    const auto& rt = regs.rt[index];
+    const GPUVAddr gpu_addr = rt.Address();
+    if (gpu_addr == 0) {
+        return ImageViewId{};
+    }
+    if (rt.format == Tegra::RenderTargetFormat::NONE) {
+        return ImageViewId{};
+    }
+    const ImageInfo info(regs, index);
+    return FindRenderTargetView(info, gpu_addr, is_clear);
+}
+
+template <class P>
+ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
+    const auto& regs = maxwell3d.regs;
+    if (!regs.zeta_enable) {
+        return ImageViewId{};
+    }
+    const GPUVAddr gpu_addr = regs.zeta.Address();
+    if (gpu_addr == 0) {
+        return ImageViewId{};
+    }
+    const ImageInfo info(regs);
+    return FindRenderTargetView(info, gpu_addr, is_clear);
+}
+
+template <class P>
+ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
+                                                  bool is_clear) {
+    const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
+    const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
+    if (!image_id) {
+        return NULL_IMAGE_VIEW_ID;
+    }
+    Image& image = slot_images[image_id];
+    const ImageViewType view_type = RenderTargetImageViewType(info);
+    SubresourceBase base;
+    if (image.info.type == ImageType::Linear) {
+        base = SubresourceBase{.level = 0, .layer = 0};
+    } else {
+        base = image.TryFindBase(gpu_addr).value();
+    }
+    const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
+    const SubresourceRange range{
+        .base = base,
+        .extent = {.levels = 1, .layers = layers},
+    };
+    return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
+}
1145 1095
1146 VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { 1096template <class P>
1147 if (size == 0) { 1097template <typename Func>
1148 return {}; 1098void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
1099 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1100 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1101 boost::container::small_vector<ImageId, 32> images;
1102 ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
1103 const auto it = page_table.find(page);
1104 if (it == page_table.end()) {
1105 if constexpr (BOOL_BREAK) {
1106 return false;
1107 } else {
1108 return;
1109 }
1149 } 1110 }
1150 const VAddr cpu_addr_end = cpu_addr + size; 1111 for (const ImageId image_id : it->second) {
1151 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; 1112 Image& image = slot_images[image_id];
1152 VectorSurface surfaces; 1113 if (True(image.flags & ImageFlagBits::Picked)) {
1153 for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
1154 const auto it = registry.find(start);
1155 if (it == registry.end()) {
1156 continue; 1114 continue;
1157 } 1115 }
1158 for (auto& surface : it->second) { 1116 if (!image.Overlaps(cpu_addr, size)) {
1159 if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { 1117 continue;
1160 continue; 1118 }
1119 image.flags |= ImageFlagBits::Picked;
1120 images.push_back(image_id);
1121 if constexpr (BOOL_BREAK) {
1122 if (func(image_id, image)) {
1123 return true;
1161 } 1124 }
1162 surface->MarkAsPicked(true); 1125 } else {
1163 surfaces.push_back(surface); 1126 func(image_id, image);
1164 } 1127 }
1165 } 1128 }
1166 for (auto& surface : surfaces) { 1129 if constexpr (BOOL_BREAK) {
1167 surface->MarkAsPicked(false); 1130 return false;
1168 } 1131 }
1169 return surfaces; 1132 });
1133 for (const ImageId image_id : images) {
1134 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1170 } 1135 }
1136}
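The callback's return type selects the traversal mode: a void functor visits every overlapping image, while a bool functor stops the page walk at the first invocation that returns true (the BOOL_BREAK path above). A minimal usage sketch; the call sites are hypothetical and addr/size are assumed to be a valid guest range:

    // Visit every image overlapping the range (void callback).
    ForEachImageInRegion(addr, size, [](ImageId id, Image& image) {
        image.flags |= ImageFlagBits::CpuModified;
    });
    // Stop as soon as one GPU-modified image is found (bool callback).
    bool found = false;
    ForEachImageInRegion(addr, size, [&found](ImageId id, Image& image) {
        found = True(image.flags & ImageFlagBits::GpuModified);
        return found;
    });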
@@ -1171,134 +1137,318 @@
1171
1172 void ReserveSurface(const SurfaceParams& params, TSurface surface) {
1173 surface_reserve[params].push_back(std::move(surface));
1174 }
1175
1176 TSurface TryGetReservedSurface(const SurfaceParams& params) {
1177 auto search{surface_reserve.find(params)};
1178 if (search == surface_reserve.end()) {
1179 return {};
1180 }
1181 for (auto& surface : search->second) {
1182 if (!surface->IsRegistered()) {
1183 return surface;
1184 }
1185 }
1186 return {};
1187 }
1188
1189 /// Try to do an image copy, logging when formats are incompatible.
1190 void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
1191 const SurfaceParams& src_params = src->GetSurfaceParams();
1192 const SurfaceParams& dst_params = dst->GetSurfaceParams();
1193 if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
1194 LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}",
1195 static_cast<int>(dst_params.pixel_format),
1196 static_cast<int>(src_params.pixel_format));
1197 return;
1198 }
1199 ImageCopy(src, dst, copy);
1200 }
1201
1202 constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
1203 return siblings_table[static_cast<std::size_t>(format)];
1204 }
1205
1206 /// Returns true when the shader sampler entry is compatible with the TIC texture type.
1207 static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
1208 const VideoCommon::Shader::Sampler& entry) {
1209 const auto shader_type = entry.type;
1210 switch (tic_type) {
1211 case Tegra::Texture::TextureType::Texture1D:
1212 case Tegra::Texture::TextureType::Texture1DArray:
1213 return shader_type == Tegra::Shader::TextureType::Texture1D;
1214 case Tegra::Texture::TextureType::Texture1DBuffer:
1215 // TODO(Rodrigo): Assume as valid for now
1216 return true;
1217 case Tegra::Texture::TextureType::Texture2D:
1218 case Tegra::Texture::TextureType::Texture2DNoMipmap:
1219 return shader_type == Tegra::Shader::TextureType::Texture2D;
1220 case Tegra::Texture::TextureType::Texture2DArray:
1221 return shader_type == Tegra::Shader::TextureType::Texture2D ||
1222 shader_type == Tegra::Shader::TextureType::TextureCube;
1223 case Tegra::Texture::TextureType::Texture3D:
1224 return shader_type == Tegra::Shader::TextureType::Texture3D;
1225 case Tegra::Texture::TextureType::TextureCubeArray:
1226 case Tegra::Texture::TextureType::TextureCubemap:
1227 if (shader_type == Tegra::Shader::TextureType::TextureCube) {
1228 return true;
1229 }
1230 return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array;
1231 }
1232 UNREACHABLE();
1233 return true;
1234 }
1235
1236 struct FramebufferTargetInfo {
1237 TSurface target;
1238 TView view;
1239 };
1240
1241 void AsyncFlushSurface(TSurface& surface) {
1242 if (!uncommitted_flushes) {
1243 uncommitted_flushes = std::make_shared<std::list<TSurface>>();
1244 }
1245 uncommitted_flushes->push_back(surface);
1246 }
1247
1248 VideoCore::RasterizerInterface& rasterizer;
1249 Tegra::Engines::Maxwell3D& maxwell3d;
1250 Tegra::MemoryManager& gpu_memory;
1251
1252 FormatLookupTable format_lookup_table;
1253 FormatCompatibility format_compatibility;
1254
1255 u64 ticks{};
1256
1257 // Guards the cache for protection conflicts.
1258 bool guard_render_targets{};
1259 bool guard_samplers{};
1260
1261 // The siblings table is for formats that can interchange with one another
1262 // without causing issues. This is only valid when a conflict occurs on a
1263 // non-rendering use.
1264 std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
1265
1266 // The internal cache is different for the texture cache: it is based on buckets
1267 // of 1MB. This fits better for the purpose of this cache as textures are normally
1268 // large in size.
1269 static constexpr u64 registry_page_bits{20};
1270 static constexpr u64 registry_page_size{1 << registry_page_bits};
1271 std::unordered_map<VAddr, std::vector<TSurface>> registry;
1272
1273 static constexpr u32 DEPTH_RT = 8;
1274 static constexpr u32 NO_RT = 0xFFFFFFFF;
1275
1276 // The L1 cache is used for fast texture lookup before checking the overlaps.
1277 // This avoids calculating size and other details.
1278 std::unordered_map<VAddr, TSurface> l1_cache;
1279
1280 /// The surface reserve is a "backup" cache; this is where we put unique surfaces that have
1281 /// previously been used. This is to prevent surfaces from being constantly created and
1282 /// destroyed when used with different surface parameters.
1283 std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
1284 std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
1285 render_targets;
1286 FramebufferTargetInfo depth_buffer;
1287
1288 std::vector<TSurface> sampled_textures;
1289
1290 /// This cache stores null surfaces in order to be used as a placeholder
1291 /// for invalid texture calls.
1292 std::unordered_map<u32, TSurface> invalid_cache;
1293 std::vector<u8> invalid_memory;
1294
1295 std::list<TSurface> marked_for_unregister;
1296
1297 std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
1298 std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;
1299
1300 StagingCache staging_cache;
1301 std::recursive_mutex mutex;
1302 };
1303
1304} // namespace VideoCommon
1137
1138template <class P>
1139ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
1140 Image& image = slot_images[image_id];
1141 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
1142 return image_view_id;
1143 }
1144 const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
1145 image.InsertView(info, image_view_id);
1146 return image_view_id;
1147}
1148
1149template <class P>
1150void TextureCache<P>::RegisterImage(ImageId image_id) {
1151 ImageBase& image = slot_images[image_id];
1152 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1153 "Trying to register an already registered image");
1154 image.flags |= ImageFlagBits::Registered;
1155 ForEachPage(image.cpu_addr, image.guest_size_bytes,
1156 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
1157}
1158
1159template <class P>
1160void TextureCache<P>::UnregisterImage(ImageId image_id) {
1161 Image& image = slot_images[image_id];
1162 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
1163 "Trying to unregister an image that is not registered");
1164 image.flags &= ~ImageFlagBits::Registered;
1165 ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
1166 const auto page_it = page_table.find(page);
1167 if (page_it == page_table.end()) {
1168 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT);
1169 return;
1170 }
1171 std::vector<ImageId>& image_ids = page_it->second;
1172 const auto vector_it = std::ranges::find(image_ids, image_id);
1173 if (vector_it == image_ids.end()) {
1174 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT);
1175 return;
1176 }
1177 image_ids.erase(vector_it);
1178 });
1179}
1180
1181template <class P>
1182void TextureCache<P>::TrackImage(ImageBase& image) {
1183 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1184 image.flags |= ImageFlagBits::Tracked;
1185 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1186}
1187
1188template <class P>
1189void TextureCache<P>::UntrackImage(ImageBase& image) {
1190 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1191 image.flags &= ~ImageFlagBits::Tracked;
1192 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1193}
1194
1195template <class P>
1196void TextureCache<P>::DeleteImage(ImageId image_id) {
1197 ImageBase& image = slot_images[image_id];
1198 const GPUVAddr gpu_addr = image.gpu_addr;
1199 const auto alloc_it = image_allocs_table.find(gpu_addr);
1200 if (alloc_it == image_allocs_table.end()) {
1201 UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
1202 gpu_addr);
1203 return;
1204 }
1205 const ImageAllocId alloc_id = alloc_it->second;
1206 std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
1207 const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
1208 if (alloc_image_it == alloc_images.end()) {
1209 UNREACHABLE_MSG("Trying to delete an image that does not exist");
1210 return;
1211 }
1212 ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
1213 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1214
1215 // Mark render targets as dirty
1216 auto& dirty = maxwell3d.dirty.flags;
1217 dirty[Dirty::RenderTargets] = true;
1218 dirty[Dirty::ZetaBuffer] = true;
1219 for (size_t rt = 0; rt < NUM_RT; ++rt) {
1220 dirty[Dirty::ColorBuffer0 + rt] = true;
1221 }
1222 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
1223 for (const ImageViewId image_view_id : image_view_ids) {
1224 std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
1225 if (render_targets.depth_buffer_id == image_view_id) {
1226 render_targets.depth_buffer_id = ImageViewId{};
1227 }
1228 }
1229 RemoveImageViewReferences(image_view_ids);
1230 RemoveFramebuffers(image_view_ids);
1231
1232 for (const AliasedImage& alias : image.aliased_images) {
1233 ImageBase& other_image = slot_images[alias.id];
1234 [[maybe_unused]] const size_t num_removed_aliases =
1235 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1236 return other_alias.id == image_id;
1237 });
1238 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1239 num_removed_aliases);
1240 }
1241 for (const ImageViewId image_view_id : image_view_ids) {
1242 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1243 slot_image_views.erase(image_view_id);
1244 }
1245 sentenced_images.Push(std::move(slot_images[image_id]));
1246 slot_images.erase(image_id);
1247
1248 alloc_images.erase(alloc_image_it);
1249 if (alloc_images.empty()) {
1250 image_allocs_table.erase(alloc_it);
1251 }
1252 if constexpr (ENABLE_VALIDATION) {
1253 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
1254 std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
1255 }
1256 graphics_image_table.Invalidate();
1257 compute_image_table.Invalidate();
1258 has_deleted_images = true;
1259}
1260
1261template <class P>
1262void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1263 auto it = image_views.begin();
1264 while (it != image_views.end()) {
1265 const auto found = std::ranges::find(removed_views, it->second);
1266 if (found != removed_views.end()) {
1267 it = image_views.erase(it);
1268 } else {
1269 ++it;
1270 }
1271 }
1272}
1273
1274template <class P>
1275void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
1276 auto it = framebuffers.begin();
1277 while (it != framebuffers.end()) {
1278 if (it->first.Contains(removed_views)) {
1279 it = framebuffers.erase(it);
1280 } else {
1281 ++it;
1282 }
1283 }
1284}
1285
1286template <class P>
1287void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
1288 image.flags |= ImageFlagBits::GpuModified;
1289 image.modification_tick = ++modification_tick;
1290}
1291
1292template <class P>
1293void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1294 boost::container::small_vector<const AliasedImage*, 1> aliased_images;
1295 ImageBase& image = slot_images[image_id];
1296 u64 most_recent_tick = image.modification_tick;
1297 for (const AliasedImage& aliased : image.aliased_images) {
1298 ImageBase& aliased_image = slot_images[aliased.id];
1299 if (image.modification_tick < aliased_image.modification_tick) {
1300 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1301 aliased_images.push_back(&aliased);
1302 }
1303 }
1304 if (aliased_images.empty()) {
1305 return;
1306 }
1307 image.modification_tick = most_recent_tick;
1308 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
1309 const ImageBase& lhs_image = slot_images[lhs->id];
1310 const ImageBase& rhs_image = slot_images[rhs->id];
1311 return lhs_image.modification_tick < rhs_image.modification_tick;
1312 });
1313 for (const AliasedImage* const aliased : aliased_images) {
1314 CopyImage(image_id, aliased->id, aliased->copies);
1315 }
1316}
1317
1318template <class P>
1319void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
1320 Image& image = slot_images[image_id];
1321 if (invalidate) {
1322 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
1323 if (False(image.flags & ImageFlagBits::Tracked)) {
1324 TrackImage(image);
1325 }
1326 } else {
1327 RefreshContents(image);
1328 SynchronizeAliases(image_id);
1329 }
1330 if (is_modification) {
1331 MarkModification(image);
1332 }
1333 image.frame_tick = frame_tick;
1334}
1335
1336template <class P>
1337void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
1338 bool invalidate) {
1339 if (!image_view_id) {
1340 return;
1341 }
1342 const ImageViewBase& image_view = slot_image_views[image_view_id];
1343 PrepareImage(image_view.image_id, is_modification, invalidate);
1344}
1345
1346template <class P>
1347void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
1348 Image& dst = slot_images[dst_id];
1349 Image& src = slot_images[src_id];
1350 const auto dst_format_type = GetFormatType(dst.info.format);
1351 const auto src_format_type = GetFormatType(src.info.format);
1352 if (src_format_type == dst_format_type) {
1353 if constexpr (HAS_EMULATED_COPIES) {
1354 if (!runtime.CanImageBeCopied(dst, src)) {
1355 return runtime.EmulateCopyImage(dst, src, copies);
1356 }
1357 }
1358 return runtime.CopyImage(dst, src, copies);
1359 }
1360 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1361 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1362 for (const ImageCopy& copy : copies) {
1363 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
1364 UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
1365 UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
1366 UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
1367
1368 const SubresourceBase dst_base{
1369 .level = copy.dst_subresource.base_level,
1370 .layer = copy.dst_subresource.base_layer,
1371 };
1372 const SubresourceBase src_base{
1373 .level = copy.src_subresource.base_level,
1374 .layer = copy.src_subresource.base_layer,
1375 };
1376 const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
1377 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1378 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1379 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1380 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
1381 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1382 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1383 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
1384 const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
1385 ImageView& dst_view = slot_image_views[dst_view_id];
1386 ImageView& src_view = slot_image_views[src_view_id];
1387 [[maybe_unused]] const Extent3D expected_size{
1388 .width = std::min(dst_view.size.width, src_view.size.width),
1389 .height = std::min(dst_view.size.height, src_view.size.height),
1390 .depth = std::min(dst_view.size.depth, src_view.size.depth),
1391 };
1392 UNIMPLEMENTED_IF(copy.extent != expected_size);
1393
1394 runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
1395 }
1396}
1397
1398template <class P>
1399void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
1400 if (*old_id == new_id) {
1401 return;
1402 }
1403 if (*old_id) {
1404 const ImageViewBase& old_view = slot_image_views[*old_id];
1405 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
1406 uncommitted_downloads.push_back(old_view.image_id);
1407 }
1408 }
1409 *old_id = new_id;
1410}
1411
1412template <class P>
1413std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1414 ImageId image_id, const ImageViewInfo& view_info) {
1415 const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
1416 const ImageBase& image = slot_images[image_id];
1417 const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
1418 const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
1419 const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
1420 const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
1421 const u32 num_samples = image.info.num_samples;
1422 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
1423 const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
1424 .color_buffer_ids = {color_view_id},
1425 .depth_buffer_id = depth_view_id,
1426 .size = {extent.width >> samples_x, extent.height >> samples_y},
1427 });
1428 return {framebuffer_id, view_id};
1429}
1430
1431template <class P>
1432bool TextureCache<P>::IsFullClear(ImageViewId id) {
1433 if (!id) {
1434 return true;
1435 }
1436 const ImageViewBase& image_view = slot_image_views[id];
1437 const ImageBase& image = slot_images[image_view.image_id];
1438 const Extent3D size = image_view.size;
1439 const auto& regs = maxwell3d.regs;
1440 const auto& scissor = regs.scissor_test[0];
1441 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1442 // Images with multiple resources can't be cleared in a single call
1443 return false;
1444 }
1445 if (regs.clear_flags.scissor == 0) {
1446 // If scissor testing is disabled, the clear is always full
1447 return true;
1448 }
1449 // Make sure the clear covers all texels in the subresource
1450 return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
1451 scissor.max_y >= size.height;
1452}
1453
1454} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
new file mode 100644
index 000000000..2ad2d72a6
--- /dev/null
+++ b/src/video_core/texture_cache/types.h
@@ -0,0 +1,140 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "common/common_types.h"
9#include "video_core/texture_cache/slot_vector.h"
10
11namespace VideoCommon {
12
13constexpr size_t NUM_RT = 8;
14constexpr size_t MAX_MIP_LEVELS = 14;
15
16constexpr SlotId CORRUPT_ID{0xfffffffe};
17
18using ImageId = SlotId;
19using ImageViewId = SlotId;
20using ImageAllocId = SlotId;
21using SamplerId = SlotId;
22using FramebufferId = SlotId;
23
24enum class ImageType : u32 {
25 e1D,
26 e2D,
27 e3D,
28 Linear,
29 Buffer,
30};
31
32enum class ImageViewType : u32 {
33 e1D,
34 e2D,
35 Cube,
36 e3D,
37 e1DArray,
38 e2DArray,
39 CubeArray,
40 Rect,
41 Buffer,
42};
43constexpr size_t NUM_IMAGE_VIEW_TYPES = 9;
44
45enum class RelaxedOptions : u32 {
46 Size = 1 << 0,
47 Format = 1 << 1,
48 Samples = 1 << 2,
49};
50DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
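The flag operators generated here let lookups combine relaxations in a single mask; for example (illustrative only), a search tolerating both size and format mismatches would pass RelaxedOptions::Size | RelaxedOptions::Format, while FindRenderTargetView above passes RelaxedOptions::Samples for clears.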
51
52struct Offset2D {
53 constexpr auto operator<=>(const Offset2D&) const noexcept = default;
54
55 s32 x;
56 s32 y;
57};
58
59struct Offset3D {
60 constexpr auto operator<=>(const Offset3D&) const noexcept = default;
61
62 s32 x;
63 s32 y;
64 s32 z;
65};
66
67struct Extent2D {
68 constexpr auto operator<=>(const Extent2D&) const noexcept = default;
69
70 u32 width;
71 u32 height;
72};
73
74struct Extent3D {
75 constexpr auto operator<=>(const Extent3D&) const noexcept = default;
76
77 u32 width;
78 u32 height;
79 u32 depth;
80};
81
82struct SubresourceLayers {
83 s32 base_level = 0;
84 s32 base_layer = 0;
85 s32 num_layers = 1;
86};
87
88struct SubresourceBase {
89 constexpr auto operator<=>(const SubresourceBase&) const noexcept = default;
90
91 s32 level = 0;
92 s32 layer = 0;
93};
94
95struct SubresourceExtent {
96 constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default;
97
98 s32 levels = 1;
99 s32 layers = 1;
100};
101
102struct SubresourceRange {
103 constexpr auto operator<=>(const SubresourceRange&) const noexcept = default;
104
105 SubresourceBase base;
106 SubresourceExtent extent;
107};
108
109struct ImageCopy {
110 SubresourceLayers src_subresource;
111 SubresourceLayers dst_subresource;
112 Offset3D src_offset;
113 Offset3D dst_offset;
114 Extent3D extent;
115};
116
117struct BufferImageCopy {
118 size_t buffer_offset;
119 size_t buffer_size;
120 u32 buffer_row_length;
121 u32 buffer_image_height;
122 SubresourceLayers image_subresource;
123 Offset3D image_offset;
124 Extent3D image_extent;
125};
126
127struct BufferCopy {
128 size_t src_offset;
129 size_t dst_offset;
130 size_t size;
131};
132
133struct SwizzleParameters {
134 Extent3D num_tiles;
135 Extent3D block;
136 size_t buffer_offset;
137 s32 level;
138};
139
140} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
new file mode 100644
index 000000000..9ed1fc007
--- /dev/null
+++ b/src/video_core/texture_cache/util.cpp
@@ -0,0 +1,1232 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file contains code from Ryujinx
6// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
7// The sections using code from Ryujinx are marked with a link to the original version
8
9// MIT License
10//
11// Copyright (c) Ryujinx Team and Contributors
12//
13// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
14// associated documentation files (the "Software"), to deal in the Software without restriction,
15// including without limitation the rights to use, copy, modify, merge, publish, distribute,
16// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
17// furnished to do so, subject to the following conditions:
18//
19// The above copyright notice and this permission notice shall be included in all copies or
20// substantial portions of the Software.
21//
22// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
23// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
25// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27//
28
29#include <algorithm>
30#include <array>
31#include <numeric>
32#include <optional>
33#include <span>
34#include <vector>
35
36#include "common/alignment.h"
37#include "common/assert.h"
38#include "common/bit_util.h"
39#include "common/common_types.h"
40#include "common/div_ceil.h"
41#include "video_core/compatible_formats.h"
42#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h"
44#include "video_core/surface.h"
45#include "video_core/texture_cache/decode_bc4.h"
46#include "video_core/texture_cache/format_lookup_table.h"
47#include "video_core/texture_cache/formatter.h"
48#include "video_core/texture_cache/samples_helper.h"
49#include "video_core/texture_cache/util.h"
50#include "video_core/textures/astc.h"
51#include "video_core/textures/decoders.h"
52
53namespace VideoCommon {
54
55namespace {
56
57using Tegra::Texture::GOB_SIZE;
58using Tegra::Texture::GOB_SIZE_SHIFT;
59using Tegra::Texture::GOB_SIZE_X;
60using Tegra::Texture::GOB_SIZE_X_SHIFT;
61using Tegra::Texture::GOB_SIZE_Y;
62using Tegra::Texture::GOB_SIZE_Y_SHIFT;
63using Tegra::Texture::GOB_SIZE_Z;
64using Tegra::Texture::GOB_SIZE_Z_SHIFT;
65using Tegra::Texture::MsaaMode;
66using Tegra::Texture::SwizzleTexture;
67using Tegra::Texture::TextureFormat;
68using Tegra::Texture::TextureType;
69using Tegra::Texture::TICEntry;
70using Tegra::Texture::UnswizzleTexture;
71using VideoCore::Surface::BytesPerBlock;
72using VideoCore::Surface::DefaultBlockHeight;
73using VideoCore::Surface::DefaultBlockWidth;
74using VideoCore::Surface::IsCopyCompatible;
75using VideoCore::Surface::IsPixelFormatASTC;
76using VideoCore::Surface::IsViewCompatible;
77using VideoCore::Surface::PixelFormatFromDepthFormat;
78using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
79using VideoCore::Surface::SurfaceType;
80
81constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
82
83struct LevelInfo {
84 Extent3D size;
85 Extent3D block;
86 Extent2D tile_size;
87 u32 bpp_log2;
88 u32 tile_width_spacing;
89};
90
91[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
92 if (shift == 0) {
93 return 0;
94 }
95 u32 x = unit_factor << (shift - 1);
96 if (x >= dimension) {
97 while (--shift) {
98 x >>= 1;
99 if (x < dimension) {
100 break;
101 }
102 }
103 }
104 return shift;
105}
106
107[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) {
108 return std::max<u32>(size >> level, 1);
109}
110
111[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) {
112 return Extent3D{
113 .width = AdjustMipSize(size.width, level),
114 .height = AdjustMipSize(size.height, level),
115 .depth = AdjustMipSize(size.depth, level),
116 };
117}
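Both overloads halve a dimension once per level and clamp at one texel, so (illustrative values) a 100-wide level-2 mip is 25 wide, and a dimension never reaches zero:

    static_assert(AdjustMipSize(100, 2) == 25);
    static_assert(AdjustMipSize(1, 5) == 1); // clamped, not shifted to zero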
118
119[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) {
120 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
121 return Extent3D{
122 .width = size.width >> samples_x,
123 .height = size.height >> samples_y,
124 .depth = size.depth,
125 };
126}
127
128template <u32 GOB_EXTENT>
129[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) {
130 do {
131 while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) {
132 --block_size;
133 }
134 } while (level--);
135 return block_size;
136}
137
138[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
139 u32 level) {
140 return {
141 .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
142 .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
143 .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
144 };
145}
146
147[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) {
148 return {
149 .width = Common::DivCeil(size.width, tile_size.width),
150 .height = Common::DivCeil(size.height, tile_size.height),
151 .depth = size.depth,
152 };
153}
154
155[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) {
156 return std::countl_zero(bytes_per_block) ^ 0x1F;
157}
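For the power-of-two block sizes used here, countl_zero(x) ^ 0x1F equals 31 - countl_zero(x), i.e. floor(log2(x)) of a u32; a couple of illustrative checks:

    static_assert(BytesPerBlockLog2(4u) == 2);
    static_assert(BytesPerBlockLog2(16u) == 4);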
158
159[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) {
160 return BytesPerBlockLog2(BytesPerBlock(format));
161}
162
163[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) {
164 const Extent3D num_blocks = AdjustTileSize(size, tile_size);
165 return num_blocks.width * num_blocks.height * num_blocks.depth;
166}
167
168[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) {
169 return Common::DivCeil(AdjustMipSize(size, level), block_size);
170}
171
172[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) {
173 return config.Width() * config.Height() * BytesPerBlock(format);
174}
175
176[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) {
177 switch (type) {
178 case TextureType::Texture2D:
179 case TextureType::Texture2DArray:
180 case TextureType::Texture2DNoMipmap:
181 case TextureType::Texture3D:
182 case TextureType::TextureCubeArray:
183 case TextureType::TextureCubemap:
184 return true;
185 case TextureType::Texture1D:
186 case TextureType::Texture1DArray:
187 case TextureType::Texture1DBuffer:
188 return false;
189 }
190 return false;
191}
192
193[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) {
194 switch (type) {
195 case ImageType::e2D:
196 case ImageType::e3D:
197 case ImageType::Linear:
198 return true;
199 case ImageType::e1D:
200 case ImageType::Buffer:
201 return false;
202 }
203 UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type));
204}
205
206[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
207 switch (num_samples) {
208 case 1:
209 return {1, 1};
210 case 2:
211 return {2, 1};
212 case 4:
213 return {2, 2};
214 case 8:
215 return {4, 2};
216 case 16:
217 return {4, 4};
218 }
219 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
220 return {1, 1};
221}
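These are per-axis sample factors, not shifts: 8 samples map onto a 4x2 grid, so MakeLevelInfo below stores an 8-sample 64x64 level as 256x128 texels (width * samples_x, height * samples_y).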
222
223[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
224 return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
225}
226
227[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) {
228 return Extent3D{
229 .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2,
230 .height = AdjustSize(info.size.height, level, info.tile_size.height),
231 .depth = AdjustMipSize(info.size.depth, level),
232 };
233}
234
235[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
236 const Extent3D blocks = NumLevelBlocks(info, level);
237 return Extent3D{
238 .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
239 .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height),
240 .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth),
241 };
242}
243
244[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) {
245 return Extent2D{
246 .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing,
247 .height = GOB_SIZE_Y_SHIFT + block_height,
248 };
249}
250
251[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob,
252 u32 block_depth) {
253 return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) ||
254 num_tiles.depth < (1U << block_depth);
255}
256
257[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob,
258 u32 bpp_log2) {
259 if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) {
260 return GOB_SIZE_X_SHIFT - bpp_log2;
261 } else {
262 return gob.width;
263 }
264}
265
266[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
267 u32 tile_width_spacing) {
268 const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing);
269 return StrideAlignment(num_tiles, block, gob, bpp_log2);
270}
271
272[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) {
273 const Extent3D blocks = NumLevelBlocks(info, level);
274 const Extent2D gobs{
275 .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT),
276 .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT),
277 };
278 const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing);
279 const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth);
280 const u32 alignment = is_small ? 0 : info.tile_width_spacing;
281 return Extent2D{
282 .width = Common::AlignBits(gobs.width, alignment),
283 .height = gobs.height,
284 };
285}
286
287[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) {
288 const Extent3D blocks = NumLevelBlocks(info, level);
289 const Extent3D tile_shift = TileShift(info, level);
290 const Extent2D gobs = NumGobs(info, level);
291 return Extent3D{
292 .width = Common::DivCeilLog2(gobs.width, tile_shift.width),
293 .height = Common::DivCeilLog2(gobs.height, tile_shift.height),
294 .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth),
295 };
296}
297
298[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) {
299 const Extent3D tile_shift = TileShift(info, level);
300 const Extent3D tiles = LevelTiles(info, level);
301 const u32 num_tiles = tiles.width * tiles.height * tiles.depth;
302 const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth;
303 return num_tiles << shift;
304}
305
306[[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info,
307 u32 num_levels) {
308 ASSERT(num_levels <= MAX_MIP_LEVELS);
309 std::array<u32, MAX_MIP_LEVELS> sizes{};
310 for (u32 level = 0; level < num_levels; ++level) {
311 sizes[level] = CalculateLevelSize(info, level);
312 }
313 return sizes;
314}
315
316[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
317 u32 num_samples, u32 tile_width_spacing) {
318 const auto [samples_x, samples_y] = Samples(num_samples);
319 const u32 bytes_per_block = BytesPerBlock(format);
320 return {
321 .size =
322 {
323 .width = size.width * samples_x,
324 .height = size.height * samples_y,
325 .depth = size.depth,
326 },
327 .block = block,
328 .tile_size = DefaultBlockSize(format),
329 .bpp_log2 = BytesPerBlockLog2(bytes_per_block),
330 .tile_width_spacing = tile_width_spacing,
331 };
332}
333
334[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
335 return MakeLevelInfo(info.format, info.size, info.block, info.num_samples,
336 info.tile_width_spacing);
337}
338
339[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
340 u32 num_samples, u32 tile_width_spacing,
341 u32 level) {
342 const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing);
343 u32 offset = 0;
344 for (u32 current_level = 0; current_level < level; ++current_level) {
345 offset += CalculateLevelSize(info, current_level);
346 }
347 return offset;
348}
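The offset of a level is thus the prefix sum of the sizes of the levels before it; a sketch of the same computation expressed through CalculateLevelSizes (assuming an identical LevelInfo):

    const std::array sizes = CalculateLevelSizes(info, level);
    const u32 offset = std::reduce(sizes.begin(), sizes.begin() + level, 0U);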
349
350[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block,
351 u32 tile_size_y, u32 tile_width_spacing) {
352 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134
353 if (tile_width_spacing > 0) {
354 const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth;
355 return Common::AlignBits(size_bytes, alignment_log2);
356 }
357 const u32 aligned_height = Common::AlignUp(size.height, tile_size_y);
358 while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) {
359 --block.height;
360 }
361 while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) {
362 --block.depth;
363 }
364 const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth;
365 const u32 num_blocks = size_bytes >> block_shift;
366 if (size_bytes != num_blocks << block_shift) {
367 return (num_blocks + 1) << block_shift;
368 }
369 return size_bytes;
370}
371
372[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info,
373 const ImageBase& overlap,
374 bool strict_size) {
375 const ImageInfo& info = overlap.info;
376 if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) {
377 return std::nullopt;
378 }
379 if (new_info.block != info.block) {
380 return std::nullopt;
381 }
382 const SubresourceExtent resources = new_info.resources;
383 return SubresourceExtent{
384 .levels = std::max(resources.levels, info.resources.levels),
385 .layers = std::max(resources.layers, info.resources.layers),
386 };
387}
388
389[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
390 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
391 const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
392 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
393 const auto it = std::ranges::find(slice_offsets, diff);
394 if (it == slice_offsets.end()) {
395 return std::nullopt;
396 }
397 const std::vector subresources = CalculateSliceSubresources(new_info);
398 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
399 const ImageInfo& info = overlap.info;
400 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
401 return std::nullopt;
402 }
403 const u32 mip_depth = std::max(1U, new_info.size.depth >> base.level);
404 if (mip_depth < info.size.depth + base.layer) {
405 return std::nullopt;
406 }
407 if (MipBlockSize(new_info, base.level) != info.block) {
408 return std::nullopt;
409 }
410 return SubresourceExtent{
411 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
412 .layers = 1,
413 };
414}
415
416[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
417 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
418 const u32 layer_stride = new_info.layer_stride;
419 const s32 new_size = layer_stride * new_info.resources.layers;
420 const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
421 if (diff > new_size) {
422 return std::nullopt;
423 }
424 const s32 base_layer = diff / layer_stride;
425 const s32 mip_offset = diff % layer_stride;
426 const std::array offsets = CalculateMipLevelOffsets(new_info);
427 const auto end = offsets.begin() + new_info.resources.levels;
428 const auto it = std::find(offsets.begin(), end, mip_offset);
429 if (it == end) {
430 // Mipmap is not aligned to any valid size
431 return std::nullopt;
432 }
433 const SubresourceBase base{
434 .level = static_cast<s32>(std::distance(offsets.begin(), it)),
435 .layer = base_layer,
436 };
437 const ImageInfo& info = overlap.info;
438 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
439 return std::nullopt;
440 }
441 if (MipBlockSize(new_info, base.level) != info.block) {
442 return std::nullopt;
443 }
444 return SubresourceExtent{
445 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
446 .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer),
447 };
448}
449
450[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info,
451 GPUVAddr gpu_addr,
452 VAddr cpu_addr,
453 const ImageBase& overlap,
454 bool strict_size) {
455 std::optional<SubresourceExtent> resources;
456 if (new_info.type != ImageType::e3D) {
457 resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size);
458 } else {
459 resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size);
460 }
461 if (!resources) {
462 return std::nullopt;
463 }
464 return OverlapResult{
465 .gpu_addr = gpu_addr,
466 .cpu_addr = cpu_addr,
467 .resources = *resources,
468 };
469}
470
471[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info,
472 GPUVAddr gpu_addr,
473 VAddr cpu_addr,
474 const ImageBase& overlap,
475 bool strict_size) {
476 const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr);
477 if (!base) {
478 return std::nullopt;
479 }
480 const ImageInfo& info = overlap.info;
481 if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) {
482 return std::nullopt;
483 }
484 if (new_info.block != MipBlockSize(info, base->level)) {
485 return std::nullopt;
486 }
487 const SubresourceExtent resources = new_info.resources;
488 s32 layers = 1;
489 if (info.type != ImageType::e3D) {
490 layers = std::max(resources.layers, info.resources.layers + base->layer);
491 }
492 return OverlapResult{
493 .gpu_addr = overlap.gpu_addr,
494 .cpu_addr = overlap.cpu_addr,
495 .resources =
496 {
497 .levels = std::max(resources.levels + base->level, info.resources.levels),
498 .layers = layers,
499 },
500 };
501}
502
503[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) {
504 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212
505 static constexpr u32 STRIDE_ALIGNMENT = 32;
506 ASSERT(info.type == ImageType::Linear);
507 const Extent2D num_tiles{
508 .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)),
509 .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)),
510 };
511 const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format);
512 return Extent2D{
513 .width = Common::AlignUp(num_tiles.width, width_alignment),
514 .height = num_tiles.height,
515 };
516}
517
518[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) {
519 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176
520 ASSERT(info.type != ImageType::Linear);
521 const Extent3D size = AdjustMipSize(info.size, level);
522 const Extent3D num_tiles{
523 .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)),
524 .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)),
525 .depth = size.depth,
526 };
527 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
528 const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
529 const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
530 return Extent3D{
531 .width = Common::AlignBits(num_tiles.width, alignment),
532 .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
533 .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
534 };
535}
536
537[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept {
538 u32 num_blocks = 0;
539 for (s32 level = 0; level < info.resources.levels; ++level) {
540 const Extent3D mip_size = AdjustMipSize(info.size, level);
541 num_blocks += NumBlocks(mip_size, tile_size);
542 }
543 return num_blocks;
544}
545
546[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept {
547 ASSERT(info.type == ImageType::e3D);
548 u32 num_slices = 0;
549 for (s32 level = 0; level < info.resources.levels; ++level) {
550 num_slices += AdjustMipSize(info.size.depth, level);
551 }
552 return num_slices;
553}
554
555void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
556 const ImageInfo& info, const BufferImageCopy& copy,
557 std::span<const u8> memory) {
558 ASSERT(copy.image_offset.z == 0);
559 ASSERT(copy.image_extent.depth == 1);
560 ASSERT(copy.image_subresource.base_level == 0);
561 ASSERT(copy.image_subresource.base_layer == 0);
562 ASSERT(copy.image_subresource.num_layers == 1);
563
564 const u32 bytes_per_block = BytesPerBlock(info.format);
565 const u32 row_length = copy.image_extent.width * bytes_per_block;
566 const u32 guest_offset_x = copy.image_offset.x * bytes_per_block;
567
568 for (u32 line = 0; line < copy.image_extent.height; ++line) {
569 const u32 host_offset_y = line * info.pitch;
570 const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch;
571 const u32 guest_offset = guest_offset_x + guest_offset_y;
572 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y,
573 row_length);
574 }
575}
576
577void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
578 const ImageInfo& info, const BufferImageCopy& copy,
579 std::span<const u8> input) {
580 const Extent3D size = info.size;
581 const LevelInfo level_info = MakeLevelInfo(info);
582 const Extent2D tile_size = DefaultBlockSize(info.format);
583 const u32 bytes_per_block = BytesPerBlock(info.format);
584
585 const s32 level = copy.image_subresource.base_level;
586 const Extent3D level_size = AdjustMipSize(size, level);
587 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
588 const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
589
590 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
591
592 UNIMPLEMENTED_IF(copy.image_offset.x != 0);
593 UNIMPLEMENTED_IF(copy.image_offset.y != 0);
594 UNIMPLEMENTED_IF(copy.image_offset.z != 0);
595 UNIMPLEMENTED_IF(copy.image_extent != level_size);
596
597 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
598 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
599
600 size_t host_offset = copy.buffer_offset;
601
602 const u32 num_levels = info.resources.levels;
603 const std::array sizes = CalculateLevelSizes(level_info, num_levels);
604 size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0);
605 const size_t layer_stride =
606 AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size,
607 level_info.block, tile_size.height, info.tile_width_spacing);
608 const size_t subresource_size = sizes[level];
609
610 const auto dst_data = std::make_unique<u8[]>(subresource_size);
611 const std::span<u8> dst(dst_data.get(), subresource_size);
612
613 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
614 const std::span<const u8> src = input.subspan(host_offset);
615 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
616 num_tiles.depth, block.height, block.depth);
617
618 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
619
620 host_offset += host_bytes_per_layer;
621 guest_offset += layer_stride;
622 }
623 ASSERT(host_offset - copy.buffer_offset == copy.buffer_size);
624}
625
626} // Anonymous namespace
627
628u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept {
629 if (info.type == ImageType::Buffer) {
630 return info.size.width * BytesPerBlock(info.format);
631 }
632 if (info.type == ImageType::Linear) {
633 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
634 }
635 if (info.resources.layers > 1) {
636 ASSERT(info.layer_stride != 0);
637 return info.layer_stride * info.resources.layers;
638 } else {
639 return CalculateLayerSize(info);
640 }
641}
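For the linear case the guest footprint is simply pitch times the number of block rows; with illustrative numbers, a 1024-byte pitch over 600 rows of 1x1 blocks occupies 1024 * 600 = 614400 bytes.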
642
643u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
644 if (info.type == ImageType::Buffer) {
645 return info.size.width * BytesPerBlock(info.format);
646 }
647 if (info.num_samples > 1) {
648 // Multisample images can't be uploaded or downloaded to the host
649 return 0;
650 }
651 if (info.type == ImageType::Linear) {
652 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
653 }
654 const Extent2D tile_size = DefaultBlockSize(info.format);
655 return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format);
656}
657
658u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
659 if (info.type == ImageType::Buffer) {
660 return info.size.width * BytesPerBlock(info.format);
661 }
662 static constexpr Extent2D TILE_SIZE{1, 1};
663 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
664}
665
666u32 CalculateLayerStride(const ImageInfo& info) noexcept {
667 ASSERT(info.type != ImageType::Linear);
668 const u32 layer_size = CalculateLayerSize(info);
669 const Extent3D size = info.size;
670 const Extent3D block = info.block;
671 const u32 tile_size_y = DefaultBlockHeight(info.format);
672 return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing);
673}
674
675u32 CalculateLayerSize(const ImageInfo& info) noexcept {
676 ASSERT(info.type != ImageType::Linear);
677 return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
678 info.tile_width_spacing, info.resources.levels);
679}
680
681std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
682 ASSERT(info.resources.levels <= MAX_MIP_LEVELS);
683 const LevelInfo level_info = MakeLevelInfo(info);
684 std::array<u32, MAX_MIP_LEVELS> offsets{};
685 u32 offset = 0;
686 for (s32 level = 0; level < info.resources.levels; ++level) {
687 offsets[level] = offset;
688 offset += CalculateLevelSize(level_info, level);
689 }
690 return offsets;
691}
692
693std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
694 ASSERT(info.type == ImageType::e3D);
695 std::vector<u32> offsets;
696 offsets.reserve(NumSlices(info));
697
698 const LevelInfo level_info = MakeLevelInfo(info);
699 u32 mip_offset = 0;
700 for (s32 level = 0; level < info.resources.levels; ++level) {
701 const Extent3D tile_shift = TileShift(level_info, level);
702 const Extent3D tiles = LevelTiles(level_info, level);
703 const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT;
704 const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift;
705 const u32 z_mask = (1U << tile_shift.depth) - 1;
706 const u32 depth = AdjustMipSize(info.size.depth, level);
707 for (u32 slice = 0; slice < depth; ++slice) {
708 const u32 z_low = slice & z_mask;
709 const u32 z_high = slice & ~z_mask;
710 offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size));
711 }
712 mip_offset += CalculateLevelSize(level_info, level);
713 }
714 return offsets;
715}
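Within a block, slices are interleaved at GOB granularity (z_low), while whole blocks advance by slice_size (z_high). With an illustrative tile_shift.depth of 1 (z_mask = 1): slice 0 sits at mip_offset, slice 1 at mip_offset + (1 << gob_size_shift), and slice 2 starts the next block at mip_offset + 2 * slice_size.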
716
717std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
718 ASSERT(info.type == ImageType::e3D);
719 std::vector<SubresourceBase> subresources;
720 subresources.reserve(NumSlices(info));
721 for (s32 level = 0; level < info.resources.levels; ++level) {
722 const s32 depth = AdjustMipSize(info.size.depth, level);
723 for (s32 slice = 0; slice < depth; ++slice) {
724 subresources.emplace_back(SubresourceBase{
725 .level = level,
726 .layer = slice,
727 });
728 }
729 }
730 return subresources;
731}
732
733u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
734 const Extent2D tile_size = DefaultBlockSize(info.format);
735 const Extent3D level_size = AdjustMipSize(info.size, level);
736 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
737 const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
738 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
739 return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
740}
741
742PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept {
743 return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
744 config.a_type, config.srgb_conversion);
745}
746
747ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
748 switch (info.type) {
749 case ImageType::e2D:
750 return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D;
751 case ImageType::e3D:
752 return ImageViewType::e2DArray;
753 case ImageType::Linear:
754 return ImageViewType::e2D;
755 default:
756 UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type));
757 return ImageViewType{};
758 }
759}
760
761std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
762 SubresourceBase base) {
763 ASSERT(dst.resources.levels >= src.resources.levels);
764 ASSERT(dst.num_samples == src.num_samples);
765
766 const bool is_dst_3d = dst.type == ImageType::e3D;
767 if (is_dst_3d) {
768 ASSERT(src.type == ImageType::e3D);
769 ASSERT(src.resources.levels == 1);
770 }
771
772 std::vector<ImageCopy> copies;
773 copies.reserve(src.resources.levels);
774 for (s32 level = 0; level < src.resources.levels; ++level) {
775 ImageCopy& copy = copies.emplace_back();
776 copy.src_subresource = SubresourceLayers{
777 .base_level = level,
778 .base_layer = 0,
779 .num_layers = src.resources.layers,
780 };
781 copy.dst_subresource = SubresourceLayers{
782 .base_level = base.level + level,
783 .base_layer = is_dst_3d ? 0 : base.layer,
784 .num_layers = is_dst_3d ? 1 : src.resources.layers,
785 };
786 copy.src_offset = Offset3D{
787 .x = 0,
788 .y = 0,
789 .z = 0,
790 };
791 copy.dst_offset = Offset3D{
792 .x = 0,
793 .y = 0,
794 .z = is_dst_3d ? base.layer : 0,
795 };
796 const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level);
797 copy.extent = AdjustSamplesSize(mip_size, dst.num_samples);
798 if (is_dst_3d) {
799 copy.extent.depth = src.size.depth;
800 }
801 }
802 return copies;
803}
804
805bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
806 if (config.Address() == 0) {
807 return false;
808 }
809 if (config.Address() > (u64(1) << 48)) {
810 return false;
811 }
812 return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
813}
814
815std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
816 const ImageInfo& info, std::span<u8> output) {
817 const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
818 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
819 const Extent3D size = info.size;
820
821 if (info.type == ImageType::Linear) {
822 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
823
824 ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
825 return {{
826 .buffer_offset = 0,
827 .buffer_size = guest_size_bytes,
828 .buffer_row_length = info.pitch >> bpp_log2,
829 .buffer_image_height = size.height,
830 .image_subresource =
831 {
832 .base_level = 0,
833 .base_layer = 0,
834 .num_layers = 1,
835 },
836 .image_offset = {0, 0, 0},
837 .image_extent = size,
838 }};
839 }
840 const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
841 gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
842 const std::span<const u8> input(input_data.get(), guest_size_bytes);
843
844 const LevelInfo level_info = MakeLevelInfo(info);
845 const s32 num_layers = info.resources.layers;
846 const s32 num_levels = info.resources.levels;
847 const Extent2D tile_size = DefaultBlockSize(info.format);
848 const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
849 const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
850 const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0U);
851 const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height,
852 info.tile_width_spacing);
853 size_t guest_offset = 0;
854 u32 host_offset = 0;
855 std::vector<BufferImageCopy> copies(num_levels);
856
857 for (s32 level = 0; level < num_levels; ++level) {
858 const Extent3D level_size = AdjustMipSize(size, level);
859 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
860 const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2;
861 copies[level] = BufferImageCopy{
862 .buffer_offset = host_offset,
863 .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers,
864 .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width),
865 .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height),
866 .image_subresource =
867 {
868 .base_level = level,
869 .base_layer = 0,
870 .num_layers = info.resources.layers,
871 },
872 .image_offset = {0, 0, 0},
873 .image_extent = level_size,
874 };
875 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
876 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
877 const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
878 size_t guest_layer_offset = 0;
879
880 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
881 const std::span<u8> dst = output.subspan(host_offset);
882 const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset);
883 UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height,
884 num_tiles.depth, block.height, block.depth, stride_alignment);
885 guest_layer_offset += layer_stride;
886 host_offset += host_bytes_per_layer;
887 }
888 guest_offset += level_sizes[level];
889 }
890 return copies;
891}
892
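The loop above also transposes layouts: guest memory is layer-major (each layer carries its whole mip chain, layer_stride bytes apart), while the host staging buffer is level-major (all layers of one level packed together, matching the num_layers in each copy's subresource). A toy model of the two offset computations, under the simplifying assumption of equal guest/host level sizes and no alignment, neither of which holds in the real code:

#include <cstdint>

// Toy: one image, 2 layers, 2 levels of 64 and 16 bytes (layer_stride = 80).
constexpr std::uint32_t kLevelBytes[2] = {64, 16};
constexpr std::uint32_t kLayerStride = 80;

constexpr std::uint32_t GuestOffset(std::uint32_t layer, std::uint32_t level) {
    std::uint32_t offset = layer * kLayerStride; // layer-major
    for (std::uint32_t l = 0; l < level; ++l) {
        offset += kLevelBytes[l];
    }
    return offset;
}

constexpr std::uint32_t HostOffset(std::uint32_t layer, std::uint32_t level,
                                   std::uint32_t num_layers) {
    std::uint32_t offset = 0;
    for (std::uint32_t l = 0; l < level; ++l) {
        offset += kLevelBytes[l] * num_layers; // level-major
    }
    return offset + layer * kLevelBytes[level];
}

// Level 1 of layer 0 directly follows layer 0's level 0 in guest memory...
static_assert(GuestOffset(0, 1) == 64);
// ...but on the host it follows level 0 of *both* layers.
static_assert(HostOffset(0, 1, 2) == 128);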
893BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
894 const ImageBase& image, std::span<u8> output) {
895 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
896 return BufferCopy{
897 .src_offset = 0,
898 .dst_offset = 0,
899 .size = image.guest_size_bytes,
900 };
901}
902
903void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
904 std::span<BufferImageCopy> copies) {
905 u32 output_offset = 0;
906
907 const Extent2D tile_size = DefaultBlockSize(info.format);
908 for (BufferImageCopy& copy : copies) {
909 const u32 level = copy.image_subresource.base_level;
910 const Extent3D mip_size = AdjustMipSize(info.size, level);
911 ASSERT(copy.image_offset == Offset3D{});
912 ASSERT(copy.image_subresource.base_layer == 0);
913 ASSERT(copy.image_extent == mip_size);
914 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
915 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
916
917 if (IsPixelFormatASTC(info.format)) {
918 ASSERT(copy.image_extent.depth == 1);
919 Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
920 copy.image_extent.width, copy.image_extent.height,
921 copy.image_subresource.num_layers, tile_size.width,
922 tile_size.height, output.subspan(output_offset));
923 } else {
924 DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
925 output.subspan(output_offset));
926 }
927 copy.buffer_offset = output_offset;
928 copy.buffer_row_length = mip_size.width;
929 copy.buffer_image_height = mip_size.height;
930
931 output_offset += copy.image_extent.width * copy.image_extent.height *
932 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
933 }
934}
935
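After conversion every copy addresses tightly packed texels, so the rewritten buffer_offset values advance by width x height x layers x CONVERTED_BYTES_PER_BLOCK per level. A small sketch of that bookkeeping, assuming the constant is 4 bytes (one RGBA8 texel); the real definition lives elsewhere in the texture cache:

#include <cstdint>

// Assumption: converted output is RGBA8, 4 bytes per texel.
constexpr std::uint32_t kConvertedBytesPerBlock = 4;

constexpr std::uint32_t ConvertedMipBytes(std::uint32_t width, std::uint32_t height,
                                          std::uint32_t layers) {
    return width * height * layers * kConvertedBytesPerBlock;
}

// Two single-layer mips of a 256x256 image: the second copy's buffer_offset
// is exactly the converted size of the first.
static_assert(ConvertedMipBytes(256, 256, 1) == 0x40000);
static_assert(ConvertedMipBytes(128, 128, 1) == 0x10000);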
936std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
937 const Extent3D size = info.size;
938 const u32 bytes_per_block = BytesPerBlock(info.format);
939 if (info.type == ImageType::Linear) {
940 ASSERT(info.pitch % bytes_per_block == 0);
941 return {{
942 .buffer_offset = 0,
943 .buffer_size = static_cast<size_t>(info.pitch) * size.height,
944 .buffer_row_length = info.pitch / bytes_per_block,
945 .buffer_image_height = size.height,
946 .image_subresource =
947 {
948 .base_level = 0,
949 .base_layer = 0,
950 .num_layers = 1,
951 },
952 .image_offset = {0, 0, 0},
953 .image_extent = size,
954 }};
955 }
956 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
957
958 const s32 num_layers = info.resources.layers;
959 const s32 num_levels = info.resources.levels;
960 const Extent2D tile_size = DefaultBlockSize(info.format);
961
962 u32 host_offset = 0;
963
964 std::vector<BufferImageCopy> copies(num_levels);
965 for (s32 level = 0; level < num_levels; ++level) {
966 const Extent3D level_size = AdjustMipSize(size, level);
967 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
968 const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers;
969 copies[level] = BufferImageCopy{
970 .buffer_offset = host_offset,
971 .buffer_size = host_bytes_per_level,
972 .buffer_row_length = level_size.width,
973 .buffer_image_height = level_size.height,
974 .image_subresource =
975 {
976 .base_level = level,
977 .base_layer = 0,
978 .num_layers = info.resources.layers,
979 },
980 .image_offset = {0, 0, 0},
981 .image_extent = level_size,
982 };
983 host_offset += host_bytes_per_level;
984 }
985 return copies;
986}
987
988Extent3D MipSize(Extent3D size, u32 level) {
989 return AdjustMipSize(size, level);
990}
991
992Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
993 const LevelInfo level_info = MakeLevelInfo(info);
994 const Extent2D tile_size = DefaultBlockSize(info.format);
995 const Extent3D level_size = AdjustMipSize(info.size, level);
996 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
997 return AdjustMipBlockSize(num_tiles, level_info.block, level);
998}
999
1000std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
1001 const Extent2D tile_size = DefaultBlockSize(info.format);
1002 if (info.type == ImageType::Linear) {
1003 return std::vector{SwizzleParameters{
1004 .num_tiles = AdjustTileSize(info.size, tile_size),
1005 .block = {},
1006 .buffer_offset = 0,
1007 .level = 0,
1008 }};
1009 }
1010 const LevelInfo level_info = MakeLevelInfo(info);
1011 const Extent3D size = info.size;
1012 const s32 num_levels = info.resources.levels;
1013
1014 u32 guest_offset = 0;
1015 std::vector<SwizzleParameters> params(num_levels);
1016 for (s32 level = 0; level < num_levels; ++level) {
1017 const Extent3D level_size = AdjustMipSize(size, level);
1018 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
1019 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
1020 params[level] = SwizzleParameters{
1021 .num_tiles = num_tiles,
1022 .block = block,
1023 .buffer_offset = guest_offset,
1024 .level = level,
1025 };
1026 guest_offset += CalculateLevelSize(level_info, level);
1027 }
1028 return params;
1029}
1030
1031void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
1032 std::span<const BufferImageCopy> copies, std::span<const u8> memory) {
1033 const bool is_pitch_linear = info.type == ImageType::Linear;
1034 for (const BufferImageCopy& copy : copies) {
1035 if (is_pitch_linear) {
1036 SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1037 } else {
1038 SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1039 }
1040 }
1041}
1042
1043bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level,
1044 u32 rhs_level, bool strict_size) noexcept {
1045 ASSERT(lhs.type != ImageType::Linear);
1046 ASSERT(rhs.type != ImageType::Linear);
1047 if (strict_size) {
1048 const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level);
1049 const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level);
1050 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1051 } else {
1052 const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level);
1053 const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level);
1054 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1055 }
1056}
1057
1058bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept {
1059 ASSERT(lhs.type == ImageType::Linear);
1060 ASSERT(rhs.type == ImageType::Linear);
1061 if (strict_size) {
1062 return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height;
1063 } else {
1064 const Extent2D lhs_size = PitchLinearAlignedSize(lhs);
1065 const Extent2D rhs_size = PitchLinearAlignedSize(rhs);
1066 return lhs_size == rhs_size;
1067 }
1068}
1069
1070std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
1071 VAddr cpu_addr, const ImageBase& overlap,
1072 bool strict_size) {
1073 ASSERT(new_info.type != ImageType::Linear);
1074 ASSERT(overlap.info.type != ImageType::Linear);
1075 if (!IsLayerStrideCompatible(new_info, overlap.info)) {
1076 return std::nullopt;
1077 }
1078 if (!IsViewCompatible(overlap.info.format, new_info.format)) {
1079 return std::nullopt;
1080 }
1081 if (gpu_addr == overlap.gpu_addr) {
1082 const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size);
1083 if (!solution) {
1084 return std::nullopt;
1085 }
1086 return OverlapResult{
1087 .gpu_addr = gpu_addr,
1088 .cpu_addr = cpu_addr,
1089 .resources = *solution,
1090 };
1091 }
1092 if (overlap.gpu_addr > gpu_addr) {
1093 return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1094 }
1095 // if overlap.gpu_addr < gpu_addr
1096 return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1097}
1098
1099bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
1100 // If either of the layer strides is zero, we can assume they are compatible
1101 // These images generally come from render targets
1102 if (lhs.layer_stride == 0) {
1103 return true;
1104 }
1105 if (rhs.layer_stride == 0) {
1106 return true;
1107 }
1108 // It's definitely compatible if the layer stride matches
1109 if (lhs.layer_stride == rhs.layer_stride) {
1110 return true;
1111 }
1112 // We also have to compare the unaligned strides: when an image has no
1113 // layers, its layer stride is not aligned
1114 if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) {
1115 return true;
1116 }
1117 return false;
1118}
1119
1120std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
1121 GPUVAddr candidate_addr, RelaxedOptions options) {
1122 const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
1123 if (!base) {
1124 return std::nullopt;
1125 }
1126 const ImageInfo& existing = image.info;
1127 if (False(options & RelaxedOptions::Format)) {
1128 if (!IsViewCompatible(existing.format, candidate.format)) {
1129 return std::nullopt;
1130 }
1131 }
1132 if (!IsLayerStrideCompatible(existing, candidate)) {
1133 return std::nullopt;
1134 }
1135 if (existing.type != candidate.type) {
1136 return std::nullopt;
1137 }
1138 if (False(options & RelaxedOptions::Samples)) {
1139 if (existing.num_samples != candidate.num_samples) {
1140 return std::nullopt;
1141 }
1142 }
1143 if (existing.resources.levels < candidate.resources.levels + base->level) {
1144 return std::nullopt;
1145 }
1146 if (existing.type == ImageType::e3D) {
1147 const u32 mip_depth = std::max(1U, existing.size.depth >> base->level);
1148 if (mip_depth < candidate.size.depth + base->layer) {
1149 return std::nullopt;
1150 }
1151 } else {
1152 if (existing.resources.layers < candidate.resources.layers + base->layer) {
1153 return std::nullopt;
1154 }
1155 }
1156 const bool strict_size = False(options & RelaxedOptions::Size);
1157 if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
1158 return std::nullopt;
1159 }
1160 // TODO: compare block sizes
1161 return base;
1162}
1163
1164bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
1165 RelaxedOptions options) {
1166 return FindSubresource(candidate, image, candidate_addr, options).has_value();
1167}
1168
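FindSubresource relies on an inverted-flag idiom: every check is strict unless its RelaxedOptions bit is set, which is why False(options & RelaxedOptions::Size) reads as "size must match strictly". A stand-alone sketch of that pattern (Opts and False mirror, but are not, the common_funcs helpers):

enum class Opts : unsigned { None = 0, Format = 1 << 0, Samples = 1 << 1, Size = 1 << 2 };

constexpr Opts operator&(Opts lhs, Opts rhs) {
    return static_cast<Opts>(static_cast<unsigned>(lhs) & static_cast<unsigned>(rhs));
}
constexpr bool False(Opts opts) {
    return static_cast<unsigned>(opts) == 0;
}

// No relaxed bits: the size comparison stays strict.
static_assert(False(Opts::None & Opts::Size));
// Passing Opts::Size relaxes only the size comparison.
static_assert(!False(Opts::Size & Opts::Size));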
1169void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
1170 const ImageBase* src) {
1171 if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1172 src_info.format = src->info.format;
1173 }
1174 if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1175 dst_info.format = dst->info.format;
1176 }
1177 if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1178 dst_info.format = src->info.format;
1179 }
1180 if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1181 src_info.format = dst->info.format;
1182 }
1183}
1184
1185u32 MapSizeBytes(const ImageBase& image) {
1186 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
1187 return image.guest_size_bytes;
1188 } else if (True(image.flags & ImageFlagBits::Converted)) {
1189 return image.converted_size_bytes;
1190 } else {
1191 return image.unswizzled_size_bytes;
1192 }
1193}
1194
1195using P = PixelFormat;
1196
1197static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000);
1198static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
1199
1200static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00);
1201static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) ==
1202 0x50d200);
1203
1204static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0);
1205static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000);
1206static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000);
1207static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000);
1208static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000);
1209static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000);
1210static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000);
1211static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400);
1212static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600);
1213static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800);
1214
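The A8B8G8R8 expectations above follow a visible pattern: each level advances by width x height x 4 bytes until a mip falls below one GOB (512 bytes), after which every remaining level still occupies a whole GOB. A self-contained model that reproduces these constants for this specific 1024x1024, block height {0, 4} case; it is a deliberate simplification, not the general CalculateLevelOffset algorithm:

#include <algorithm>
#include <cstdint>

constexpr std::uint32_t LevelBytes(std::uint32_t level) {
    const std::uint32_t dim = std::max<std::uint32_t>(1024u >> level, 1u);
    return std::max<std::uint32_t>(dim * dim * 4, 512); // floor of one 512-byte GOB
}

constexpr std::uint32_t LevelOffset(std::uint32_t level) {
    std::uint32_t offset = 0;
    for (std::uint32_t l = 0; l < level; ++l) {
        offset += LevelBytes(l);
    }
    return offset;
}

static_assert(LevelOffset(1) == 0x400000); // 1024 * 1024 * 4
static_assert(LevelOffset(4) == 0x550000);
static_assert(LevelOffset(8) == 0x555600); // the 8x8 mip rounds up to a full GOB
static_assert(LevelOffset(9) == 0x555800);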
1215constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
1216 u32 tile_width_spacing, u32 level) {
1217 const Extent3D size{width, height, 1};
1218 const Extent3D block{0, block_height, 0};
1219 const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level);
1220 return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
1221}
1222
1223static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800);
1224static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000);
1225static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000);
1226
1227static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000,
1228 "Tile width spacing is not working");
1229static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000,
1230 "Compressed tile width spacing is not working");
1231
1232} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
new file mode 100644
index 000000000..dbbbd33cd
--- /dev/null
+++ b/src/video_core/texture_cache/util.h
@@ -0,0 +1,107 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <span>
9
10#include "common/common_types.h"
11
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/image_base.h"
15#include "video_core/texture_cache/image_view_base.h"
16#include "video_core/texture_cache/types.h"
17#include "video_core/textures/texture.h"
18
19namespace VideoCommon {
20
21using Tegra::Texture::TICEntry;
22
23struct OverlapResult {
24 GPUVAddr gpu_addr;
25 VAddr cpu_addr;
26 SubresourceExtent resources;
27};
28
29[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;
30
31[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;
32
33[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept;
34
35[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept;
36
37[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept;
38
39[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(
40 const ImageInfo& info) noexcept;
41
42[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
43
44[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
45
46[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
47
48[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC(
49 const Tegra::Texture::TICEntry& config) noexcept;
50
51[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
52
53[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
54 const ImageInfo& src,
55 SubresourceBase base);
56
57[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
58
59[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
60 GPUVAddr gpu_addr, const ImageInfo& info,
61 std::span<u8> output);
62
63[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
64 const ImageBase& image, std::span<u8> output);
65
66void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
67 std::span<BufferImageCopy> copies);
68
69[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
70
71[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
72
73[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
74
75[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
76
77void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
78 std::span<const BufferImageCopy> copies, std::span<const u8> memory);
79
80[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
81 const ImageInfo& overlap_info, u32 new_level,
82 u32 overlap_level, bool strict_size) noexcept;
83
84[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs,
85 bool strict_size) noexcept;
86
87[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
88 GPUVAddr gpu_addr, VAddr cpu_addr,
89 const ImageBase& overlap,
90 bool strict_size);
91
92[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
93
94[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate,
95 const ImageBase& image,
96 GPUVAddr candidate_addr,
97 RelaxedOptions options);
98
99[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
100 GPUVAddr candidate_addr, RelaxedOptions options);
101
102void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
103 const ImageBase* src);
104
105[[nodiscard]] u32 MapSizeBytes(const ImageBase& image);
106
107} // namespace VideoCommon
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 365bde2f1..acd5bdd78 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -18,6 +18,7 @@
18#include <algorithm> 18#include <algorithm>
19#include <cassert> 19#include <cassert>
20#include <cstring> 20#include <cstring>
21#include <span>
21#include <vector> 22#include <vector>
22 23
23#include <boost/container/static_vector.hpp> 24#include <boost/container/static_vector.hpp>
@@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
600 return params; 601 return params;
601} 602}
602 603
603static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, 604static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
604 u32 blockHeight) { 605 u32 blockHeight) {
605 // Don't actually care about the void extent, just read the bits... 606 // Don't actually care about the void extent, just read the bits...
606 for (s32 i = 0; i < 4; ++i) { 607 for (s32 i = 0; i < 4; ++i) {
@@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
623 } 624 }
624} 625}
625 626
626static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { 627static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
627 for (u32 j = 0; j < blockHeight; j++) { 628 for (u32 j = 0; j < blockHeight; j++) {
628 for (u32 i = 0; i < blockWidth; i++) { 629 for (u32 i = 0; i < blockWidth; i++) {
629 outBuf[j * blockWidth + i] = 0xFFFF00FF; 630 outBuf[j * blockWidth + i] = 0xFFFF00FF;
@@ -1438,9 +1439,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1438#undef READ_INT_VALUES 1439#undef READ_INT_VALUES
1439} 1440}
1440 1441
1441static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, 1442static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1442 u32* outBuf) { 1443 const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
1443 InputBitStream strm(inBuf); 1444 InputBitStream strm(inBuf.data());
1444 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1445 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1445 1446
1446 // Was there an error? 1447 // Was there an error?
@@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1601 } 1602 }
1602 1603
1603 // Read the texel weight data.. 1604 // Read the texel weight data..
1604 u8 texelWeightData[16]; 1605 std::array<u8, 16> texelWeightData;
1605 memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); 1606 std::ranges::copy(inBuf, texelWeightData.begin());
1606 1607
1607 // Reverse everything 1608 // Reverse everything
1608 for (u32 i = 0; i < 8; i++) { 1609 for (u32 i = 0; i < 8; i++) {
@@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1618 1619
1619 // Make sure that higher non-texel bits are set to zero 1620 // Make sure that higher non-texel bits are set to zero
1620 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; 1621 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1621 texelWeightData[clearByteStart - 1] = 1622 if (clearByteStart > 0) {
1622 texelWeightData[clearByteStart - 1] & 1623 texelWeightData[clearByteStart - 1] &=
1623 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); 1624 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1624 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1625 }
1626 std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
1625 1627
1626 IntegerEncodedVector texelWeightValues; 1628 IntegerEncodedVector texelWeightValues;
1627 1629
1628 InputBitStream weightStream(texelWeightData); 1630 InputBitStream weightStream(texelWeightData.data());
1629 1631
1630 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, 1632 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
1631 weightParams.GetNumWeightValues()); 1633 weightParams.GetNumWeightValues());
@@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1672 1674
1673namespace Tegra::Texture::ASTC { 1675namespace Tegra::Texture::ASTC {
1674 1676
1675std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, 1677void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
1676 u32 block_height) { 1678 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
1677 u32 blockIdx = 0; 1679 u32 block_index = 0;
1678 std::size_t depth_offset = 0; 1680 std::size_t depth_offset = 0;
1679 std::vector<u8> outData(height * width * depth * 4); 1681 for (u32 z = 0; z < depth; z++) {
1680 for (u32 k = 0; k < depth; k++) { 1682 for (u32 y = 0; y < height; y += block_height) {
1681 for (u32 j = 0; j < height; j += block_height) { 1683 for (u32 x = 0; x < width; x += block_width) {
1682 for (u32 i = 0; i < width; i += block_width) { 1684 const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
1683
1684 const u8* blockPtr = data + blockIdx * 16;
1685 1685
1686 // Blocks can be at most 12x12 1686 // Blocks can be at most 12x12
1687 u32 uncompData[144]; 1687 std::array<u32, 12 * 12> uncompData;
1688 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); 1688 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1689 1689
1690 u32 decompWidth = std::min(block_width, width - i); 1690 u32 decompWidth = std::min(block_width, width - x);
1691 u32 decompHeight = std::min(block_height, height - j); 1691 u32 decompHeight = std::min(block_height, height - y);
1692 1692
1693 u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; 1693 const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
1694 for (u32 jj = 0; jj < decompHeight; jj++) { 1694 for (u32 jj = 0; jj < decompHeight; jj++) {
1695 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); 1695 std::memcpy(outRow.data() + jj * width * 4,
1696 uncompData.data() + jj * block_width, decompWidth * 4);
1696 } 1697 }
1697 1698 ++block_index;
1698 blockIdx++;
1699 } 1699 }
1700 } 1700 }
1701 depth_offset += height * width * 4; 1701 depth_offset += height * width * 4;
1702 } 1702 }
1703
1704 return outData;
1705} 1703}
1706 1704
1707} // namespace Tegra::Texture::ASTC 1705} // namespace Tegra::Texture::ASTC
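With the span-based signature, allocation moves to the caller: Decompress fills caller-owned storage instead of returning a std::vector. A hedged adaptation sketch (DecodeAstc is hypothetical), sizing the buffer exactly as the removed vector did, height * width * depth * 4:

#include <cstdint>
#include <span>
#include <vector>

#include "video_core/textures/astc.h"

std::vector<std::uint8_t> DecodeAstc(std::span<const std::uint8_t> compressed,
                                     std::uint32_t width, std::uint32_t height,
                                     std::uint32_t depth, std::uint32_t block_width,
                                     std::uint32_t block_height) {
    // Output is RGBA8: 4 bytes per texel.
    std::vector<std::uint8_t> rgba8(std::size_t{width} * height * depth * 4);
    Tegra::Texture::ASTC::Decompress(compressed, width, height, depth, block_width,
                                     block_height, rgba8);
    return rgba8;
}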
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 991cdba72..9105119bc 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -5,11 +5,11 @@
5#pragma once 5#pragma once
6 6
7#include <cstdint> 7#include <cstdint>
8#include <vector> 8#include <span>
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, 12void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 13 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
14 14
15} // namespace Tegra::Texture::ASTC 15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
deleted file mode 100644
index 962921483..000000000
--- a/src/video_core/textures/convert.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/surface.h"
14#include "video_core/textures/astc.h"
15#include "video_core/textures/convert.h"
16
17namespace Tegra::Texture {
18
19using VideoCore::Surface::PixelFormat;
20
21template <bool reverse>
22void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
23 union S8Z24 {
24 BitField<0, 24, u32> z24;
25 BitField<24, 8, u32> s8;
26 };
27 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
28
29 union Z24S8 {
30 BitField<0, 8, u32> s8;
31 BitField<8, 24, u32> z24;
32 };
33 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
34
35 S8Z24 s8z24_pixel{};
36 Z24S8 z24s8_pixel{};
37 constexpr auto bpp{
38 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
39 for (std::size_t y = 0; y < height; ++y) {
40 for (std::size_t x = 0; x < width; ++x) {
41 const std::size_t offset{bpp * (y * width + x)};
42 if constexpr (reverse) {
43 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
44 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
45 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
46 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
47 } else {
48 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
49 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
50 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
51 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
52 }
53 }
54 }
55}
56
57static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
58 SwapS8Z24ToZ24S8<false>(data, width, height);
59}
60
61static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
62 SwapS8Z24ToZ24S8<true>(data, width, height);
63}
64
65void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width,
66 u32 height, u32 depth, bool convert_astc, bool convert_s8z24) {
67 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
68 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
69 u32 block_width{};
70 u32 block_height{};
71 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
72 const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress(
73 in_data, width, height, depth, block_width, block_height);
74 std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
75
76 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
77 Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
78 }
79}
80
81void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
82 bool convert_astc, bool convert_s8z24) {
83 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
84 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
85 static_cast<u32>(pixel_format));
86 UNREACHABLE();
87
88 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
89 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
90 }
91}
92
93} // namespace Tegra::Texture
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
deleted file mode 100644
index d5d6c77bb..000000000
--- a/src/video_core/textures/convert.h
+++ /dev/null
@@ -1,22 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCore::Surface {
10enum class PixelFormat;
11}
12
13namespace Tegra::Texture {
14
15void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format,
16 u32 width, u32 height, u32 depth, bool convert_astc,
17 bool convert_s8z24);
18
19void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
20 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
21
22} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 16d46a018..9f5181318 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -2,204 +2,111 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
5#include <cmath> 6#include <cmath>
6#include <cstring> 7#include <cstring>
8#include <span>
9#include <utility>
10
7#include "common/alignment.h" 11#include "common/alignment.h"
8#include "common/assert.h" 12#include "common/assert.h"
9#include "common/bit_util.h" 13#include "common/bit_util.h"
14#include "common/div_ceil.h"
10#include "video_core/gpu.h" 15#include "video_core/gpu.h"
11#include "video_core/textures/decoders.h" 16#include "video_core/textures/decoders.h"
12#include "video_core/textures/texture.h" 17#include "video_core/textures/texture.h"
13 18
14namespace Tegra::Texture { 19namespace Tegra::Texture {
15namespace {
16 20
21namespace {
17/** 22/**
18 * This table represents the internal swizzle of a gob, 23 * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
19 * in format 16 bytes x 2 sector packing.
20 * Calculates the offset of an (x, y) position within a swizzled texture. 24 * Calculates the offset of an (x, y) position within a swizzled texture.
21 * Taken from the Tegra X1 Technical Reference Manual, pages 1187-1188 25 * Taken from the Tegra X1 Technical Reference Manual, pages 1187-1188
22 */ 26 */
23template <std::size_t N, std::size_t M, u32 Align> 27constexpr SwizzleTable MakeSwizzleTableConst() {
24struct alignas(64) SwizzleTable { 28 SwizzleTable table{};
25 static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); 29 for (u32 y = 0; y < table.size(); ++y) {
26 constexpr SwizzleTable() { 30 for (u32 x = 0; x < table[0].size(); ++x) {
27 for (u32 y = 0; y < N; ++y) { 31 table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
28 for (u32 x = 0; x < M; ++x) { 32 (y % 2) * 16 + (x % 16);
29 const u32 x2 = x * Align;
30 values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
31 ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16));
32 }
33 } 33 }
34 } 34 }
35 const std::array<u16, M>& operator[](std::size_t index) const { 35 return table;
36 return values[index]; 36}
37 }
38 std::array<std::array<u16, M>, N> values{};
39};
40 37
41constexpr u32 FAST_SWIZZLE_ALIGN = 16; 38constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst();
42 39
43constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); 40template <bool TO_LINEAR>
44constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); 41void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
42 u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
43 // The origin of the transformation can be configured here, leave it as zero as the current API
44 // doesn't expose it.
45 static constexpr u32 origin_x = 0;
46 static constexpr u32 origin_y = 0;
47 static constexpr u32 origin_z = 0;
45 48
46/** 49 // We can configure here a custom pitch
47 * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 50 // As it's not exposed 'width * bpp' will be the expected pitch.
48 * Instead of going gob by gob, we map the coordinates inside a block and manage from 51 const u32 pitch = width * bytes_per_pixel;
49 * those. Block_Width is assumed to be 1. 52 const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
50 */
51void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
52 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
53 const u32 y_end, const u32 z_end, const u32 tile_offset,
54 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
55 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
56 std::array<u8*, 2> data_ptrs;
57 u32 z_address = tile_offset;
58
59 for (u32 z = z_start; z < z_end; z++) {
60 u32 y_address = z_address;
61 u32 pixel_base = layer_z * z + y_start * stride_x;
62 for (u32 y = y_start; y < y_end; y++) {
63 const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
64 for (u32 x = x_start; x < x_end; x++) {
65 const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
66 const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
67 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
68 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
69 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
70 }
71 pixel_base += stride_x;
72 if ((y + 1) % GOB_SIZE_Y == 0)
73 y_address += GOB_SIZE;
74 }
75 z_address += xy_block_size;
76 }
77}
78 53
79/** 54 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
80 * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 55 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
81 * Instead of going gob by gob, we map the coordinates inside a block and manage from 56 const u32 slice_size =
82 * those. Block_Width is assumed to be 1. 57 Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
83 */
84void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
85 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
86 const u32 y_end, const u32 z_end, const u32 tile_offset,
87 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
88 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
89 std::array<u8*, 2> data_ptrs;
90 u32 z_address = tile_offset;
91 const u32 x_startb = x_start * bytes_per_pixel;
92 const u32 x_endb = x_end * bytes_per_pixel;
93
94 for (u32 z = z_start; z < z_end; z++) {
95 u32 y_address = z_address;
96 u32 pixel_base = layer_z * z + y_start * stride_x;
97 for (u32 y = y_start; y < y_end; y++) {
98 const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
99 for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
100 const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
101 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
102 const u32 pixel_index{out_x + pixel_base};
103 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
104 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
105 std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
106 }
107 pixel_base += stride_x;
108 if ((y + 1) % GOB_SIZE_Y == 0)
109 y_address += GOB_SIZE;
110 }
111 z_address += xy_block_size;
112 }
113}
114 58
115/** 59 const u32 block_height_mask = (1U << block_height) - 1;
116 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. 60 const u32 block_depth_mask = (1U << block_depth) - 1;
117 * The body of this function takes care of splitting the swizzled texture into blocks, 61 const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
118 * and managing the extents of it. Once all the parameters of a single block are obtained, 62
119 * the function calls 'ProcessBlock' to process that particular Block. 63 for (u32 slice = 0; slice < depth; ++slice) {
120 * 64 const u32 z = slice + origin_z;
121 * Documentation for the memory layout and decoding can be found at: 65 const u32 offset_z = (z >> block_depth) * slice_size +
122 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces 66 ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
123 */ 67 for (u32 line = 0; line < height; ++line) {
124template <bool fast> 68 const u32 y = line + origin_y;
125void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, 69 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
126 const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, 70
127 const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, 71 const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
128 const u32 width_spacing) { 72 const u32 offset_y = (block_y >> block_height) * block_size +
129 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; 73 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
130 const u32 stride_x = width * out_bytes_per_pixel; 74
131 const u32 layer_z = height * stride_x; 75 for (u32 column = 0; column < width; ++column) {
132 const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; 76 const u32 x = (column + origin_x) * bytes_per_pixel;
133 constexpr u32 gob_elements_y = GOB_SIZE_Y; 77 const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
134 constexpr u32 gob_elements_z = GOB_SIZE_Z; 78
135 const u32 block_x_elements = gob_elements_x; 79 const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
136 const u32 block_y_elements = gob_elements_y * block_height; 80 const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
137 const u32 block_z_elements = gob_elements_z * block_depth; 81
138 const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); 82 const u32 unswizzled_offset =
139 const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); 83 slice * pitch * height + line * pitch + column * bytes_per_pixel;
140 const u32 blocks_on_y = div_ceil(height, block_y_elements); 84
141 const u32 blocks_on_z = div_ceil(depth, block_z_elements); 85 u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
142 const u32 xy_block_size = GOB_SIZE * block_height; 86 const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
143 const u32 block_size = xy_block_size * block_depth; 87 std::memcpy(dst, src, bytes_per_pixel);
144 u32 tile_offset = 0;
145 for (u32 zb = 0; zb < blocks_on_z; zb++) {
146 const u32 z_start = zb * block_z_elements;
147 const u32 z_end = std::min(depth, z_start + block_z_elements);
148 for (u32 yb = 0; yb < blocks_on_y; yb++) {
149 const u32 y_start = yb * block_y_elements;
150 const u32 y_end = std::min(height, y_start + block_y_elements);
151 for (u32 xb = 0; xb < blocks_on_x; xb++) {
152 const u32 x_start = xb * block_x_elements;
153 const u32 x_end = std::min(width, x_start + block_x_elements);
154 if constexpr (fast) {
155 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
156 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
157 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
158 } else {
159 PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
160 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
161 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
162 }
163 tile_offset += block_size;
164 } 88 }
165 } 89 }
166 } 90 }
167} 91}
168
169} // Anonymous namespace 92} // Anonymous namespace
170 93
171void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, 94SwizzleTable MakeSwizzleTable() {
172 u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, 95 return SWIZZLE_TABLE;
173 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
174 const u32 block_height_size{1U << block_height};
175 const u32 block_depth_size{1U << block_depth};
176 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
177 SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
178 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
179 block_depth_size, width_spacing);
180 } else {
181 SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
182 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
183 block_depth_size, width_spacing);
184 }
185} 96}
186 97
187void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, 98void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
188 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, 99 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
189 u32 block_depth, u32 width_spacing) { 100 u32 stride_alignment) {
190 CopySwizzledData((width + tile_size_x - 1) / tile_size_x, 101 Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
191 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, 102 stride_alignment);
192 bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
193 width_spacing);
194} 103}
195 104
196std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, 105void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
197 u32 width, u32 height, u32 depth, u32 block_height, 106 u32 height, u32 depth, u32 block_height, u32 block_depth,
198 u32 block_depth, u32 width_spacing) { 107 u32 stride_alignment) {
199 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); 108 Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
200 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, 109 stride_alignment);
201 width, height, depth, block_height, block_depth, width_spacing);
202 return unswizzled_data;
203} 110}
204 111
205void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 112void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
@@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
213 const u32 gob_address_y = 120 const u32 gob_address_y =
214 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 121 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
215 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 122 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
216 const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; 123 const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
217 for (u32 x = 0; x < subrect_width; ++x) { 124 for (u32 x = 0; x < subrect_width; ++x) {
218 const u32 dst_x = x + offset_x; 125 const u32 dst_x = x + offset_x;
219 const u32 gob_address = 126 const u32 gob_address =
@@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
235 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); 142 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
236 143
237 const u32 block_height_mask = (1U << block_height) - 1; 144 const u32 block_height_mask = (1U << block_height) - 1;
238 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; 145 const u32 x_shift = GOB_SIZE_SHIFT + block_height;
239 146
240 for (u32 line = 0; line < line_count; ++line) { 147 for (u32 line = 0; line < line_count; ++line) {
241 const u32 src_y = line + origin_y; 148 const u32 src_y = line + origin_y;
242 const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; 149 const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
243 150
244 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; 151 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
245 const u32 src_offset_y = (block_y >> block_height) * block_size + 152 const u32 src_offset_y = (block_y >> block_height) * block_size +
@@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
270 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; 177 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
271 178
272 for (u32 line = 0; line < line_count; ++line) { 179 for (u32 line = 0; line < line_count; ++line) {
273 const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; 180 const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
274 const u32 block_y = line / GOB_SIZE_Y; 181 const u32 block_y = line / GOB_SIZE_Y;
275 const u32 dst_offset_y = 182 const u32 dst_offset_y =
276 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; 183 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
@@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
293 const std::size_t gob_address_y = 200 const std::size_t gob_address_y =
294 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 201 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
295 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 202 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
296 const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; 203 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
297 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { 204 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
298 const std::size_t gob_address = 205 const std::size_t gob_address =
299 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; 206 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
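The constant-evaluated table built above can be checked by hand. A stand-alone mirror of the expression with one worked coordinate (GobOffset is illustrative, not part of this change):

#include <cstdint>

// Same arithmetic as the table initializer in MakeSwizzleTableConst.
constexpr std::uint32_t GobOffset(std::uint32_t x, std::uint32_t y) {
    return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
           (y % 2) * 16 + (x % 16);
}

// Byte (x=21, y=5) inside a GOB: 0 + 128 + 32 + 16 + 5.
static_assert(GobOffset(21, 5) == 181);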
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 01e156bc8..d7cdc81e8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -4,7 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector> 7#include <span>
8
8#include "common/common_types.h" 9#include "common/common_types.h"
9#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
10 11
@@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8;
15constexpr u32 GOB_SIZE_Z = 1; 16constexpr u32 GOB_SIZE_Z = 1;
16constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; 17constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
17 18
18constexpr std::size_t GOB_SIZE_X_SHIFT = 6; 19constexpr u32 GOB_SIZE_X_SHIFT = 6;
19constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; 20constexpr u32 GOB_SIZE_Y_SHIFT = 3;
20constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; 21constexpr u32 GOB_SIZE_Z_SHIFT = 0;
21constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; 22constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
22 23
23/// Unswizzles a swizzled texture without changing its format. 24using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
24void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, 25
25 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 26/// Returns a z-order swizzle table
26 u32 block_height = TICEntry::DefaultBlockHeight, 27SwizzleTable MakeSwizzleTable();
27 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 28
28 29/// Unswizzles a block linear texture into linear memory.
29/// Unswizzles a swizzled texture without changing its format. 30void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
30std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, 31 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
31 u32 width, u32 height, u32 depth, 32 u32 stride_alignment = 1);
32 u32 block_height = TICEntry::DefaultBlockHeight, 33
33 u32 block_depth = TICEntry::DefaultBlockHeight, 34/// Swizzles linear memory into a block linear texture.
34 u32 width_spacing = 0); 35void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
35 36 u32 height, u32 depth, u32 block_height, u32 block_depth,
36/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. 37 u32 stride_alignment = 1);
37void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
38 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
39 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
40 38
41/// This function calculates the correct size of a texture depending if it's tiled or not. 39/// This function calculates the correct size of a texture depending if it's tiled or not.
42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 40std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
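A usage sketch of the new span-based pair, under stated assumptions: a 64x32 RGBA8 image, block_height = 1 and block_depth = 0 as log2 values (matching how Swizzle<> shifts them), and a hand-computed block-linear size standing in for CalculateSize:

#include <vector>

#include "video_core/textures/decoders.h"

void RoundTripExample() {
    using namespace Tegra::Texture;
    constexpr u32 bpp = 4, width = 64, height = 32, depth = 1;
    constexpr u32 block_height = 1, block_depth = 0;

    const std::vector<u8> linear(width * height * depth * bpp, 0xAA);
    // 64 px * 4 Bpp = 256 B = 4 GOBs across; 32 rows = 2 blocks of 16 rows;
    // 4 GOBs * 2 blocks * 2 GOBs per block * 512 B = 8192 B of tiled storage.
    std::vector<u8> tiled(8192);
    std::vector<u8> back(linear.size());

    SwizzleTexture(tiled, linear, bpp, width, height, depth, block_height, block_depth);
    UnswizzleTexture(back, tiled, bpp, width, height, depth, block_height, block_depth);
    // back now matches linear byte-for-byte for this whole-block geometry.
}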
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4171e3ef2..ae5621a7d 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -5,9 +5,13 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7 7
8#include "common/cityhash.h"
8#include "core/settings.h" 9#include "core/settings.h"
9#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
10 11
12using Tegra::Texture::TICEntry;
13using Tegra::Texture::TSCEntry;
14
11namespace Tegra::Texture { 15namespace Tegra::Texture {
12 16
13namespace { 17namespace {
@@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept {
65 69
66} // Anonymous namespace 70} // Anonymous namespace
67 71
68std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { 72std::array<float, 4> TSCEntry::BorderColor() const noexcept {
69 if (!srgb_conversion) { 73 if (!srgb_conversion) {
70 return border_color; 74 return border_color;
71 } 75 }
@@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
73 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; 77 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
74} 78}
75 79
76float TSCEntry::GetMaxAnisotropy() const noexcept { 80float TSCEntry::MaxAnisotropy() const noexcept {
77 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); 81 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
78} 82}
79 83
80} // namespace Tegra::Texture 84} // namespace Tegra::Texture
85
86size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept {
87 return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic);
88}
89
90size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept {
91 return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc);
92}
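These std::hash definitions let the raw descriptors key hash maps directly, with equality provided by the new operator== on the entries. A sketch, assuming the specializations are declared in texture.h as the out-of-line definitions imply; the ImageIndex value type is a placeholder:

#include <cstdint>
#include <unordered_map>

#include "video_core/textures/texture.h"

using ImageIndex = std::uint32_t; // placeholder for whatever the cache stores

// Descriptor-keyed lookup: CityHash64 over the raw TIC bits, operator== for equality.
std::unordered_map<Tegra::Texture::TICEntry, ImageIndex> image_lookup;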
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 0574fef12..c1d14335e 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -53,27 +53,27 @@ enum class TextureFormat : u32 {
53 BC4 = 0x27, 53 BC4 = 0x27,
54 BC5 = 0x28, 54 BC5 = 0x28,
55 S8D24 = 0x29, 55 S8D24 = 0x29,
56 X8Z24 = 0x2a, 56 X8D24 = 0x2a,
57 D24S8 = 0x2b, 57 D24S8 = 0x2b,
58 X4V4Z24__COV4R4V = 0x2c, 58 X4V4D24__COV4R4V = 0x2c,
59 X4V4Z24__COV8R8V = 0x2d, 59 X4V4D24__COV8R8V = 0x2d,
60 V8Z24__COV4R12V = 0x2e, 60 V8D24__COV4R12V = 0x2e,
61 D32 = 0x2f, 61 D32 = 0x2f,
62 D32S8 = 0x30, 62 D32S8 = 0x30,
63 X8Z24_X20V4S8__COV4R4V = 0x31, 63 X8D24_X20V4S8__COV4R4V = 0x31,
64 X8Z24_X20V4S8__COV8R8V = 0x32, 64 X8D24_X20V4S8__COV8R8V = 0x32,
65 ZF32_X20V4X8__COV4R4V = 0x33, 65 D32_X20V4X8__COV4R4V = 0x33,
66 ZF32_X20V4X8__COV8R8V = 0x34, 66 D32_X20V4X8__COV8R8V = 0x34,
67 ZF32_X20V4S8__COV4R4V = 0x35, 67 D32_X20V4S8__COV4R4V = 0x35,
68 ZF32_X20V4S8__COV8R8V = 0x36, 68 D32_X20V4S8__COV8R8V = 0x36,
69 X8Z24_X16V8S8__COV4R12V = 0x37, 69 X8D24_X16V8S8__COV4R12V = 0x37,
70 ZF32_X16V8X8__COV4R12V = 0x38, 70 D32_X16V8X8__COV4R12V = 0x38,
71 ZF32_X16V8S8__COV4R12V = 0x39, 71 D32_X16V8S8__COV4R12V = 0x39,
72 D16 = 0x3a, 72 D16 = 0x3a,
73 V8Z24__COV8R24V = 0x3b, 73 V8D24__COV8R24V = 0x3b,
74 X8Z24_X16V8S8__COV8R24V = 0x3c, 74 X8D24_X16V8S8__COV8R24V = 0x3c,
75 ZF32_X16V8X8__COV8R24V = 0x3d, 75 D32_X16V8X8__COV8R24V = 0x3d,
76 ZF32_X16V8S8__COV8R24V = 0x3e, 76 D32_X16V8S8__COV8R24V = 0x3e,
77 ASTC_2D_4X4 = 0x40, 77 ASTC_2D_4X4 = 0x40,
78 ASTC_2D_5X5 = 0x41, 78 ASTC_2D_5X5 = 0x41,
79 ASTC_2D_6X6 = 0x42, 79 ASTC_2D_6X6 = 0x42,
@@ -146,7 +146,7 @@ enum class MsaaMode : u32 {
146}; 146};
147 147
148union TextureHandle { 148union TextureHandle {
149 TextureHandle(u32 raw) : raw{raw} {} 149 /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}
150 150
151 u32 raw; 151 u32 raw;
152 BitField<0, 20, u32> tic_id; 152 BitField<0, 20, u32> tic_id;
@@ -155,124 +155,124 @@ union TextureHandle {
155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); 155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
156 156
157struct TICEntry { 157struct TICEntry {
158 static constexpr u32 DefaultBlockHeight = 16;
159 static constexpr u32 DefaultBlockDepth = 1;
160
161 union {
162 u32 raw;
163 BitField<0, 7, TextureFormat> format;
164 BitField<7, 3, ComponentType> r_type;
165 BitField<10, 3, ComponentType> g_type;
166 BitField<13, 3, ComponentType> b_type;
167 BitField<16, 3, ComponentType> a_type;
168
169 BitField<19, 3, SwizzleSource> x_source;
170 BitField<22, 3, SwizzleSource> y_source;
171 BitField<25, 3, SwizzleSource> z_source;
172 BitField<28, 3, SwizzleSource> w_source;
173 };
174 u32 address_low;
175 union { 158 union {
176 BitField<0, 16, u32> address_high; 159 struct {
177 BitField<21, 3, TICHeaderVersion> header_version; 160 union {
178 }; 161 BitField<0, 7, TextureFormat> format;
179 union { 162 BitField<7, 3, ComponentType> r_type;
180 BitField<0, 3, u32> block_width; 163 BitField<10, 3, ComponentType> g_type;
181 BitField<3, 3, u32> block_height; 164 BitField<13, 3, ComponentType> b_type;
182 BitField<6, 3, u32> block_depth; 165 BitField<16, 3, ComponentType> a_type;
166
167 BitField<19, 3, SwizzleSource> x_source;
168 BitField<22, 3, SwizzleSource> y_source;
169 BitField<25, 3, SwizzleSource> z_source;
170 BitField<28, 3, SwizzleSource> w_source;
171 };
172 u32 address_low;
173 union {
174 BitField<0, 16, u32> address_high;
175 BitField<16, 5, u32> layer_base_3_7;
176 BitField<21, 3, TICHeaderVersion> header_version;
177 BitField<24, 1, u32> load_store_hint;
178 BitField<25, 4, u32> view_coherency_hash;
179 BitField<29, 3, u32> layer_base_8_10;
180 };
181 union {
182 BitField<0, 3, u32> block_width;
183 BitField<3, 3, u32> block_height;
184 BitField<6, 3, u32> block_depth;
183 185
184 BitField<10, 3, u32> tile_width_spacing; 186 BitField<10, 3, u32> tile_width_spacing;
185 187
186 // High 16 bits of the pitch value 188 // High 16 bits of the pitch value
187 BitField<0, 16, u32> pitch_high; 189 BitField<0, 16, u32> pitch_high;
188 BitField<26, 1, u32> use_header_opt_control; 190 BitField<26, 1, u32> use_header_opt_control;
189 BitField<27, 1, u32> depth_texture; 191 BitField<27, 1, u32> depth_texture;
190 BitField<28, 4, u32> max_mip_level; 192 BitField<28, 4, u32> max_mip_level;
191 193
192 BitField<0, 16, u32> buffer_high_width_minus_one; 194 BitField<0, 16, u32> buffer_high_width_minus_one;
193 }; 195 };
194 union { 196 union {
195 BitField<0, 16, u32> width_minus_1; 197 BitField<0, 16, u32> width_minus_one;
196 BitField<22, 1, u32> srgb_conversion; 198 BitField<16, 3, u32> layer_base_0_2;
197 BitField<23, 4, TextureType> texture_type; 199 BitField<22, 1, u32> srgb_conversion;
198 BitField<29, 3, u32> border_size; 200 BitField<23, 4, TextureType> texture_type;
201 BitField<29, 3, u32> border_size;
199 202
200 BitField<0, 16, u32> buffer_low_width_minus_one; 203 BitField<0, 16, u32> buffer_low_width_minus_one;
201 }; 204 };
202 union { 205 union {
203 BitField<0, 16, u32> height_minus_1; 206 BitField<0, 16, u32> height_minus_1;
204 BitField<16, 14, u32> depth_minus_1; 207 BitField<16, 14, u32> depth_minus_1;
205 }; 208 BitField<30, 1, u32> is_sparse;
206 union { 209 BitField<31, 1, u32> normalized_coords;
207 BitField<6, 13, u32> mip_lod_bias; 210 };
208 BitField<27, 3, u32> max_anisotropy; 211 union {
212 BitField<6, 13, u32> mip_lod_bias;
213 BitField<27, 3, u32> max_anisotropy;
214 };
215 union {
216 BitField<0, 4, u32> res_min_mip_level;
217 BitField<4, 4, u32> res_max_mip_level;
218 BitField<8, 4, MsaaMode> msaa_mode;
219 BitField<12, 12, u32> min_lod_clamp;
220 };
221 };
222 std::array<u64, 4> raw;
209 }; 223 };
210 224
211 union { 225 constexpr bool operator==(const TICEntry& rhs) const noexcept {
212 BitField<0, 4, u32> res_min_mip_level; 226 return raw == rhs.raw;
213 BitField<4, 4, u32> res_max_mip_level; 227 }
214 BitField<8, 4, MsaaMode> msaa_mode;
215 BitField<12, 12, u32> min_lod_clamp;
216 };
217 228
218 GPUVAddr Address() const { 229 constexpr bool operator!=(const TICEntry& rhs) const noexcept {
230 return raw != rhs.raw;
231 }
232
233 constexpr GPUVAddr Address() const {
219 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); 234 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
220 } 235 }
221 236
222 u32 Pitch() const { 237 constexpr u32 Pitch() const {
223 ASSERT(header_version == TICHeaderVersion::Pitch || 238 ASSERT(header_version == TICHeaderVersion::Pitch ||
224 header_version == TICHeaderVersion::PitchColorKey); 239 header_version == TICHeaderVersion::PitchColorKey);
225 // The pitch value is 21 bits, and is 32B aligned. 240 // The pitch value is 21 bits, and is 32B aligned.
226 return pitch_high << 5; 241 return pitch_high << 5;
227 } 242 }
228 243
229 u32 Width() const { 244 constexpr u32 Width() const {
230 if (header_version != TICHeaderVersion::OneDBuffer) { 245 if (header_version != TICHeaderVersion::OneDBuffer) {
231 return width_minus_1 + 1; 246 return width_minus_one + 1;
232 } 247 }
233 return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; 248 return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;
234 } 249 }
235 250
236 u32 Height() const { 251 constexpr u32 Height() const {
237 return height_minus_1 + 1; 252 return height_minus_1 + 1;
238 } 253 }
239 254
240 u32 Depth() const { 255 constexpr u32 Depth() const {
241 return depth_minus_1 + 1; 256 return depth_minus_1 + 1;
242 } 257 }
243 258
244 u32 BlockWidth() const { 259 constexpr u32 BaseLayer() const {
245 ASSERT(IsTiled()); 260 return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;
246 return block_width;
247 }
248
249 u32 BlockHeight() const {
250 ASSERT(IsTiled());
251 return block_height;
252 }
253
254 u32 BlockDepth() const {
255 ASSERT(IsTiled());
256 return block_depth;
257 } 261 }
258 262
259 bool IsTiled() const { 263 constexpr bool IsBlockLinear() const {
260 return header_version == TICHeaderVersion::BlockLinear || 264 return header_version == TICHeaderVersion::BlockLinear ||
261 header_version == TICHeaderVersion::BlockLinearColorKey; 265 header_version == TICHeaderVersion::BlockLinearColorKey;
262 } 266 }
263 267
264 bool IsLineal() const { 268 constexpr bool IsPitchLinear() const {
265 return header_version == TICHeaderVersion::Pitch || 269 return header_version == TICHeaderVersion::Pitch ||
266 header_version == TICHeaderVersion::PitchColorKey; 270 header_version == TICHeaderVersion::PitchColorKey;
267 } 271 }
268 272
269 bool IsBuffer() const { 273 constexpr bool IsBuffer() const {
270 return header_version == TICHeaderVersion::OneDBuffer; 274 return header_version == TICHeaderVersion::OneDBuffer;
271 } 275 }
272
273 bool IsSrgbConversionEnabled() const {
274 return srgb_conversion != 0;
275 }
276}; 276};
277static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); 277static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
278 278
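
The new BaseLayer() getter reassembles a value the hardware stores as three non-contiguous bit slices (bits 16-18 of word 4, bits 16-20 and 29-31 of word 2 in the layout above). A minimal sketch of that reassembly, with plain shift-and-mask extraction standing in for BitField:

#include <cstdint>

// Extract `bits` bits starting at bit `pos` of a 32-bit word.
constexpr std::uint32_t Extract(std::uint32_t word, unsigned pos, unsigned bits) {
    return (word >> pos) & ((1U << bits) - 1U);
}

constexpr std::uint32_t BaseLayer(std::uint32_t word2, std::uint32_t word4) {
    const std::uint32_t lo = Extract(word4, 16, 3);  // layer_base_0_2
    const std::uint32_t mid = Extract(word2, 16, 5); // layer_base_3_7
    const std::uint32_t hi = Extract(word2, 29, 3);  // layer_base_8_10
    return lo | mid << 3 | hi << 8;                  // 11-bit result
}

// All three slices set yields the full 11-bit value 0x7FF.
static_assert(BaseLayer((0x1FU << 16) | (0x7U << 29), 0x7U << 16) == 0x7FF);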
@@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 {
309 Linear = 3, 309 Linear = 3,
310}; 310};
311 311
312enum class SamplerReduction : u32 {
313 WeightedAverage = 0,
314 Min = 1,
315 Max = 2,
316};
317
312enum class Anisotropy { 318enum class Anisotropy {
313 Default, 319 Default,
314 Filter2x, 320 Filter2x,
@@ -333,8 +339,12 @@ struct TSCEntry {
333 BitField<0, 2, TextureFilter> mag_filter; 339 BitField<0, 2, TextureFilter> mag_filter;
334 BitField<4, 2, TextureFilter> min_filter; 340 BitField<4, 2, TextureFilter> min_filter;
335 BitField<6, 2, TextureMipmapFilter> mipmap_filter; 341 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
342 BitField<8, 1, u32> cubemap_anisotropy;
336 BitField<9, 1, u32> cubemap_interface_filtering; 343 BitField<9, 1, u32> cubemap_interface_filtering;
344 BitField<10, 2, SamplerReduction> reduction_filter;
337 BitField<12, 13, u32> mip_lod_bias; 345 BitField<12, 13, u32> mip_lod_bias;
346 BitField<25, 1, u32> float_coord_normalization;
347 BitField<26, 5, u32> trilin_opt;
338 }; 348 };
339 union { 349 union {
340 BitField<0, 12, u32> min_lod_clamp; 350 BitField<0, 12, u32> min_lod_clamp;
@@ -347,32 +357,45 @@ struct TSCEntry {
347 }; 357 };
348 std::array<f32, 4> border_color; 358 std::array<f32, 4> border_color;
349 }; 359 };
350 std::array<u8, 0x20> raw; 360 std::array<u64, 4> raw;
351 }; 361 };
352 362
353 std::array<float, 4> GetBorderColor() const noexcept; 363 constexpr bool operator==(const TSCEntry& rhs) const noexcept {
364 return raw == rhs.raw;
365 }
366
367 constexpr bool operator!=(const TSCEntry& rhs) const noexcept {
368 return raw != rhs.raw;
369 }
370
371 std::array<float, 4> BorderColor() const noexcept;
354 372
355 float GetMaxAnisotropy() const noexcept; 373 float MaxAnisotropy() const noexcept;
356 374
357 float GetMinLod() const { 375 float MinLod() const {
358 return static_cast<float>(min_lod_clamp) / 256.0f; 376 return static_cast<float>(min_lod_clamp) / 256.0f;
359 } 377 }
360 378
361 float GetMaxLod() const { 379 float MaxLod() const {
362 return static_cast<float>(max_lod_clamp) / 256.0f; 380 return static_cast<float>(max_lod_clamp) / 256.0f;
363 } 381 }
364 382
365 float GetLodBias() const { 383 float LodBias() const {
366 // Sign extend the 13-bit value. 384 // Sign extend the 13-bit value.
367 constexpr u32 mask = 1U << (13 - 1); 385 static constexpr u32 mask = 1U << (13 - 1);
368 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; 386 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
369 } 387 }
370}; 388};
371static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 389static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
372 390
373struct FullTextureInfo { 391} // namespace Tegra::Texture
374 TICEntry tic; 392
375 TSCEntry tsc; 393template <>
394struct std::hash<Tegra::Texture::TICEntry> {
395 size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;
376}; 396};
377 397
378} // namespace Tegra::Texture 398template <>
399struct std::hash<Tegra::Texture::TSCEntry> {
400 size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept;
401};
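
LodBias() above sign-extends the 13-bit two's-complement mip_lod_bias field with the (x ^ mask) - mask identity, then scales by the field's 8 fractional bits. A standalone check of that trick:

#include <cstdint>

constexpr float LodBiasFromRaw(std::uint32_t mip_lod_bias) {
    constexpr std::uint32_t mask = 1U << (13 - 1); // sign bit of the 13-bit field
    const auto extended = static_cast<std::int32_t>((mip_lod_bias ^ mask) - mask);
    return static_cast<float>(extended) / 256.0f;  // 8 fractional bits
}

static_assert(LodBiasFromRaw(0x0100) == 1.0f);           // +256 / 256
static_assert(LodBiasFromRaw(0x1FFF) == -1.0f / 256.0f); // all 13 bits set is -1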
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index dd5cee4a1..53444e945 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -7,13 +7,9 @@
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/settings.h" 9#include "core/settings.h"
10#include "video_core/gpu_asynch.h"
11#include "video_core/gpu_synch.h"
12#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
13#include "video_core/renderer_opengl/renderer_opengl.h" 11#include "video_core/renderer_opengl/renderer_opengl.h"
14#ifdef HAS_VULKAN
15#include "video_core/renderer_vulkan/renderer_vulkan.h" 12#include "video_core/renderer_vulkan/renderer_vulkan.h"
16#endif
17#include "video_core/video_core.h" 13#include "video_core/video_core.h"
18 14
19namespace { 15namespace {
@@ -28,11 +24,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
28 case Settings::RendererBackend::OpenGL: 24 case Settings::RendererBackend::OpenGL:
29 return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory, 25 return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory,
30 gpu, std::move(context)); 26 gpu, std::move(context));
31#ifdef HAS_VULKAN
32 case Settings::RendererBackend::Vulkan: 27 case Settings::RendererBackend::Vulkan:
33 return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory, 28 return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory,
34 gpu, std::move(context)); 29 gpu, std::move(context));
35#endif
36 default: 30 default:
37 return nullptr; 31 return nullptr;
38 } 32 }
@@ -43,13 +37,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
43namespace VideoCore { 37namespace VideoCore {
44 38
45std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { 39std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
46 std::unique_ptr<Tegra::GPU> gpu;
47 const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); 40 const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
48 if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 41 std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>(
49 gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec); 42 system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec);
50 } else {
51 gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec);
52 }
53 43
54 auto context = emu_window.CreateSharedContext(); 44 auto context = emu_window.CreateSharedContext();
55 const auto scope = context->Acquire(); 45 const auto scope = context->Acquire();
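
This hunk folds the GPUAsynch/GPUSynch subclass pair into a single Tegra::GPU that takes the async choice as a constructor argument. A distilled sketch of that refactor shape (names hypothetical, not yuzu's actual GPU interface):

#include <iostream>
#include <memory>

class Gpu {
public:
    explicit Gpu(bool is_async_) : is_async{is_async_} {}

    void SubmitWork() {
        // One runtime branch replaces two nearly identical subclasses.
        if (is_async) {
            std::cout << "queue work for the GPU thread\n";
        } else {
            std::cout << "run work on the calling thread\n";
        }
    }

private:
    const bool is_async;
};

int main() {
    const bool use_async = true; // would come from Settings in yuzu
    const auto gpu = std::make_unique<Gpu>(use_async);
    gpu->SubmitWork();
}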
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index b16b54032..e1bab2112 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -141,6 +141,8 @@ add_executable(yuzu
141 util/limitable_input_dialog.h 141 util/limitable_input_dialog.h
142 util/sequence_dialog/sequence_dialog.cpp 142 util/sequence_dialog/sequence_dialog.cpp
143 util/sequence_dialog/sequence_dialog.h 143 util/sequence_dialog/sequence_dialog.h
144 util/url_request_interceptor.cpp
145 util/url_request_interceptor.h
144 util/util.cpp 146 util/util.cpp
145 util/util.h 147 util/util.h
146 compatdb.cpp 148 compatdb.cpp
@@ -217,7 +219,8 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core)
217target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::Widgets) 219target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::Widgets)
218target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) 220target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)
219 221
220if (ENABLE_VULKAN AND NOT WIN32) 222target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include)
223if (NOT WIN32)
221 target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS}) 224 target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS})
222endif() 225endif()
223 226
@@ -278,8 +281,3 @@ endif()
278if (NOT APPLE) 281if (NOT APPLE)
279 target_compile_definitions(yuzu PRIVATE HAS_OPENGL) 282 target_compile_definitions(yuzu PRIVATE HAS_OPENGL)
280endif() 283endif()
281
282if (ENABLE_VULKAN)
283 target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include)
284 target_compile_definitions(yuzu PRIVATE HAS_VULKAN)
285endif()
diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp
index 6944478f3..a15e8ca2a 100644
--- a/src/yuzu/applets/controller.cpp
+++ b/src/yuzu/applets/controller.cpp
@@ -660,8 +660,8 @@ QtControllerSelector::QtControllerSelector(GMainWindow& parent) {
660QtControllerSelector::~QtControllerSelector() = default; 660QtControllerSelector::~QtControllerSelector() = default;
661 661
662void QtControllerSelector::ReconfigureControllers( 662void QtControllerSelector::ReconfigureControllers(
663 std::function<void()> callback, const Core::Frontend::ControllerParameters& parameters) const { 663 std::function<void()> callback_, const Core::Frontend::ControllerParameters& parameters) const {
664 this->callback = std::move(callback); 664 callback = std::move(callback_);
665 emit MainWindowReconfigureControllers(parameters); 665 emit MainWindowReconfigureControllers(parameters);
666} 666}
667 667
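
The callback_ rename recurs across the applet frontends below: giving the parameter a trailing underscore stops it from shadowing the member it initializes, so the assignment no longer needs a this-> qualifier. The pattern in isolation, with a hypothetical class:

#include <functional>
#include <utility>

class Applet {
public:
    // Before: the parameter shadowed the member, forcing this->callback = ...
    // After: the trailing underscore removes the shadowing entirely.
    void Reconfigure(std::function<void()> callback_) {
        callback = std::move(callback_);
    }

private:
    std::function<void()> callback;
};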
diff --git a/src/yuzu/applets/controller.h b/src/yuzu/applets/controller.h
index 7a421d856..3518eed56 100644
--- a/src/yuzu/applets/controller.h
+++ b/src/yuzu/applets/controller.h
@@ -147,7 +147,7 @@ public:
147 ~QtControllerSelector() override; 147 ~QtControllerSelector() override;
148 148
149 void ReconfigureControllers( 149 void ReconfigureControllers(
150 std::function<void()> callback, 150 std::function<void()> callback_,
151 const Core::Frontend::ControllerParameters& parameters) const override; 151 const Core::Frontend::ControllerParameters& parameters) const override;
152 152
153signals: 153signals:
diff --git a/src/yuzu/applets/error.cpp b/src/yuzu/applets/error.cpp
index 08ed57355..53a993cf6 100644
--- a/src/yuzu/applets/error.cpp
+++ b/src/yuzu/applets/error.cpp
@@ -17,7 +17,7 @@ QtErrorDisplay::QtErrorDisplay(GMainWindow& parent) {
17QtErrorDisplay::~QtErrorDisplay() = default; 17QtErrorDisplay::~QtErrorDisplay() = default;
18 18
19void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const { 19void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const {
20 this->callback = std::move(finished); 20 callback = std::move(finished);
21 emit MainWindowDisplayError( 21 emit MainWindowDisplayError(
22 tr("An error has occured.\nPlease try again or contact the developer of the " 22 tr("An error has occured.\nPlease try again or contact the developer of the "
23 "software.\n\nError Code: %1-%2 (0x%3)") 23 "software.\n\nError Code: %1-%2 (0x%3)")
@@ -28,7 +28,7 @@ void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished)
28 28
29void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time, 29void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time,
30 std::function<void()> finished) const { 30 std::function<void()> finished) const {
31 this->callback = std::move(finished); 31 callback = std::move(finished);
32 32
33 const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count()); 33 const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count());
34 emit MainWindowDisplayError( 34 emit MainWindowDisplayError(
@@ -44,7 +44,7 @@ void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::secon
44void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_text, 44void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_text,
45 std::string fullscreen_text, 45 std::string fullscreen_text,
46 std::function<void()> finished) const { 46 std::function<void()> finished) const {
47 this->callback = std::move(finished); 47 callback = std::move(finished);
48 emit MainWindowDisplayError( 48 emit MainWindowDisplayError(
49 tr("An error has occured.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5") 49 tr("An error has occured.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5")
50 .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) 50 .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
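
ShowError() and its siblings are const member functions yet store the completion callback; that compiles because the member is declared mutable (web_browser.h later in this diff spells the same pattern out, and QtErrorDisplay's header presumably matches). A minimal sketch of that logical-constness idiom:

#include <functional>
#include <iostream>
#include <utility>

class ErrorDisplay {
public:
    void Show(std::function<void()> finished) const {
        callback = std::move(finished); // allowed: callback is mutable
        std::cout << "error dialog shown\n";
    }

    void OnDialogClosed() const {
        if (callback) {
            callback(); // resume the waiting applet
        }
    }

private:
    // mutable lets a conceptually read-only interface cache its continuation.
    mutable std::function<void()> callback;
};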
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
index c9a2f8601..4bf2bfd40 100644
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -150,8 +150,8 @@ QtProfileSelector::QtProfileSelector(GMainWindow& parent) {
150QtProfileSelector::~QtProfileSelector() = default; 150QtProfileSelector::~QtProfileSelector() = default;
151 151
152void QtProfileSelector::SelectProfile( 152void QtProfileSelector::SelectProfile(
153 std::function<void(std::optional<Common::UUID>)> callback) const { 153 std::function<void(std::optional<Common::UUID>)> callback_) const {
154 this->callback = std::move(callback); 154 callback = std::move(callback_);
155 emit MainWindowSelectProfile(); 155 emit MainWindowSelectProfile();
156} 156}
157 157
diff --git a/src/yuzu/applets/profile_select.h b/src/yuzu/applets/profile_select.h
index 29c33cca0..4e9037488 100644
--- a/src/yuzu/applets/profile_select.h
+++ b/src/yuzu/applets/profile_select.h
@@ -60,7 +60,7 @@ public:
60 explicit QtProfileSelector(GMainWindow& parent); 60 explicit QtProfileSelector(GMainWindow& parent);
61 ~QtProfileSelector() override; 61 ~QtProfileSelector() override;
62 62
63 void SelectProfile(std::function<void(std::optional<Common::UUID>)> callback) const override; 63 void SelectProfile(std::function<void(std::optional<Common::UUID>)> callback_) const override;
64 64
65signals: 65signals:
66 void MainWindowSelectProfile() const; 66 void MainWindowSelectProfile() const;
diff --git a/src/yuzu/applets/software_keyboard.cpp b/src/yuzu/applets/software_keyboard.cpp
index af36f07c6..ab8cfd8ee 100644
--- a/src/yuzu/applets/software_keyboard.cpp
+++ b/src/yuzu/applets/software_keyboard.cpp
@@ -135,8 +135,8 @@ void QtSoftwareKeyboard::RequestText(std::function<void(std::optional<std::u16st
135} 135}
136 136
137void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message, 137void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message,
138 std::function<void()> finished_check) const { 138 std::function<void()> finished_check_) const {
139 this->finished_check = std::move(finished_check); 139 finished_check = std::move(finished_check_);
140 emit MainWindowTextCheckDialog(error_message); 140 emit MainWindowTextCheckDialog(error_message);
141} 141}
142 142
diff --git a/src/yuzu/applets/software_keyboard.h b/src/yuzu/applets/software_keyboard.h
index 44bcece75..9e1094cce 100644
--- a/src/yuzu/applets/software_keyboard.h
+++ b/src/yuzu/applets/software_keyboard.h
@@ -61,7 +61,7 @@ public:
61 void RequestText(std::function<void(std::optional<std::u16string>)> out, 61 void RequestText(std::function<void(std::optional<std::u16string>)> out,
62 Core::Frontend::SoftwareKeyboardParameters parameters) const override; 62 Core::Frontend::SoftwareKeyboardParameters parameters) const override;
63 void SendTextCheckDialog(std::u16string error_message, 63 void SendTextCheckDialog(std::u16string error_message,
64 std::function<void()> finished_check) const override; 64 std::function<void()> finished_check_) const override;
65 65
66signals: 66signals:
67 void MainWindowGetText(Core::Frontend::SoftwareKeyboardParameters parameters) const; 67 void MainWindowGetText(Core::Frontend::SoftwareKeyboardParameters parameters) const;
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 33f1c385d..e482ba029 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -1,115 +1,414 @@
1// Copyright 2018 yuzu Emulator Project 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex> 5#ifdef YUZU_USE_QT_WEB_ENGINE
6
7#include <QKeyEvent> 6#include <QKeyEvent>
8 7
9#include "core/hle/lock.h" 8#include <QWebEngineProfile>
9#include <QWebEngineScript>
10#include <QWebEngineScriptCollection>
11#include <QWebEngineSettings>
12#include <QWebEngineUrlScheme>
13#endif
14
15#include "common/file_util.h"
16#include "core/core.h"
17#include "core/frontend/input_interpreter.h"
18#include "input_common/keyboard.h"
19#include "input_common/main.h"
10#include "yuzu/applets/web_browser.h" 20#include "yuzu/applets/web_browser.h"
21#include "yuzu/applets/web_browser_scripts.h"
11#include "yuzu/main.h" 22#include "yuzu/main.h"
23#include "yuzu/util/url_request_interceptor.h"
12 24
13#ifdef YUZU_USE_QT_WEB_ENGINE 25#ifdef YUZU_USE_QT_WEB_ENGINE
14 26
15constexpr char NX_SHIM_INJECT_SCRIPT[] = R"( 27namespace {
16 window.nx = {};
17 window.nx.playReport = {};
18 window.nx.playReport.setCounterSetIdentifier = function () {
19 console.log("nx.playReport.setCounterSetIdentifier called - unimplemented");
20 };
21 28
22 window.nx.playReport.incrementCounter = function () { 29constexpr int HIDButtonToKey(HIDButton button) {
23 console.log("nx.playReport.incrementCounter called - unimplemented"); 30 switch (button) {
24 }; 31 case HIDButton::DLeft:
32 case HIDButton::LStickLeft:
33 return Qt::Key_Left;
34 case HIDButton::DUp:
35 case HIDButton::LStickUp:
36 return Qt::Key_Up;
37 case HIDButton::DRight:
38 case HIDButton::LStickRight:
39 return Qt::Key_Right;
40 case HIDButton::DDown:
41 case HIDButton::LStickDown:
42 return Qt::Key_Down;
43 default:
44 return 0;
45 }
46}
47
48} // Anonymous namespace
49
50QtNXWebEngineView::QtNXWebEngineView(QWidget* parent, Core::System& system,
51 InputCommon::InputSubsystem* input_subsystem_)
52 : QWebEngineView(parent), input_subsystem{input_subsystem_},
53 url_interceptor(std::make_unique<UrlRequestInterceptor>()),
54 input_interpreter(std::make_unique<InputInterpreter>(system)),
55 default_profile{QWebEngineProfile::defaultProfile()},
56 global_settings{QWebEngineSettings::globalSettings()} {
57 QWebEngineScript gamepad;
58 QWebEngineScript window_nx;
59
60 gamepad.setName(QStringLiteral("gamepad_script.js"));
61 window_nx.setName(QStringLiteral("window_nx_script.js"));
62
63 gamepad.setSourceCode(QString::fromStdString(GAMEPAD_SCRIPT));
64 window_nx.setSourceCode(QString::fromStdString(WINDOW_NX_SCRIPT));
65
66 gamepad.setInjectionPoint(QWebEngineScript::DocumentCreation);
67 window_nx.setInjectionPoint(QWebEngineScript::DocumentCreation);
68
69 gamepad.setWorldId(QWebEngineScript::MainWorld);
70 window_nx.setWorldId(QWebEngineScript::MainWorld);
71
72 gamepad.setRunsOnSubFrames(true);
73 window_nx.setRunsOnSubFrames(true);
74
75 default_profile->scripts()->insert(gamepad);
76 default_profile->scripts()->insert(window_nx);
77
78 default_profile->setRequestInterceptor(url_interceptor.get());
79
80 global_settings->setAttribute(QWebEngineSettings::LocalContentCanAccessRemoteUrls, true);
81 global_settings->setAttribute(QWebEngineSettings::FullScreenSupportEnabled, true);
82 global_settings->setAttribute(QWebEngineSettings::AllowRunningInsecureContent, true);
83 global_settings->setAttribute(QWebEngineSettings::FocusOnNavigationEnabled, true);
84 global_settings->setAttribute(QWebEngineSettings::AllowWindowActivationFromJavaScript, true);
85 global_settings->setAttribute(QWebEngineSettings::ShowScrollBars, false);
86
87 global_settings->setFontFamily(QWebEngineSettings::StandardFont, QStringLiteral("Roboto"));
88
89 connect(
90 page(), &QWebEnginePage::windowCloseRequested, page(),
91 [this] {
92 if (page()->url() == url_interceptor->GetRequestedURL()) {
93 SetFinished(true);
94 SetExitReason(Service::AM::Applets::WebExitReason::WindowClosed);
95 }
96 },
97 Qt::QueuedConnection);
98}
99
100QtNXWebEngineView::~QtNXWebEngineView() {
101 SetFinished(true);
102 StopInputThread();
103}
104
105void QtNXWebEngineView::LoadLocalWebPage(std::string_view main_url,
106 std::string_view additional_args) {
107 is_local = true;
108
109 LoadExtractedFonts();
110 SetUserAgent(UserAgent::WebApplet);
111 SetFinished(false);
112 SetExitReason(Service::AM::Applets::WebExitReason::EndButtonPressed);
113 SetLastURL("http://localhost/");
114 StartInputThread();
115
116 load(QUrl(QUrl::fromLocalFile(QString::fromStdString(std::string(main_url))).toString() +
117 QString::fromStdString(std::string(additional_args))));
118}
119
120void QtNXWebEngineView::LoadExternalWebPage(std::string_view main_url,
121 std::string_view additional_args) {
122 is_local = false;
123
124 SetUserAgent(UserAgent::WebApplet);
125 SetFinished(false);
126 SetExitReason(Service::AM::Applets::WebExitReason::EndButtonPressed);
127 SetLastURL("http://localhost/");
128 StartInputThread();
129
130 load(QUrl(QString::fromStdString(std::string(main_url)) +
131 QString::fromStdString(std::string(additional_args))));
132}
133
134void QtNXWebEngineView::SetUserAgent(UserAgent user_agent) {
135 const QString user_agent_str = [user_agent] {
136 switch (user_agent) {
137 case UserAgent::WebApplet:
138 default:
139 return QStringLiteral("WebApplet");
140 case UserAgent::ShopN:
141 return QStringLiteral("ShopN");
142 case UserAgent::LoginApplet:
143 return QStringLiteral("LoginApplet");
144 case UserAgent::ShareApplet:
145 return QStringLiteral("ShareApplet");
146 case UserAgent::LobbyApplet:
147 return QStringLiteral("LobbyApplet");
148 case UserAgent::WifiWebAuthApplet:
149 return QStringLiteral("WifiWebAuthApplet");
150 }
151 }();
152
153 QWebEngineProfile::defaultProfile()->setHttpUserAgent(
154 QStringLiteral("Mozilla/5.0 (Nintendo Switch; %1) AppleWebKit/606.4 "
155 "(KHTML, like Gecko) NF/6.0.1.15.4 NintendoBrowser/5.1.0.20389")
156 .arg(user_agent_str));
157}
158
159bool QtNXWebEngineView::IsFinished() const {
160 return finished;
161}
162
163void QtNXWebEngineView::SetFinished(bool finished_) {
164 finished = finished_;
165}
166
167Service::AM::Applets::WebExitReason QtNXWebEngineView::GetExitReason() const {
168 return exit_reason;
169}
170
171void QtNXWebEngineView::SetExitReason(Service::AM::Applets::WebExitReason exit_reason_) {
172 exit_reason = exit_reason_;
173}
174
175const std::string& QtNXWebEngineView::GetLastURL() const {
176 return last_url;
177}
178
179void QtNXWebEngineView::SetLastURL(std::string last_url_) {
180 last_url = std::move(last_url_);
181}
182
183QString QtNXWebEngineView::GetCurrentURL() const {
184 return url_interceptor->GetRequestedURL().toString();
185}
186
187void QtNXWebEngineView::hide() {
188 SetFinished(true);
189 StopInputThread();
25 190
26 window.nx.footer = {}; 191 QWidget::hide();
27 window.nx.footer.unsetAssign = function () { 192}
28 console.log("nx.footer.unsetAssign called - unimplemented"); 193
194void QtNXWebEngineView::keyPressEvent(QKeyEvent* event) {
195 if (is_local) {
196 input_subsystem->GetKeyboard()->PressKey(event->key());
197 }
198}
199
200void QtNXWebEngineView::keyReleaseEvent(QKeyEvent* event) {
201 if (is_local) {
202 input_subsystem->GetKeyboard()->ReleaseKey(event->key());
203 }
204}
205
206template <HIDButton... T>
207void QtNXWebEngineView::HandleWindowFooterButtonPressedOnce() {
208 const auto f = [this](HIDButton button) {
209 if (input_interpreter->IsButtonPressedOnce(button)) {
210 page()->runJavaScript(
211 QStringLiteral("yuzu_key_callbacks[%1] == null;").arg(static_cast<u8>(button)),
212 [&](const QVariant& variant) {
213 if (variant.toBool()) {
214 switch (button) {
215 case HIDButton::A:
216 SendMultipleKeyPressEvents<Qt::Key_A, Qt::Key_Space, Qt::Key_Return>();
217 break;
218 case HIDButton::B:
219 SendKeyPressEvent(Qt::Key_B);
220 break;
221 case HIDButton::X:
222 SendKeyPressEvent(Qt::Key_X);
223 break;
224 case HIDButton::Y:
225 SendKeyPressEvent(Qt::Key_Y);
226 break;
227 default:
228 break;
229 }
230 }
231 });
232
233 page()->runJavaScript(
234 QStringLiteral("if (yuzu_key_callbacks[%1] != null) { yuzu_key_callbacks[%1](); }")
235 .arg(static_cast<u8>(button)));
236 }
29 }; 237 };
30 238
31 var yuzu_key_callbacks = []; 239 (f(T), ...);
32 window.nx.footer.setAssign = function(key, discard1, func, discard2) { 240}
33 switch (key) { 241
34 case 'A': 242template <HIDButton... T>
35 yuzu_key_callbacks[0] = func; 243void QtNXWebEngineView::HandleWindowKeyButtonPressedOnce() {
36 break; 244 const auto f = [this](HIDButton button) {
37 case 'B': 245 if (input_interpreter->IsButtonPressedOnce(button)) {
38 yuzu_key_callbacks[1] = func; 246 SendKeyPressEvent(HIDButtonToKey(button));
39 break;
40 case 'X':
41 yuzu_key_callbacks[2] = func;
42 break;
43 case 'Y':
44 yuzu_key_callbacks[3] = func;
45 break;
46 case 'L':
47 yuzu_key_callbacks[6] = func;
48 break;
49 case 'R':
50 yuzu_key_callbacks[7] = func;
51 break;
52 } 247 }
53 }; 248 };
54 249
55 var applet_done = false; 250 (f(T), ...);
56 window.nx.endApplet = function() { 251}
57 applet_done = true; 252
253template <HIDButton... T>
254void QtNXWebEngineView::HandleWindowKeyButtonHold() {
255 const auto f = [this](HIDButton button) {
256 if (input_interpreter->IsButtonHeld(button)) {
257 SendKeyPressEvent(HIDButtonToKey(button));
258 }
58 }; 259 };
59 260
60 window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } }; 261 (f(T), ...);
61)"; 262}
263
264void QtNXWebEngineView::SendKeyPressEvent(int key) {
265 if (key == 0) {
266 return;
267 }
268
269 QCoreApplication::postEvent(focusProxy(),
270 new QKeyEvent(QKeyEvent::KeyPress, key, Qt::NoModifier));
271 QCoreApplication::postEvent(focusProxy(),
272 new QKeyEvent(QKeyEvent::KeyRelease, key, Qt::NoModifier));
273}
274
275void QtNXWebEngineView::StartInputThread() {
276 if (input_thread_running) {
277 return;
278 }
279
280 input_thread_running = true;
281 input_thread = std::thread(&QtNXWebEngineView::InputThread, this);
282}
283
284void QtNXWebEngineView::StopInputThread() {
285 if (is_local) {
286 QWidget::releaseKeyboard();
287 }
62 288
63QString GetNXShimInjectionScript() { 289 input_thread_running = false;
64 return QString::fromStdString(NX_SHIM_INJECT_SCRIPT); 290 if (input_thread.joinable()) {
291 input_thread.join();
292 }
65} 293}
66 294
67NXInputWebEngineView::NXInputWebEngineView(QWidget* parent) : QWebEngineView(parent) {} 295void QtNXWebEngineView::InputThread() {
296 // Wait for 1 second before allowing any inputs to be processed.
297 std::this_thread::sleep_for(std::chrono::seconds(1));
298
299 if (is_local) {
300 QWidget::grabKeyboard();
301 }
302
303 while (input_thread_running) {
304 input_interpreter->PollInput();
305
306 HandleWindowFooterButtonPressedOnce<HIDButton::A, HIDButton::B, HIDButton::X, HIDButton::Y,
307 HIDButton::L, HIDButton::R>();
308
309 HandleWindowKeyButtonPressedOnce<HIDButton::DLeft, HIDButton::DUp, HIDButton::DRight,
310 HIDButton::DDown, HIDButton::LStickLeft,
311 HIDButton::LStickUp, HIDButton::LStickRight,
312 HIDButton::LStickDown>();
68 313
69void NXInputWebEngineView::keyPressEvent(QKeyEvent* event) { 314 HandleWindowKeyButtonHold<HIDButton::DLeft, HIDButton::DUp, HIDButton::DRight,
70 parent()->event(event); 315 HIDButton::DDown, HIDButton::LStickLeft, HIDButton::LStickUp,
316 HIDButton::LStickRight, HIDButton::LStickDown>();
317
318 std::this_thread::sleep_for(std::chrono::milliseconds(50));
319 }
71} 320}
72 321
73void NXInputWebEngineView::keyReleaseEvent(QKeyEvent* event) { 322void QtNXWebEngineView::LoadExtractedFonts() {
74 parent()->event(event); 323 QWebEngineScript nx_font_css;
324 QWebEngineScript load_nx_font;
325
326 const QString fonts_dir = QString::fromStdString(Common::FS::SanitizePath(
327 fmt::format("{}/fonts", Common::FS::GetUserPath(Common::FS::UserPath::CacheDir))));
328
329 nx_font_css.setName(QStringLiteral("nx_font_css.js"));
330 load_nx_font.setName(QStringLiteral("load_nx_font.js"));
331
332 nx_font_css.setSourceCode(
333 QString::fromStdString(NX_FONT_CSS)
334 .arg(fonts_dir + QStringLiteral("/FontStandard.ttf"))
335 .arg(fonts_dir + QStringLiteral("/FontChineseSimplified.ttf"))
336 .arg(fonts_dir + QStringLiteral("/FontExtendedChineseSimplified.ttf"))
337 .arg(fonts_dir + QStringLiteral("/FontChineseTraditional.ttf"))
338 .arg(fonts_dir + QStringLiteral("/FontKorean.ttf"))
339 .arg(fonts_dir + QStringLiteral("/FontNintendoExtended.ttf"))
340 .arg(fonts_dir + QStringLiteral("/FontNintendoExtended2.ttf")));
341 load_nx_font.setSourceCode(QString::fromStdString(LOAD_NX_FONT));
342
343 nx_font_css.setInjectionPoint(QWebEngineScript::DocumentReady);
344 load_nx_font.setInjectionPoint(QWebEngineScript::Deferred);
345
346 nx_font_css.setWorldId(QWebEngineScript::MainWorld);
347 load_nx_font.setWorldId(QWebEngineScript::MainWorld);
348
349 nx_font_css.setRunsOnSubFrames(true);
350 load_nx_font.setRunsOnSubFrames(true);
351
352 default_profile->scripts()->insert(nx_font_css);
353 default_profile->scripts()->insert(load_nx_font);
354
355 connect(
356 url_interceptor.get(), &UrlRequestInterceptor::FrameChanged, url_interceptor.get(),
357 [this] {
358 std::this_thread::sleep_for(std::chrono::milliseconds(50));
359 page()->runJavaScript(QString::fromStdString(LOAD_NX_FONT));
360 },
361 Qt::QueuedConnection);
75} 362}
76 363
77#endif 364#endif
78 365
79QtWebBrowser::QtWebBrowser(GMainWindow& main_window) { 366QtWebBrowser::QtWebBrowser(GMainWindow& main_window) {
80 connect(this, &QtWebBrowser::MainWindowOpenPage, &main_window, &GMainWindow::WebBrowserOpenPage, 367 connect(this, &QtWebBrowser::MainWindowOpenWebPage, &main_window,
81 Qt::QueuedConnection); 368 &GMainWindow::WebBrowserOpenWebPage, Qt::QueuedConnection);
82 connect(&main_window, &GMainWindow::WebBrowserUnpackRomFS, this, 369 connect(&main_window, &GMainWindow::WebBrowserExtractOfflineRomFS, this,
83 &QtWebBrowser::MainWindowUnpackRomFS, Qt::QueuedConnection); 370 &QtWebBrowser::MainWindowExtractOfflineRomFS, Qt::QueuedConnection);
84 connect(&main_window, &GMainWindow::WebBrowserFinishedBrowsing, this, 371 connect(&main_window, &GMainWindow::WebBrowserClosed, this,
85 &QtWebBrowser::MainWindowFinishedBrowsing, Qt::QueuedConnection); 372 &QtWebBrowser::MainWindowWebBrowserClosed, Qt::QueuedConnection);
86} 373}
87 374
88QtWebBrowser::~QtWebBrowser() = default; 375QtWebBrowser::~QtWebBrowser() = default;
89 376
90void QtWebBrowser::OpenPageLocal(std::string_view url, std::function<void()> unpack_romfs_callback, 377void QtWebBrowser::OpenLocalWebPage(
91 std::function<void()> finished_callback) { 378 std::string_view local_url, std::function<void()> extract_romfs_callback_,
92 this->unpack_romfs_callback = std::move(unpack_romfs_callback); 379 std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback_) const {
93 this->finished_callback = std::move(finished_callback); 380 extract_romfs_callback = std::move(extract_romfs_callback_);
381 callback = std::move(callback_);
382
383 const auto index = local_url.find('?');
384
385 if (index == std::string::npos) {
386 emit MainWindowOpenWebPage(local_url, "", true);
387 } else {
388 emit MainWindowOpenWebPage(local_url.substr(0, index), local_url.substr(index), true);
389 }
390}
391
392void QtWebBrowser::OpenExternalWebPage(
393 std::string_view external_url,
394 std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback_) const {
395 callback = std::move(callback_);
396
397 const auto index = external_url.find('?');
94 398
95 const auto index = url.find('?');
96 if (index == std::string::npos) { 399 if (index == std::string::npos) {
97 emit MainWindowOpenPage(url, ""); 400 emit MainWindowOpenWebPage(external_url, "", false);
98 } else { 401 } else {
99 const auto front = url.substr(0, index); 402 emit MainWindowOpenWebPage(external_url.substr(0, index), external_url.substr(index),
100 const auto back = url.substr(index); 403 false);
101 emit MainWindowOpenPage(front, back);
102 } 404 }
103} 405}
104 406
105void QtWebBrowser::MainWindowUnpackRomFS() { 407void QtWebBrowser::MainWindowExtractOfflineRomFS() {
106 // Acquire the HLE mutex 408 extract_romfs_callback();
107 std::lock_guard lock{HLE::g_hle_lock};
108 unpack_romfs_callback();
109} 409}
110 410
111void QtWebBrowser::MainWindowFinishedBrowsing() { 411void QtWebBrowser::MainWindowWebBrowserClosed(Service::AM::Applets::WebExitReason exit_reason,
112 // Acquire the HLE mutex 412 std::string last_url) {
113 std::lock_guard lock{HLE::g_hle_lock}; 413 callback(exit_reason, last_url);
114 finished_callback();
115} 414}
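
StartInputThread(), StopInputThread(), and InputThread() above follow a common polling-thread lifecycle: an atomic flag gates the loop, starting is idempotent, and shutdown flips the flag before joining. A dependency-free sketch, with Poll() standing in for InputInterpreter::PollInput():

#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

class Poller {
public:
    ~Poller() {
        Stop(); // joining in the destructor prevents a detached worker
    }

    void Start() {
        if (running) {
            return; // idempotent, like StartInputThread()
        }
        running = true;
        worker = std::thread(&Poller::Loop, this);
    }

    void Stop() {
        running = false; // the loop observes this and exits
        if (worker.joinable()) {
            worker.join();
        }
    }

private:
    void Loop() {
        while (running) {
            Poll();
            std::this_thread::sleep_for(std::chrono::milliseconds(50));
        }
    }

    void Poll() {
        std::cout << "poll input\n";
    }

    std::thread worker;
    std::atomic<bool> running{false};
};

int main() {
    Poller poller;
    poller.Start();
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
} // ~Poller stops and joins the worker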
diff --git a/src/yuzu/applets/web_browser.h b/src/yuzu/applets/web_browser.h
index b38437e46..47f960d69 100644
--- a/src/yuzu/applets/web_browser.h
+++ b/src/yuzu/applets/web_browser.h
@@ -1,10 +1,13 @@
1// Copyright 2018 yuzu Emulator Project 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <functional> 7#include <atomic>
8#include <memory>
9#include <thread>
10
8#include <QObject> 11#include <QObject>
9 12
10#ifdef YUZU_USE_QT_WEB_ENGINE 13#ifdef YUZU_USE_QT_WEB_ENGINE
@@ -13,19 +16,172 @@
13 16
14#include "core/frontend/applets/web_browser.h" 17#include "core/frontend/applets/web_browser.h"
15 18
19enum class HIDButton : u8;
20
16class GMainWindow; 21class GMainWindow;
22class InputInterpreter;
23class UrlRequestInterceptor;
24
25namespace Core {
26class System;
27}
28
29namespace InputCommon {
30class InputSubsystem;
31}
17 32
18#ifdef YUZU_USE_QT_WEB_ENGINE 33#ifdef YUZU_USE_QT_WEB_ENGINE
19 34
20QString GetNXShimInjectionScript(); 35enum class UserAgent {
36 WebApplet,
37 ShopN,
38 LoginApplet,
39 ShareApplet,
40 LobbyApplet,
41 WifiWebAuthApplet,
42};
43
44class QWebEngineProfile;
45class QWebEngineSettings;
46
47class QtNXWebEngineView : public QWebEngineView {
48 Q_OBJECT
21 49
22class NXInputWebEngineView : public QWebEngineView {
23public: 50public:
24 explicit NXInputWebEngineView(QWidget* parent = nullptr); 51 explicit QtNXWebEngineView(QWidget* parent, Core::System& system,
52 InputCommon::InputSubsystem* input_subsystem_);
53 ~QtNXWebEngineView() override;
54
55 /**
56 * Loads an HTML document that exists locally. Cannot be used to load external websites.
57 *
58 * @param main_url The URL to the file.
59 * @param additional_args Additional arguments appended to the main URL.
60 */
61 void LoadLocalWebPage(std::string_view main_url, std::string_view additional_args);
62
63 /**
64 * Loads an external website. Cannot be used to load local URLs.
65 *
66 * @param main_url The URL to the website.
67 * @param additional_args Additional arguments appended to the main URL.
68 */
69 void LoadExternalWebPage(std::string_view main_url, std::string_view additional_args);
70
71 /**
72 * Sets the background color of the web page.
73 *
74 * @param color The color to set.
75 */
76 void SetBackgroundColor(QColor color);
77
78 /**
79 * Sets the user agent of the web browser.
80 *
81 * @param user_agent The user agent enum.
82 */
83 void SetUserAgent(UserAgent user_agent);
84
85 [[nodiscard]] bool IsFinished() const;
86 void SetFinished(bool finished_);
87
88 [[nodiscard]] Service::AM::Applets::WebExitReason GetExitReason() const;
89 void SetExitReason(Service::AM::Applets::WebExitReason exit_reason_);
90
91 [[nodiscard]] const std::string& GetLastURL() const;
92 void SetLastURL(std::string last_url_);
93
94 /**
95 * Gets the URL currently requested by the webpage.
96 * This applies only to the main frame; subframes and other resources are ignored.
97 *
98 * @return Currently requested URL
99 */
100 [[nodiscard]] QString GetCurrentURL() const;
101
102public slots:
103 void hide();
25 104
26protected: 105protected:
27 void keyPressEvent(QKeyEvent* event) override; 106 void keyPressEvent(QKeyEvent* event) override;
28 void keyReleaseEvent(QKeyEvent* event) override; 107 void keyReleaseEvent(QKeyEvent* event) override;
108
109private:
110 /**
111 * Handles button presses to execute functions assigned in yuzu_key_callbacks.
112 * yuzu_key_callbacks contains specialized functions for the buttons in the window footer
113 * that can be overridden by games to achieve desired functionality.
114 *
115 * @tparam HIDButton The list of buttons contained in yuzu_key_callbacks
116 */
117 template <HIDButton... T>
118 void HandleWindowFooterButtonPressedOnce();
119
120 /**
121 * Handles button presses and converts them into keyboard input.
122 * This should only be used to convert D-Pad or Analog Stick input into arrow keys.
123 *
124 * @tparam HIDButton The list of buttons that can be converted into keyboard input.
125 */
126 template <HIDButton... T>
127 void HandleWindowKeyButtonPressedOnce();
128
129 /**
130 * Handles button holds and converts them into keyboard input.
131 * This should only be used to convert D-Pad or Analog Stick input into arrow keys.
132 *
133 * @tparam HIDButton The list of buttons that can be converted into keyboard input.
134 */
135 template <HIDButton... T>
136 void HandleWindowKeyButtonHold();
137
138 /**
139 * Sends a key press event to QWebEngineView.
140 *
141 * @param key Qt key code.
142 */
143 void SendKeyPressEvent(int key);
144
145 /**
146 * Sends multiple key press events to QWebEngineView.
147 *
148 * @tparam int Qt key code.
149 */
150 template <int... T>
151 void SendMultipleKeyPressEvents() {
152 (SendKeyPressEvent(T), ...);
153 }
154
155 void StartInputThread();
156 void StopInputThread();
157
158 /// The thread where input is being polled and processed.
159 void InputThread();
160
161 /// Loads the extracted fonts using JavaScript.
162 void LoadExtractedFonts();
163
164 InputCommon::InputSubsystem* input_subsystem;
165
166 std::unique_ptr<UrlRequestInterceptor> url_interceptor;
167
168 std::unique_ptr<InputInterpreter> input_interpreter;
169
170 std::thread input_thread;
171
172 std::atomic<bool> input_thread_running{};
173
174 std::atomic<bool> finished{};
175
176 Service::AM::Applets::WebExitReason exit_reason{
177 Service::AM::Applets::WebExitReason::EndButtonPressed};
178
179 std::string last_url{"http://localhost/"};
180
181 bool is_local{};
182
183 QWebEngineProfile* default_profile;
184 QWebEngineSettings* global_settings;
29}; 185};
30 186
31#endif 187#endif
@@ -34,19 +190,28 @@ class QtWebBrowser final : public QObject, public Core::Frontend::WebBrowserAppl
34 Q_OBJECT 190 Q_OBJECT
35 191
36public: 192public:
37 explicit QtWebBrowser(GMainWindow& main_window); 193 explicit QtWebBrowser(GMainWindow& parent);
38 ~QtWebBrowser() override; 194 ~QtWebBrowser() override;
39 195
40 void OpenPageLocal(std::string_view url, std::function<void()> unpack_romfs_callback, 196 void OpenLocalWebPage(std::string_view local_url, std::function<void()> extract_romfs_callback_,
41 std::function<void()> finished_callback) override; 197 std::function<void(Service::AM::Applets::WebExitReason, std::string)>
198 callback_) const override;
199
200 void OpenExternalWebPage(std::string_view external_url,
201 std::function<void(Service::AM::Applets::WebExitReason, std::string)>
202 callback_) const override;
42 203
43signals: 204signals:
44 void MainWindowOpenPage(std::string_view filename, std::string_view additional_args) const; 205 void MainWindowOpenWebPage(std::string_view main_url, std::string_view additional_args,
206 bool is_local) const;
45 207
46private: 208private:
47 void MainWindowUnpackRomFS(); 209 void MainWindowExtractOfflineRomFS();
48 void MainWindowFinishedBrowsing(); 210
211 void MainWindowWebBrowserClosed(Service::AM::Applets::WebExitReason exit_reason,
212 std::string last_url);
213
214 mutable std::function<void()> extract_romfs_callback;
49 215
50 std::function<void()> unpack_romfs_callback; 216 mutable std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback;
51 std::function<void()> finished_callback;
52}; 217};
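
The three HandleWindow*<HIDButton...>() templates declared above apply a lambda to every value of a non-type template parameter pack via the C++17 comma fold (f(T), ...). A standalone sketch of that dispatch shape, with a hypothetical Button enum:

#include <iostream>

enum class Button { A, B, X, Y };

void Press(Button button) {
    std::cout << "pressed button " << static_cast<int>(button) << '\n';
}

template <Button... T>
void PressAll() {
    const auto f = [](Button button) { Press(button); };
    (f(T), ...); // comma fold: expands to f(Button::A), f(Button::B), ... in order
}

int main() {
    PressAll<Button::A, Button::B, Button::X, Button::Y>();
}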
diff --git a/src/yuzu/applets/web_browser_scripts.h b/src/yuzu/applets/web_browser_scripts.h
new file mode 100644
index 000000000..992837a85
--- /dev/null
+++ b/src/yuzu/applets/web_browser_scripts.h
@@ -0,0 +1,193 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7constexpr char NX_FONT_CSS[] = R"(
8(function() {
9 css = document.createElement('style');
10 css.type = 'text/css';
11 css.id = 'nx_font';
12 css.innerText = `
13/* FontStandard */
14@font-face {
15 font-family: 'FontStandard';
16 src: url('%1') format('truetype');
17}
18
19/* FontChineseSimplified */
20@font-face {
21 font-family: 'FontChineseSimplified';
22 src: url('%2') format('truetype');
23}
24
25/* FontExtendedChineseSimplified */
26@font-face {
27 font-family: 'FontExtendedChineseSimplified';
28 src: url('%3') format('truetype');
29}
30
31/* FontChineseTraditional */
32@font-face {
33 font-family: 'FontChineseTraditional';
34 src: url('%4') format('truetype');
35}
36
37/* FontKorean */
38@font-face {
39 font-family: 'FontKorean';
40 src: url('%5') format('truetype');
41}
42
43/* FontNintendoExtended */
44@font-face {
45 font-family: 'NintendoExt003';
46 src: url('%6') format('truetype');
47}
48
49/* FontNintendoExtended2 */
50@font-face {
51 font-family: 'NintendoExt003';
52 src: url('%7') format('truetype');
53}
54`;
55
56 document.head.appendChild(css);
57})();
58)";
59
60constexpr char LOAD_NX_FONT[] = R"(
61(function() {
62 var elements = document.querySelectorAll("*");
63
64 for (var i = 0; i < elements.length; i++) {
65 var style = window.getComputedStyle(elements[i], null);
66 if (style.fontFamily.includes("Arial") || style.fontFamily.includes("Calibri") ||
67 style.fontFamily.includes("Century") || style.fontFamily.includes("Times New Roman")) {
68 elements[i].style.fontFamily = "FontStandard, FontChineseSimplified, FontExtendedChineseSimplified, FontChineseTraditional, FontKorean, NintendoExt003";
69 } else {
70 elements[i].style.fontFamily = style.fontFamily + ", FontStandard, FontChineseSimplified, FontExtendedChineseSimplified, FontChineseTraditional, FontKorean, NintendoExt003";
71 }
72 }
73})();
74)";
75
76constexpr char GAMEPAD_SCRIPT[] = R"(
77window.addEventListener("gamepadconnected", function(e) {
78 console.log("Gamepad connected at index %d: %s. %d buttons, %d axes.",
79 e.gamepad.index, e.gamepad.id, e.gamepad.buttons.length, e.gamepad.axes.length);
80});
81
82window.addEventListener("gamepaddisconnected", function(e) {
83 console.log("Gamepad disconnected from index %d: %s", e.gamepad.index, e.gamepad.id);
84});
85)";
86
87constexpr char WINDOW_NX_SCRIPT[] = R"(
88var end_applet = false;
89var yuzu_key_callbacks = [];
90
91(function() {
92 class WindowNX {
93 constructor() {
94 yuzu_key_callbacks[1] = function() { window.history.back(); };
95 yuzu_key_callbacks[2] = function() { window.nx.endApplet(); };
96 }
97
98 addEventListener(type, listener, options) {
99 console.log("nx.addEventListener called, type=%s", type);
100
101 window.addEventListener(type, listener, options);
102 }
103
104 endApplet() {
105 console.log("nx.endApplet called");
106
107 end_applet = true;
108 }
109
110 playSystemSe(system_se) {
111 console.log("nx.playSystemSe is not implemented, system_se=%s", system_se);
112 }
113
114 sendMessage(message) {
115 console.log("nx.sendMessage is not implemented, message=%s", message);
116 }
117
118 setCursorScrollSpeed(scroll_speed) {
119 console.log("nx.setCursorScrollSpeed is not implemented, scroll_speed=%d", scroll_speed);
120 }
121 }
122
123 class WindowNXFooter {
124 setAssign(key, label, func, option) {
125 console.log("nx.footer.setAssign called, key=%s", key);
126
127 switch (key) {
128 case "A":
129 yuzu_key_callbacks[0] = func;
130 break;
131 case "B":
132 yuzu_key_callbacks[1] = func;
133 break;
134 case "X":
135 yuzu_key_callbacks[2] = func;
136 break;
137 case "Y":
138 yuzu_key_callbacks[3] = func;
139 break;
140 case "L":
141 yuzu_key_callbacks[6] = func;
142 break;
143 case "R":
144 yuzu_key_callbacks[7] = func;
145 break;
146 }
147 }
148
149 setFixed(kind) {
150 console.log("nx.footer.setFixed is not implemented, kind=%s", kind);
151 }
152
153 unsetAssign(key) {
154 console.log("nx.footer.unsetAssign called, key=%s", key);
155
156 switch (key) {
157 case "A":
158 yuzu_key_callbacks[0] = function() {};
159 break;
160 case "B":
161 yuzu_key_callbacks[1] = function() {};
162 break;
163 case "X":
164 yuzu_key_callbacks[2] = function() {};
165 break;
166 case "Y":
167 yuzu_key_callbacks[3] = function() {};
168 break;
169 case "L":
170 yuzu_key_callbacks[6] = function() {};
171 break;
172 case "R":
173 yuzu_key_callbacks[7] = function() {};
174 break;
175 }
176 }
177 }
178
179 class WindowNXPlayReport {
180 incrementCounter(counter_id) {
181 console.log("nx.playReport.incrementCounter is not implemented, counter_id=%d", counter_id);
182 }
183
184 setCounterSetIdentifier(counter_id) {
185 console.log("nx.playReport.setCounterSetIdentifier is not implemented, counter_id=%d", counter_id);
186 }
187 }
188
189 window.nx = new WindowNX();
190 window.nx.footer = new WindowNXFooter();
191 window.nx.playReport = new WindowNXPlayReport();
192})();
193)";
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 489104d5f..e124836b5 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -19,7 +19,7 @@
19#include <QOpenGLContext> 19#include <QOpenGLContext>
20#endif 20#endif
21 21
22#if !defined(WIN32) && HAS_VULKAN 22#if !defined(WIN32)
23#include <qpa/qplatformnativeinterface.h> 23#include <qpa/qplatformnativeinterface.h>
24#endif 24#endif
25 25
@@ -241,14 +241,12 @@ private:
241 std::unique_ptr<Core::Frontend::GraphicsContext> context; 241 std::unique_ptr<Core::Frontend::GraphicsContext> context;
242}; 242};
243 243
244#ifdef HAS_VULKAN
245class VulkanRenderWidget : public RenderWidget { 244class VulkanRenderWidget : public RenderWidget {
246public: 245public:
247 explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) { 246 explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {
248 windowHandle()->setSurfaceType(QWindow::VulkanSurface); 247 windowHandle()->setSurfaceType(QWindow::VulkanSurface);
249 } 248 }
250}; 249};
251#endif
252 250
253static Core::Frontend::WindowSystemType GetWindowSystemType() { 251static Core::Frontend::WindowSystemType GetWindowSystemType() {
254 // Determine WSI type based on Qt platform. 252 // Determine WSI type based on Qt platform.
@@ -268,7 +266,6 @@ static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow*
268 Core::Frontend::EmuWindow::WindowSystemInfo wsi; 266 Core::Frontend::EmuWindow::WindowSystemInfo wsi;
269 wsi.type = GetWindowSystemType(); 267 wsi.type = GetWindowSystemType();
270 268
271#ifdef HAS_VULKAN
272 // Our Win32 Qt external doesn't have the private API. 269 // Our Win32 Qt external doesn't have the private API.
273#if defined(WIN32) || defined(__APPLE__) 270#if defined(WIN32) || defined(__APPLE__)
274 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; 271 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
@@ -281,7 +278,6 @@ static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow*
281 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; 278 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
282#endif 279#endif
283 wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f; 280 wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
284#endif
285 281
286 return wsi; 282 return wsi;
287} 283}
@@ -569,6 +565,10 @@ void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_p
569 layout); 565 layout);
570} 566}
571 567
568bool GRenderWindow::IsLoadingComplete() const {
569 return first_frame;
570}
571
572void GRenderWindow::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal_size) { 572void GRenderWindow::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal_size) {
573 setMinimumSize(minimal_size.first, minimal_size.second); 573 setMinimumSize(minimal_size.first, minimal_size.second);
574} 574}
@@ -594,18 +594,12 @@ bool GRenderWindow::InitializeOpenGL() {
594} 594}
595 595
596bool GRenderWindow::InitializeVulkan() { 596bool GRenderWindow::InitializeVulkan() {
597#ifdef HAS_VULKAN
598 auto child = new VulkanRenderWidget(this); 597 auto child = new VulkanRenderWidget(this);
599 child_widget = child; 598 child_widget = child;
600 child_widget->windowHandle()->create(); 599 child_widget->windowHandle()->create();
601 main_context = std::make_unique<DummyContext>(); 600 main_context = std::make_unique<DummyContext>();
602 601
603 return true; 602 return true;
604#else
605 QMessageBox::critical(this, tr("Vulkan not available!"),
606 tr("yuzu has not been compiled with Vulkan support."));
607 return false;
608#endif
609} 603}
610 604
611bool GRenderWindow::LoadOpenGL() { 605bool GRenderWindow::LoadOpenGL() {
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index a6d788d40..ebe5cb965 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -162,6 +162,8 @@ public:
162 /// Destroy the previous run's child_widget which should also destroy the child_window 162 /// Destroy the previous run's child_widget which should also destroy the child_window
163 void ReleaseRenderTarget(); 163 void ReleaseRenderTarget();
164 164
165 bool IsLoadingComplete() const;
166
165 void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); 167 void CaptureScreenshot(u32 res_scale, const QString& screenshot_path);
166 168
167 std::pair<u32, u32> ScaleTouch(const QPointF& pos) const; 169 std::pair<u32, u32> ScaleTouch(const QPointF& pos) const;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index fcc38b3af..34c2a5f8b 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -511,6 +511,9 @@ void Config::ReadControlValues() {
511 ReadTouchscreenValues(); 511 ReadTouchscreenValues();
512 ReadMotionTouchValues(); 512 ReadMotionTouchValues();
513 513
514 Settings::values.emulate_analog_keyboard =
515 ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool();
516
514 ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), false); 517 ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), false);
515 ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"), 518 ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"),
516 true); 519 true);
@@ -634,8 +637,6 @@ void Config::ReadDebuggingValues() {
634 // Intentionally not using the QT default setting as this is intended to be changed in the ini 637 // Intentionally not using the QT default setting as this is intended to be changed in the ini
635 Settings::values.record_frame_times = 638 Settings::values.record_frame_times =
636 qt_config->value(QStringLiteral("record_frame_times"), false).toBool(); 639 qt_config->value(QStringLiteral("record_frame_times"), false).toBool();
637 Settings::values.use_gdbstub = ReadSetting(QStringLiteral("use_gdbstub"), false).toBool();
638 Settings::values.gdbstub_port = ReadSetting(QStringLiteral("gdbstub_port"), 24689).toInt();
639 Settings::values.program_args = 640 Settings::values.program_args =
640 ReadSetting(QStringLiteral("program_args"), QString{}).toString().toStdString(); 641 ReadSetting(QStringLiteral("program_args"), QString{}).toString().toStdString();
641 Settings::values.dump_exefs = ReadSetting(QStringLiteral("dump_exefs"), false).toBool(); 642 Settings::values.dump_exefs = ReadSetting(QStringLiteral("dump_exefs"), false).toBool();
@@ -1186,6 +1187,8 @@ void Config::SaveControlValues() {
1186 QString::fromStdString(Settings::values.touch_device), 1187 QString::fromStdString(Settings::values.touch_device),
1187 QStringLiteral("engine:emu_window")); 1188 QStringLiteral("engine:emu_window"));
1188 WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false); 1189 WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false);
1190 WriteSetting(QStringLiteral("emulate_analog_keyboard"),
1191 Settings::values.emulate_analog_keyboard, false);
1189 1192
1190 qt_config->endGroup(); 1193 qt_config->endGroup();
1191} 1194}
@@ -1231,8 +1234,6 @@ void Config::SaveDebuggingValues() {
1231 1234
1232 // Intentionally not using the QT default setting as this is intended to be changed in the ini 1235 // Intentionally not using the QT default setting as this is intended to be changed in the ini
1233 qt_config->setValue(QStringLiteral("record_frame_times"), Settings::values.record_frame_times); 1236 qt_config->setValue(QStringLiteral("record_frame_times"), Settings::values.record_frame_times);
1234 WriteSetting(QStringLiteral("use_gdbstub"), Settings::values.use_gdbstub, false);
1235 WriteSetting(QStringLiteral("gdbstub_port"), Settings::values.gdbstub_port, 24689);
1236 WriteSetting(QStringLiteral("program_args"), 1237 WriteSetting(QStringLiteral("program_args"),
1237 QString::fromStdString(Settings::values.program_args), QString{}); 1238 QString::fromStdString(Settings::values.program_args), QString{});
1238 WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); 1239 WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false);
@@ -1588,14 +1589,12 @@ void Config::WriteSettingGlobal(const QString& name, const QVariant& value, bool
1588 1589
1589void Config::Reload() { 1590void Config::Reload() {
1590 ReadValues(); 1591 ReadValues();
1591 Settings::Sanitize();
1592 // To apply default value changes 1592 // To apply default value changes
1593 SaveValues(); 1593 SaveValues();
1594 Settings::Apply(Core::System::GetInstance()); 1594 Settings::Apply(Core::System::GetInstance());
1595} 1595}
1596 1596
1597void Config::Save() { 1597void Config::Save() {
1598 Settings::Sanitize();
1599 SaveValues(); 1598 SaveValues();
1600} 1599}
1601 1600
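Note: the emulate_analog_keyboard addition above shows the usual persistence pattern, ReadSetting(name, default) on load and WriteSetting(name, value, default) on save. A sketch of what such helpers reduce to over plain QSettings; treat the "/default" bookkeeping key as an assumption, and the real helpers additionally support per-game overrides via ReadSettingGlobal/WriteSettingGlobal:

    #include <QSettings>
    #include <QString>
    #include <QVariant>

    // Read a value, falling back to the supplied default when absent.
    QVariant ReadSetting(QSettings& config, const QString& name, const QVariant& fallback) {
        return config.value(name, fallback);
    }

    // Write the value and record whether it still equals the stock default,
    // so later default changes can propagate to untouched configs.
    void WriteSetting(QSettings& config, const QString& name, const QVariant& value,
                      const QVariant& fallback) {
        config.setValue(name + QStringLiteral("/default"), value == fallback);
        config.setValue(name, value);
    }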
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index 027099ab7..121873f95 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -28,9 +28,6 @@ ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::Co
28ConfigureDebug::~ConfigureDebug() = default; 28ConfigureDebug::~ConfigureDebug() = default;
29 29
30void ConfigureDebug::SetConfiguration() { 30void ConfigureDebug::SetConfiguration() {
31 ui->toggle_gdbstub->setChecked(Settings::values.use_gdbstub);
32 ui->gdbport_spinbox->setEnabled(Settings::values.use_gdbstub);
33 ui->gdbport_spinbox->setValue(Settings::values.gdbstub_port);
34 ui->toggle_console->setEnabled(!Core::System::GetInstance().IsPoweredOn()); 31 ui->toggle_console->setEnabled(!Core::System::GetInstance().IsPoweredOn());
35 ui->toggle_console->setChecked(UISettings::values.show_console); 32 ui->toggle_console->setChecked(UISettings::values.show_console);
36 ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter)); 33 ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter));
@@ -45,8 +42,6 @@ void ConfigureDebug::SetConfiguration() {
45} 42}
46 43
47void ConfigureDebug::ApplyConfiguration() { 44void ConfigureDebug::ApplyConfiguration() {
48 Settings::values.use_gdbstub = ui->toggle_gdbstub->isChecked();
49 Settings::values.gdbstub_port = ui->gdbport_spinbox->value();
50 UISettings::values.show_console = ui->toggle_console->isChecked(); 45 UISettings::values.show_console = ui->toggle_console->isChecked();
51 Settings::values.log_filter = ui->log_filter_edit->text().toStdString(); 46 Settings::values.log_filter = ui->log_filter_edit->text().toStdString();
52 Settings::values.program_args = ui->homebrew_args_edit->text().toStdString(); 47 Settings::values.program_args = ui->homebrew_args_edit->text().toStdString();
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index 6f94fe304..9186aa732 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -7,7 +7,7 @@
7 <x>0</x> 7 <x>0</x>
8 <y>0</y> 8 <y>0</y>
9 <width>400</width> 9 <width>400</width>
10 <height>467</height> 10 <height>486</height>
11 </rect> 11 </rect>
12 </property> 12 </property>
13 <property name="windowTitle"> 13 <property name="windowTitle">
@@ -15,57 +15,6 @@
15 </property> 15 </property>
16 <layout class="QVBoxLayout" name="verticalLayout_1"> 16 <layout class="QVBoxLayout" name="verticalLayout_1">
17 <item> 17 <item>
18 <layout class="QVBoxLayout" name="verticalLayout_2">
19 <item>
20 <widget class="QGroupBox" name="groupBox">
21 <property name="title">
22 <string>GDB</string>
23 </property>
24 <layout class="QVBoxLayout" name="verticalLayout_3">
25 <item>
26 <layout class="QHBoxLayout" name="horizontalLayout_1">
27 <item>
28 <widget class="QCheckBox" name="toggle_gdbstub">
29 <property name="text">
30 <string>Enable GDB Stub</string>
31 </property>
32 </widget>
33 </item>
34 <item>
35 <spacer name="horizontalSpacer">
36 <property name="orientation">
37 <enum>Qt::Horizontal</enum>
38 </property>
39 <property name="sizeHint" stdset="0">
40 <size>
41 <width>40</width>
42 <height>20</height>
43 </size>
44 </property>
45 </spacer>
46 </item>
47 <item>
48 <widget class="QLabel" name="label_1">
49 <property name="text">
50 <string>Port:</string>
51 </property>
52 </widget>
53 </item>
54 <item>
55 <widget class="QSpinBox" name="gdbport_spinbox">
56 <property name="maximum">
57 <number>65536</number>
58 </property>
59 </widget>
60 </item>
61 </layout>
62 </item>
63 </layout>
64 </widget>
65 </item>
66 </layout>
67 </item>
68 <item>
69 <widget class="QGroupBox" name="groupBox_2"> 18 <widget class="QGroupBox" name="groupBox_2">
70 <property name="title"> 19 <property name="title">
71 <string>Logging</string> 20 <string>Logging</string>
@@ -258,8 +207,6 @@
258 </layout> 207 </layout>
259 </widget> 208 </widget>
260 <tabstops> 209 <tabstops>
261 <tabstop>toggle_gdbstub</tabstop>
262 <tabstop>gdbport_spinbox</tabstop>
263 <tabstop>log_filter_edit</tabstop> 210 <tabstop>log_filter_edit</tabstop>
264 <tabstop>toggle_console</tabstop> 211 <tabstop>toggle_console</tabstop>
265 <tabstop>open_log_button</tabstop> 212 <tabstop>open_log_button</tabstop>
@@ -269,22 +216,5 @@
269 <tabstop>quest_flag</tabstop> 216 <tabstop>quest_flag</tabstop>
270 </tabstops> 217 </tabstops>
271 <resources/> 218 <resources/>
272 <connections> 219 <connections/>
273 <connection>
274 <sender>toggle_gdbstub</sender>
275 <signal>toggled(bool)</signal>
276 <receiver>gdbport_spinbox</receiver>
277 <slot>setEnabled(bool)</slot>
278 <hints>
279 <hint type="sourcelabel">
280 <x>84</x>
281 <y>157</y>
282 </hint>
283 <hint type="destinationlabel">
284 <x>342</x>
285 <y>158</y>
286 </hint>
287 </hints>
288 </connection>
289 </connections>
290</ui> 220</ui>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 6fda0ce35..b78a5dff0 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -4,22 +4,17 @@
4 4
5#include <QColorDialog> 5#include <QColorDialog>
6#include <QComboBox> 6#include <QComboBox>
7#ifdef HAS_VULKAN
8#include <QVulkanInstance> 7#include <QVulkanInstance>
9#endif
10 8
11#include "common/common_types.h" 9#include "common/common_types.h"
12#include "common/logging/log.h" 10#include "common/logging/log.h"
13#include "core/core.h" 11#include "core/core.h"
14#include "core/settings.h" 12#include "core/settings.h"
15#include "ui_configure_graphics.h" 13#include "ui_configure_graphics.h"
14#include "video_core/renderer_vulkan/renderer_vulkan.h"
16#include "yuzu/configuration/configuration_shared.h" 15#include "yuzu/configuration/configuration_shared.h"
17#include "yuzu/configuration/configure_graphics.h" 16#include "yuzu/configuration/configure_graphics.h"
18 17
19#ifdef HAS_VULKAN
20#include "video_core/renderer_vulkan/renderer_vulkan.h"
21#endif
22
23ConfigureGraphics::ConfigureGraphics(QWidget* parent) 18ConfigureGraphics::ConfigureGraphics(QWidget* parent)
24 : QWidget(parent), ui(new Ui::ConfigureGraphics) { 19 : QWidget(parent), ui(new Ui::ConfigureGraphics) {
25 vulkan_device = Settings::values.vulkan_device.GetValue(); 20 vulkan_device = Settings::values.vulkan_device.GetValue();
@@ -218,12 +213,10 @@ void ConfigureGraphics::UpdateDeviceComboBox() {
218} 213}
219 214
220void ConfigureGraphics::RetrieveVulkanDevices() { 215void ConfigureGraphics::RetrieveVulkanDevices() {
221#ifdef HAS_VULKAN
222 vulkan_devices.clear(); 216 vulkan_devices.clear();
223 for (auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { 217 for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) {
224 vulkan_devices.push_back(QString::fromStdString(name)); 218 vulkan_devices.push_back(QString::fromStdString(name));
225 } 219 }
226#endif
227} 220}
228 221
229Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { 222Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
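Note: with the HAS_VULKAN guard gone, RetrieveVulkanDevices() always calls Vulkan::RendererVulkan::EnumerateDevices(). As a rough sketch of what such an enumeration wraps, using the plain Vulkan C API against an already-created instance (this is not yuzu's implementation, and error handling is omitted):

    #include <cstdint>
    #include <string>
    #include <vector>
    #include <vulkan/vulkan.h>

    // List the device names exposed by a Vulkan instance.
    std::vector<std::string> EnumerateDeviceNames(VkInstance instance) {
        std::uint32_t count = 0;
        vkEnumeratePhysicalDevices(instance, &count, nullptr);

        std::vector<VkPhysicalDevice> devices(count);
        vkEnumeratePhysicalDevices(instance, &count, devices.data());

        std::vector<std::string> names;
        names.reserve(devices.size());
        for (const VkPhysicalDevice device : devices) {
            VkPhysicalDeviceProperties properties{};
            vkGetPhysicalDeviceProperties(device, &properties);
            names.emplace_back(properties.deviceName);
        }
        return names;
    }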
diff --git a/src/yuzu/configuration/configure_input_advanced.cpp b/src/yuzu/configuration/configure_input_advanced.cpp
index abaf03630..4e557bc6f 100644
--- a/src/yuzu/configuration/configure_input_advanced.cpp
+++ b/src/yuzu/configuration/configure_input_advanced.cpp
@@ -121,6 +121,7 @@ void ConfigureInputAdvanced::ApplyConfiguration() {
121 Settings::values.debug_pad_enabled = ui->debug_enabled->isChecked(); 121 Settings::values.debug_pad_enabled = ui->debug_enabled->isChecked();
122 Settings::values.mouse_enabled = ui->mouse_enabled->isChecked(); 122 Settings::values.mouse_enabled = ui->mouse_enabled->isChecked();
123 Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked(); 123 Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked();
124 Settings::values.emulate_analog_keyboard = ui->emulate_analog_keyboard->isChecked();
124 Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked(); 125 Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked();
125} 126}
126 127
@@ -147,6 +148,7 @@ void ConfigureInputAdvanced::LoadConfiguration() {
147 ui->debug_enabled->setChecked(Settings::values.debug_pad_enabled); 148 ui->debug_enabled->setChecked(Settings::values.debug_pad_enabled);
148 ui->mouse_enabled->setChecked(Settings::values.mouse_enabled); 149 ui->mouse_enabled->setChecked(Settings::values.mouse_enabled);
149 ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled); 150 ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled);
151 ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard);
150 ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled); 152 ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled);
151 153
152 UpdateUIEnabled(); 154 UpdateUIEnabled();
diff --git a/src/yuzu/configuration/configure_input_advanced.ui b/src/yuzu/configuration/configure_input_advanced.ui
index a880a7c68..f207e5d3b 100644
--- a/src/yuzu/configuration/configure_input_advanced.ui
+++ b/src/yuzu/configuration/configure_input_advanced.ui
@@ -2546,14 +2546,27 @@
2546 </property> 2546 </property>
2547 </widget> 2547 </widget>
2548 </item> 2548 </item>
2549 <item row="4" column="2"> 2549 <item row="1" column="0">
2550 <widget class="QCheckBox" name="emulate_analog_keyboard">
2551 <property name="minimumSize">
2552 <size>
2553 <width>0</width>
2554 <height>23</height>
2555 </size>
2556 </property>
2557 <property name="text">
2558 <string>Emulate Analog with Keyboard Input</string>
2559 </property>
2560 </widget>
2561 </item>
2562 <item row="5" column="2">
2550 <widget class="QPushButton" name="touchscreen_advanced"> 2563 <widget class="QPushButton" name="touchscreen_advanced">
2551 <property name="text"> 2564 <property name="text">
2552 <string>Advanced</string> 2565 <string>Advanced</string>
2553 </property> 2566 </property>
2554 </widget> 2567 </widget>
2555 </item> 2568 </item>
2556 <item row="1" column="1"> 2569 <item row="2" column="1">
2557 <spacer name="horizontalSpacer_8"> 2570 <spacer name="horizontalSpacer_8">
2558 <property name="orientation"> 2571 <property name="orientation">
2559 <enum>Qt::Horizontal</enum> 2572 <enum>Qt::Horizontal</enum>
@@ -2569,21 +2582,21 @@
2569 </property> 2582 </property>
2570 </spacer> 2583 </spacer>
2571 </item> 2584 </item>
2572 <item row="1" column="2"> 2585 <item row="2" column="2">
2573 <widget class="QPushButton" name="mouse_advanced"> 2586 <widget class="QPushButton" name="mouse_advanced">
2574 <property name="text"> 2587 <property name="text">
2575 <string>Advanced</string> 2588 <string>Advanced</string>
2576 </property> 2589 </property>
2577 </widget> 2590 </widget>
2578 </item> 2591 </item>
2579 <item row="4" column="0"> 2592 <item row="5" column="0">
2580 <widget class="QCheckBox" name="touchscreen_enabled"> 2593 <widget class="QCheckBox" name="touchscreen_enabled">
2581 <property name="text"> 2594 <property name="text">
2582 <string>Touchscreen</string> 2595 <string>Touchscreen</string>
2583 </property> 2596 </property>
2584 </widget> 2597 </widget>
2585 </item> 2598 </item>
2586 <item row="1" column="0"> 2599 <item row="2" column="0">
2587 <widget class="QCheckBox" name="mouse_enabled"> 2600 <widget class="QCheckBox" name="mouse_enabled">
2588 <property name="minimumSize"> 2601 <property name="minimumSize">
2589 <size> 2602 <size>
@@ -2596,28 +2609,28 @@
2596 </property> 2609 </property>
2597 </widget> 2610 </widget>
2598 </item> 2611 </item>
2599 <item row="6" column="0"> 2612 <item row="7" column="0">
2600 <widget class="QLabel" name="motion_touch"> 2613 <widget class="QLabel" name="motion_touch">
2601 <property name="text"> 2614 <property name="text">
2602 <string>Motion / Touch</string> 2615 <string>Motion / Touch</string>
2603 </property> 2616 </property>
2604 </widget> 2617 </widget>
2605 </item> 2618 </item>
2606 <item row="6" column="2"> 2619 <item row="7" column="2">
2607 <widget class="QPushButton" name="buttonMotionTouch"> 2620 <widget class="QPushButton" name="buttonMotionTouch">
2608 <property name="text"> 2621 <property name="text">
2609 <string>Configure</string> 2622 <string>Configure</string>
2610 </property> 2623 </property>
2611 </widget> 2624 </widget>
2612 </item> 2625 </item>
2613 <item row="5" column="0"> 2626 <item row="6" column="0">
2614 <widget class="QCheckBox" name="debug_enabled"> 2627 <widget class="QCheckBox" name="debug_enabled">
2615 <property name="text"> 2628 <property name="text">
2616 <string>Debug Controller</string> 2629 <string>Debug Controller</string>
2617 </property> 2630 </property>
2618 </widget> 2631 </widget>
2619 </item> 2632 </item>
2620 <item row="5" column="2"> 2633 <item row="6" column="2">
2621 <widget class="QPushButton" name="debug_configure"> 2634 <widget class="QPushButton" name="debug_configure">
2622 <property name="text"> 2635 <property name="text">
2623 <string>Configure</string> 2636 <string>Configure</string>
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index f9915fb7a..3c7500ee3 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -173,61 +173,31 @@ QString AnalogToText(const Common::ParamPackage& param, const std::string& dir)
173 return ButtonToText(Common::ParamPackage{param.Get(dir, "")}); 173 return ButtonToText(Common::ParamPackage{param.Get(dir, "")});
174 } 174 }
175 175
176 if (param.Get("engine", "") == "sdl") { 176 const auto engine_str = param.Get("engine", "");
177 const QString axis_x_str = QString::fromStdString(param.Get("axis_x", ""));
178 const QString axis_y_str = QString::fromStdString(param.Get("axis_y", ""));
179 const bool invert_x = param.Get("invert_x", "+") == "-";
180 const bool invert_y = param.Get("invert_y", "+") == "-";
181 if (engine_str == "sdl" || engine_str == "gcpad" || engine_str == "mouse") {
177 if (dir == "modifier") { 182 if (dir == "modifier") {
178 return QObject::tr("[unused]"); 183 return QObject::tr("[unused]");
179 } 184 }
180 185
181 if (dir == "left" || dir == "right") { 186 if (dir == "left") {
182 const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); 187 const QString invert_x_str = QString::fromStdString(invert_x ? "+" : "-");
183 188 return QObject::tr("Axis %1%2").arg(axis_x_str, invert_x_str);
184 return QObject::tr("Axis %1").arg(axis_x_str);
185 }
186
187 if (dir == "up" || dir == "down") {
188 const QString axis_y_str = QString::fromStdString(param.Get("axis_y", ""));
189
190 return QObject::tr("Axis %1").arg(axis_y_str);
191 } 189 }
192 190 if (dir == "right") {
193 return {}; 191 const QString invert_x_str = QString::fromStdString(invert_x ? "-" : "+");
194 } 192 return QObject::tr("Axis %1%2").arg(axis_x_str, invert_x_str);
195
196 if (param.Get("engine", "") == "gcpad") {
197 if (dir == "modifier") {
198 return QObject::tr("[unused]");
199 } 193 }
200 194 if (dir == "up") {
201 if (dir == "left" || dir == "right") { 195 const QString invert_y_str = QString::fromStdString(invert_y ? "-" : "+");
202 const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); 196 return QObject::tr("Axis %1%2").arg(axis_y_str, invert_y_str);
203
204 return QObject::tr("GC Axis %1").arg(axis_x_str);
205 } 197 }
206 198 if (dir == "down") {
207 if (dir == "up" || dir == "down") { 199 const QString invert_y_str = QString::fromStdString(invert_y ? "+" : "-");
208 const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); 200 return QObject::tr("Axis %1%2").arg(axis_y_str, invert_y_str);
209
210 return QObject::tr("GC Axis %1").arg(axis_y_str);
211 }
212
213 return {};
214 }
215
216 if (param.Get("engine", "") == "mouse") {
217 if (dir == "modifier") {
218 return QObject::tr("[unused]");
219 }
220
221 if (dir == "left" || dir == "right") {
222 const QString axis_x_str = QString::fromStdString(param.Get("axis_x", ""));
223
224 return QObject::tr("Mouse %1").arg(axis_x_str);
225 }
226
227 if (dir == "up" || dir == "down") {
228 const QString axis_y_str = QString::fromStdString(param.Get("axis_y", ""));
229
230 return QObject::tr("Mouse %1").arg(axis_y_str);
231 } 201 }
232 202
233 return {}; 203 return {};
@@ -396,6 +366,25 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
396 analogs_param[analog_id].Clear(); 366 analogs_param[analog_id].Clear();
397 analog_map_buttons[analog_id][sub_button_id]->setText(tr("[not set]")); 367 analog_map_buttons[analog_id][sub_button_id]->setText(tr("[not set]"));
398 }); 368 });
369 context_menu.addAction(tr("Invert axis"), [&] {
370 if (sub_button_id == 2 || sub_button_id == 3) {
371 const bool invert_value =
372 analogs_param[analog_id].Get("invert_x", "+") == "-";
373 const std::string invert_str = invert_value ? "+" : "-";
374 analogs_param[analog_id].Set("invert_x", invert_str);
375 }
376 if (sub_button_id == 0 || sub_button_id == 1) {
377 const bool invert_value =
378 analogs_param[analog_id].Get("invert_y", "+") == "-";
379 const std::string invert_str = invert_value ? "+" : "-";
380 analogs_param[analog_id].Set("invert_y", invert_str);
381 }
382 for (int sub_button_id = 0; sub_button_id < ANALOG_SUB_BUTTONS_NUM;
383 ++sub_button_id) {
384 analog_map_buttons[analog_id][sub_button_id]->setText(AnalogToText(
385 analogs_param[analog_id], analog_sub_buttons[sub_button_id]));
386 }
387 });
399 context_menu.exec(analog_map_buttons[analog_id][sub_button_id]->mapToGlobal( 388 context_menu.exec(analog_map_buttons[analog_id][sub_button_id]->mapToGlobal(
400 menu_location)); 389 menu_location));
401 }); 390 });
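Note: the inversion state lives in the param package as the strings "+" (normal) and "-" (inverted), and opposite stick directions render opposite signs, which is why the "left" and "right" branches above swap the invert_x_str expression. The convention in isolation, with a plain map standing in for Common::ParamPackage:

    #include <map>
    #include <string>

    using ParamMap = std::map<std::string, std::string>;

    // "-" marks an inverted axis; "+" (or an absent key) is the normal direction.
    bool IsInverted(const ParamMap& param, const std::string& key) {
        const auto it = param.find(key);
        return it != param.end() && it->second == "-";
    }

    // Toggling rewrites the stored sign, mirroring the "Invert axis" action above.
    void ToggleInvert(ParamMap& param, const std::string& key) {
        param[key] = IsInverted(param, key) ? "+" : "-";
    }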
diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp
index 2afac591a..c2a7113da 100644
--- a/src/yuzu/configuration/configure_motion_touch.cpp
+++ b/src/yuzu/configuration/configure_motion_touch.cpp
@@ -183,8 +183,8 @@ void ConfigureMotionTouch::ConnectEvents() {
183} 183}
184 184
185void ConfigureMotionTouch::OnUDPAddServer() { 185void ConfigureMotionTouch::OnUDPAddServer() {
186 QRegExp re(tr("^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[" 186 QRegExp re(tr(R"re(^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4]"
187 "0-9][0-9]?)$")); // a valid ip address 187 "[0-9]|[01]?[0-9][0-9]?)$)re")); // a valid ip address
188 bool ok; 188 bool ok;
189 QString port_text = ui->udp_port->text(); 189 QString port_text = ui->udp_port->text();
190 QString server_text = ui->udp_server->text(); 190 QString server_text = ui->udp_server->text();
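Note: the rewritten pattern uses raw string literals so the \. in the dotted-quad needs no double escaping. The same check, sketched with QRegularExpression for clarity (the dialog itself keeps QRegExp):

    #include <QRegularExpression>
    #include <QString>

    // Accepts four dot-separated octets in the range 0-255.
    bool IsValidIpv4(const QString& text) {
        static const QRegularExpression re(QStringLiteral(
            R"(^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$)"));
        return re.match(text).hasMatch();
    }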
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index 0e26f765b..efdc6aa50 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -48,7 +48,7 @@ private:
48 48
49MicroProfileDialog::MicroProfileDialog(QWidget* parent) : QWidget(parent, Qt::Dialog) { 49MicroProfileDialog::MicroProfileDialog(QWidget* parent) : QWidget(parent, Qt::Dialog) {
50 setObjectName(QStringLiteral("MicroProfile")); 50 setObjectName(QStringLiteral("MicroProfile"));
51 setWindowTitle(tr("MicroProfile")); 51 setWindowTitle(tr("&MicroProfile"));
52 resize(1000, 600); 52 resize(1000, 600);
53 // Remove the "?" button from the titlebar and enable the maximize button 53 // Remove the "?" button from the titlebar and enable the maximize button
54 setWindowFlags((windowFlags() & ~Qt::WindowContextHelpButtonHint) | 54 setWindowFlags((windowFlags() & ~Qt::WindowContextHelpButtonHint) |
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index a20824719..0925c10b4 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -13,10 +13,10 @@
13#include "core/arm/arm_interface.h" 13#include "core/arm/arm_interface.h"
14#include "core/core.h" 14#include "core/core.h"
15#include "core/hle/kernel/handle_table.h" 15#include "core/hle/kernel/handle_table.h"
16#include "core/hle/kernel/k_scheduler.h"
16#include "core/hle/kernel/mutex.h" 17#include "core/hle/kernel/mutex.h"
17#include "core/hle/kernel/process.h" 18#include "core/hle/kernel/process.h"
18#include "core/hle/kernel/readable_event.h" 19#include "core/hle/kernel/readable_event.h"
19#include "core/hle/kernel/scheduler.h"
20#include "core/hle/kernel/synchronization_object.h" 20#include "core/hle/kernel/synchronization_object.h"
21#include "core/hle/kernel/thread.h" 21#include "core/hle/kernel/thread.h"
22#include "core/memory.h" 22#include "core/memory.h"
@@ -101,7 +101,7 @@ std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList()
101 }; 101 };
102 102
103 const auto& system = Core::System::GetInstance(); 103 const auto& system = Core::System::GetInstance();
104 add_threads(system.GlobalScheduler().GetThreadList()); 104 add_threads(system.GlobalSchedulerContext().GetThreadList());
105 105
106 return item_list; 106 return item_list;
107} 107}
@@ -349,14 +349,14 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
349 list.push_back(std::make_unique<WaitTreeText>(tr("processor = %1").arg(processor))); 349 list.push_back(std::make_unique<WaitTreeText>(tr("processor = %1").arg(processor)));
350 list.push_back( 350 list.push_back(
351 std::make_unique<WaitTreeText>(tr("ideal core = %1").arg(thread.GetIdealCore()))); 351 std::make_unique<WaitTreeText>(tr("ideal core = %1").arg(thread.GetIdealCore())));
352 list.push_back( 352 list.push_back(std::make_unique<WaitTreeText>(
353 std::make_unique<WaitTreeText>(tr("affinity mask = %1").arg(thread.GetAffinityMask()))); 353 tr("affinity mask = %1").arg(thread.GetAffinityMask().GetAffinityMask())));
354 list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadID()))); 354 list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadID())));
355 list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)") 355 list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)")
356 .arg(thread.GetPriority()) 356 .arg(thread.GetPriority())
357 .arg(thread.GetNominalPriority()))); 357 .arg(thread.GetNominalPriority())));
358 list.push_back(std::make_unique<WaitTreeText>( 358 list.push_back(std::make_unique<WaitTreeText>(
359 tr("last running ticks = %1").arg(thread.GetLastRunningTicks()))); 359 tr("last running ticks = %1").arg(thread.GetLastScheduledTick())));
360 360
361 const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); 361 const VAddr mutex_wait_address = thread.GetMutexWaitAddress();
362 if (mutex_wait_address != 0) { 362 if (mutex_wait_address != 0) {
@@ -457,7 +457,7 @@ void WaitTreeModel::InitItems() {
457 thread_items = WaitTreeItem::MakeThreadItemList(); 457 thread_items = WaitTreeItem::MakeThreadItemList();
458} 458}
459 459
460WaitTreeWidget::WaitTreeWidget(QWidget* parent) : QDockWidget(tr("Wait Tree"), parent) { 460WaitTreeWidget::WaitTreeWidget(QWidget* parent) : QDockWidget(tr("&Wait Tree"), parent) {
461 setObjectName(QStringLiteral("WaitTreeWidget")); 461 setObjectName(QStringLiteral("WaitTreeWidget"));
462 view = new QTreeView(this); 462 view = new QTreeView(this);
463 view->setHeaderHidden(true); 463 view->setHeaderHidden(true);
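Note: the chained thread.GetAffinityMask().GetAffinityMask() reads oddly but follows from the kernel rework: the thread now returns a KAffinityMask object (see k_affinity_mask.h in the diffstat) instead of a raw integer. A sketch of the wrapper's likely shape, assuming yuzu's u64/s32 aliases; the actual class may differ:

    // Wrapper around the raw core-affinity bits; the inner accessor explains
    // the chained call in the wait tree above.
    class KAffinityMask {
    public:
        constexpr u64 GetAffinityMask() const {
            return mask;
        }

        constexpr void SetAffinity(s32 core, bool allowed) {
            if (allowed) {
                mask |= u64{1} << core;
            } else {
                mask &= ~(u64{1} << core);
            }
        }

    private:
        u64 mask{};
    };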
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 248855aff..df935022d 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -174,7 +174,8 @@ public:
174 } 174 }
175 175
176 bool operator<(const QStandardItem& other) const override { 176 bool operator<(const QStandardItem& other) const override {
177 return data(CompatNumberRole) < other.data(CompatNumberRole); 177 return data(CompatNumberRole).value<QString>() <
178 other.data(CompatNumberRole).value<QString>();
178 } 179 }
179}; 180};
180 181
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 26f5e42ed..ab66d7f93 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -28,8 +28,6 @@
28#include "core/hle/service/am/applet_ae.h" 28#include "core/hle/service/am/applet_ae.h"
29#include "core/hle/service/am/applet_oe.h" 29#include "core/hle/service/am/applet_oe.h"
30#include "core/hle/service/am/applets/applets.h" 30#include "core/hle/service/am/applets/applets.h"
31#include "core/hle/service/hid/controllers/npad.h"
32#include "core/hle/service/hid/hid.h"
33 31
34// These are wrappers to avoid the calls to CreateDirectory and CreateFile because of the Windows 32// These are wrappers to avoid the calls to CreateDirectory and CreateFile because of the Windows
35// defines. 33// defines.
@@ -83,6 +81,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
83#include "core/core.h" 81#include "core/core.h"
84#include "core/crypto/key_manager.h" 82#include "core/crypto/key_manager.h"
85#include "core/file_sys/card_image.h" 83#include "core/file_sys/card_image.h"
84#include "core/file_sys/common_funcs.h"
86#include "core/file_sys/content_archive.h" 85#include "core/file_sys/content_archive.h"
87#include "core/file_sys/control_metadata.h" 86#include "core/file_sys/control_metadata.h"
88#include "core/file_sys/patch_manager.h" 87#include "core/file_sys/patch_manager.h"
@@ -124,14 +123,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
124#include "yuzu/discord_impl.h" 123#include "yuzu/discord_impl.h"
125#endif 124#endif
126 125
127#ifdef YUZU_USE_QT_WEB_ENGINE
128#include <QWebEngineProfile>
129#include <QWebEngineScript>
130#include <QWebEngineScriptCollection>
131#include <QWebEngineSettings>
132#include <QWebEngineView>
133#endif
134
135#ifdef QT_STATICPLUGIN 126#ifdef QT_STATICPLUGIN
136Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin); 127Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin);
137#endif 128#endif
@@ -148,8 +139,6 @@ __declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
148 139
149constexpr int default_mouse_timeout = 2500; 140constexpr int default_mouse_timeout = 2500;
150 141
151constexpr u64 DLC_BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000;
152
153/** 142/**
154 * "Callouts" are one-time instructional messages shown to the user. In the config settings, there 143 * "Callouts" are one-time instructional messages shown to the user. In the config settings, there
155 * is a bitfield "callout_flags" options, used to track if a message has already been shown to the 144 * is a bitfield "callout_flags" options, used to track if a message has already been shown to the
@@ -191,6 +180,30 @@ static void InitializeLogging() {
191#endif 180#endif
192} 181}
193 182
183static void RemoveCachedContents() {
184 const auto offline_fonts = Common::FS::SanitizePath(
185 fmt::format("{}/fonts", Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)),
186 Common::FS::DirectorySeparator::PlatformDefault);
187
188 const auto offline_manual = Common::FS::SanitizePath(
189 fmt::format("{}/offline_web_applet_manual",
190 Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)),
191 Common::FS::DirectorySeparator::PlatformDefault);
192 const auto offline_legal_information = Common::FS::SanitizePath(
193 fmt::format("{}/offline_web_applet_legal_information",
194 Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)),
195 Common::FS::DirectorySeparator::PlatformDefault);
196 const auto offline_system_data = Common::FS::SanitizePath(
197 fmt::format("{}/offline_web_applet_system_data",
198 Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)),
199 Common::FS::DirectorySeparator::PlatformDefault);
200
201 Common::FS::DeleteDirRecursively(offline_fonts);
202 Common::FS::DeleteDirRecursively(offline_manual);
203 Common::FS::DeleteDirRecursively(offline_legal_information);
204 Common::FS::DeleteDirRecursively(offline_system_data);
205}
206
194GMainWindow::GMainWindow() 207GMainWindow::GMainWindow()
195 : input_subsystem{std::make_shared<InputCommon::InputSubsystem>()}, 208 : input_subsystem{std::make_shared<InputCommon::InputSubsystem>()},
196 config{std::make_unique<Config>()}, vfs{std::make_shared<FileSys::RealVfsFilesystem>()}, 209 config{std::make_unique<Config>()}, vfs{std::make_shared<FileSys::RealVfsFilesystem>()},
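Note: RemoveCachedContents() deletes the web-applet caches left under the user cache directory by the previous session. A compressed sketch of the same cleanup with std::filesystem; the real code routes through Common::FS so separators are sanitized per platform:

    #include <filesystem>
    #include <system_error>

    void RemoveCachedContents(const std::filesystem::path& cache_dir) {
        std::error_code ec; // ignore errors: a missing directory is not a failure
        for (const char* name : {"fonts", "offline_web_applet_manual",
                                 "offline_web_applet_legal_information",
                                 "offline_web_applet_system_data"}) {
            std::filesystem::remove_all(cache_dir / name, ec);
        }
    }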
@@ -259,6 +272,9 @@ GMainWindow::GMainWindow()
259 FileSys::ContentProviderUnionSlot::FrontendManual, provider.get()); 272 FileSys::ContentProviderUnionSlot::FrontendManual, provider.get());
260 Core::System::GetInstance().GetFileSystemController().CreateFactories(*vfs); 273 Core::System::GetInstance().GetFileSystemController().CreateFactories(*vfs);
261 274
275 // Remove cached contents generated during the previous session
276 RemoveCachedContents();
277
262 // Gen keys if necessary 278 // Gen keys if necessary
263 OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning); 279 OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning);
264 280
@@ -350,150 +366,141 @@ void GMainWindow::SoftwareKeyboardInvokeCheckDialog(std::u16string error_message
350 emit SoftwareKeyboardFinishedCheckDialog(); 366 emit SoftwareKeyboardFinishedCheckDialog();
351} 367}
352 368
369void GMainWindow::WebBrowserOpenWebPage(std::string_view main_url, std::string_view additional_args,
370 bool is_local) {
353#ifdef YUZU_USE_QT_WEB_ENGINE 371#ifdef YUZU_USE_QT_WEB_ENGINE
354 372
355void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view additional_args) { 373 if (disable_web_applet) {
356 NXInputWebEngineView web_browser_view(this); 374 emit WebBrowserClosed(Service::AM::Applets::WebExitReason::WindowClosed,
375 "http://localhost/");
376 return;
377 }
357 378
358 // Scope to contain the QProgressDialog for initialization 379 QtNXWebEngineView web_browser_view(this, Core::System::GetInstance(), input_subsystem.get());
359 {
360 QProgressDialog progress(this);
361 progress.setMinimumDuration(200);
362 progress.setLabelText(tr("Loading Web Applet..."));
363 progress.setRange(0, 4);
364 progress.setValue(0);
365 progress.show();
366 380
367 auto future = QtConcurrent::run([this] { emit WebBrowserUnpackRomFS(); }); 381 ui.action_Pause->setEnabled(false);
382 ui.action_Restart->setEnabled(false);
383 ui.action_Stop->setEnabled(false);
368 384
369 while (!future.isFinished()) 385 {
370 QApplication::processEvents(); 386 QProgressDialog loading_progress(this);
387 loading_progress.setLabelText(tr("Loading Web Applet..."));
388 loading_progress.setRange(0, 3);
389 loading_progress.setValue(0);
390
391 if (is_local && !Common::FS::Exists(std::string(main_url))) {
392 loading_progress.show();
371 393
372 progress.setValue(1); 394 auto future = QtConcurrent::run([this] { emit WebBrowserExtractOfflineRomFS(); });
373 395
374 // Load the special shim script to handle input and exit. 396 while (!future.isFinished()) {
375 QWebEngineScript nx_shim; 397 QCoreApplication::processEvents();
376 nx_shim.setSourceCode(GetNXShimInjectionScript()); 398 }
377 nx_shim.setWorldId(QWebEngineScript::MainWorld); 399 }
378 nx_shim.setName(QStringLiteral("nx_inject.js"));
379 nx_shim.setInjectionPoint(QWebEngineScript::DocumentCreation);
380 nx_shim.setRunsOnSubFrames(true);
381 web_browser_view.page()->profile()->scripts()->insert(nx_shim);
382 400
383 web_browser_view.load( 401 loading_progress.setValue(1);
384 QUrl(QUrl::fromLocalFile(QString::fromStdString(std::string(filename))).toString() +
385 QString::fromStdString(std::string(additional_args))));
386 402
387 progress.setValue(2); 403 if (is_local) {
404 web_browser_view.LoadLocalWebPage(main_url, additional_args);
405 } else {
406 web_browser_view.LoadExternalWebPage(main_url, additional_args);
407 }
388 408
389 render_window->hide(); 409 if (render_window->IsLoadingComplete()) {
390 web_browser_view.setFocus(); 410 render_window->hide();
411 }
391 412
392 const auto& layout = render_window->GetFramebufferLayout(); 413 const auto& layout = render_window->GetFramebufferLayout();
393 web_browser_view.resize(layout.screen.GetWidth(), layout.screen.GetHeight()); 414 web_browser_view.resize(layout.screen.GetWidth(), layout.screen.GetHeight());
394 web_browser_view.move(layout.screen.left, layout.screen.top + menuBar()->height()); 415 web_browser_view.move(layout.screen.left, layout.screen.top + menuBar()->height());
395 web_browser_view.setZoomFactor(static_cast<qreal>(layout.screen.GetWidth()) / 416 web_browser_view.setZoomFactor(static_cast<qreal>(layout.screen.GetWidth()) /
396 Layout::ScreenUndocked::Width); 417 static_cast<qreal>(Layout::ScreenUndocked::Width));
397 web_browser_view.settings()->setAttribute(
398 QWebEngineSettings::LocalContentCanAccessRemoteUrls, true);
399 418
419 web_browser_view.setFocus();
400 web_browser_view.show(); 420 web_browser_view.show();
401 421
402 progress.setValue(3); 422 loading_progress.setValue(2);
403 423
404 QApplication::processEvents(); 424 QCoreApplication::processEvents();
405 425
406 progress.setValue(4); 426 loading_progress.setValue(3);
407 } 427 }
408 428
409 bool finished = false; 429 bool exit_check = false;
410 QAction* exit_action = new QAction(tr("Exit Web Applet"), this);
411 connect(exit_action, &QAction::triggered, this, [&finished] { finished = true; });
412 ui.menubar->addAction(exit_action);
413 430
414 auto& npad = 431 // TODO (Morph): Remove this
415 Core::System::GetInstance() 432 QAction* exit_action = new QAction(tr("Disable Web Applet"), this);
416 .ServiceManager() 433 connect(exit_action, &QAction::triggered, this, [this, &web_browser_view] {
417 .GetService<Service::HID::Hid>("hid") 434 const auto result = QMessageBox::warning(
418 ->GetAppletResource() 435 this, tr("Disable Web Applet"),
419 ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad); 436 tr("Disabling the web applet will cause it to not be shown again for the rest of the "
420 437 "emulated session. This can lead to undefined behavior and should only be used with "
421 const auto fire_js_keypress = [&web_browser_view](u32 key_code) { 438 "Super Mario 3D All-Stars. Are you sure you want to disable the web applet?"),
422 web_browser_view.page()->runJavaScript( 439 QMessageBox::Yes | QMessageBox::No);
423 QStringLiteral("document.dispatchEvent(new KeyboardEvent('keydown', {'key': %1}));") 440 if (result == QMessageBox::Yes) {
424 .arg(key_code)); 441 disable_web_applet = true;
425 }; 442 web_browser_view.SetFinished(true);
443 }
444 });
445 ui.menubar->addAction(exit_action);
426 446
427 QMessageBox::information( 447 while (!web_browser_view.IsFinished()) {
428 this, tr("Exit"), 448 QCoreApplication::processEvents();
429 tr("To exit the web application, use the game provided controls to select exit, select the " 449
430 "'Exit Web Applet' option in the menu bar, or press the 'Enter' key.")); 450 if (!exit_check) {
431 451 web_browser_view.page()->runJavaScript(
432 bool running_exit_check = false; 452 QStringLiteral("end_applet;"), [&](const QVariant& variant) {
433 while (!finished) { 453 exit_check = false;
434 QApplication::processEvents(); 454 if (variant.toBool()) {
435 455 web_browser_view.SetFinished(true);
436 if (!running_exit_check) { 456 web_browser_view.SetExitReason(
437 web_browser_view.page()->runJavaScript(QStringLiteral("applet_done;"), 457 Service::AM::Applets::WebExitReason::EndButtonPressed);
438 [&](const QVariant& res) { 458 }
439 running_exit_check = false; 459 });
440 if (res.toBool()) 460
441 finished = true; 461 exit_check = true;
442 });
443 running_exit_check = true;
444 } 462 }
445 463
446 const auto input = npad.GetAndResetPressState(); 464 if (web_browser_view.GetCurrentURL().contains(QStringLiteral("localhost"))) {
447 for (std::size_t i = 0; i < Settings::NativeButton::NumButtons; ++i) { 465 if (!web_browser_view.IsFinished()) {
448 if ((input & (1 << i)) != 0) { 466 web_browser_view.SetFinished(true);
449 LOG_DEBUG(Frontend, "firing input for button id={:02X}", i); 467 web_browser_view.SetExitReason(Service::AM::Applets::WebExitReason::CallbackURL);
450 web_browser_view.page()->runJavaScript(
451 QStringLiteral("yuzu_key_callbacks[%1]();").arg(i));
452 } 468 }
469
470 web_browser_view.SetLastURL(web_browser_view.GetCurrentURL().toStdString());
453 } 471 }
454 472
455 if (input & 0x00888000) // RStick Down | LStick Down | DPad Down 473 std::this_thread::sleep_for(std::chrono::milliseconds(1));
456 fire_js_keypress(40); // Down Arrow Key
457 else if (input & 0x00444000) // RStick Right | LStick Right | DPad Right
458 fire_js_keypress(39); // Right Arrow Key
459 else if (input & 0x00222000) // RStick Up | LStick Up | DPad Up
460 fire_js_keypress(38); // Up Arrow Key
461 else if (input & 0x00111000) // RStick Left | LStick Left | DPad Left
462 fire_js_keypress(37); // Left Arrow Key
463 else if (input & 0x00000001) // A Button
464 fire_js_keypress(13); // Enter Key
465 } 474 }
466 475
476 const auto exit_reason = web_browser_view.GetExitReason();
477 const auto last_url = web_browser_view.GetLastURL();
478
467 web_browser_view.hide(); 479 web_browser_view.hide();
468 render_window->show(); 480
469 render_window->setFocus(); 481 render_window->setFocus();
470 ui.menubar->removeAction(exit_action);
471 482
472 // Needed to update render window focus/show and remove menubar action 483 if (render_window->IsLoadingComplete()) {
473 QApplication::processEvents(); 484 render_window->show();
474 emit WebBrowserFinishedBrowsing(); 485 }
475}
476 486
477#else 487 ui.action_Pause->setEnabled(true);
488 ui.action_Restart->setEnabled(true);
489 ui.action_Stop->setEnabled(true);
478 490
479void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view additional_args) { 491 ui.menubar->removeAction(exit_action);
480#ifndef __linux__
481 QMessageBox::warning(
482 this, tr("Web Applet"),
483 tr("This version of yuzu was built without QtWebEngine support, meaning that yuzu cannot "
484 "properly display the game manual or web page requested."),
485 QMessageBox::Ok, QMessageBox::Ok);
486#endif
487 492
488 LOG_INFO(Frontend, 493 QCoreApplication::processEvents();
489 "(STUBBED) called - Missing QtWebEngine dependency needed to open website page at "
490 "'{}' with arguments '{}'!",
491 filename, additional_args);
492 494
493 emit WebBrowserFinishedBrowsing(); 495 emit WebBrowserClosed(exit_reason, last_url);
494} 496
497#else
498
499 // Utilize the same fallback as the default web browser applet.
500 emit WebBrowserClosed(Service::AM::Applets::WebExitReason::WindowClosed, "http://localhost/");
495 501
496#endif 502#endif
503}
497 504
498void GMainWindow::InitializeWidgets() { 505void GMainWindow::InitializeWidgets() {
499#ifdef YUZU_ENABLE_COMPATIBILITY_REPORTING 506#ifdef YUZU_ENABLE_COMPATIBILITY_REPORTING
@@ -573,9 +580,8 @@ void GMainWindow::InitializeWidgets() {
573 if (emulation_running) { 580 if (emulation_running) {
574 return; 581 return;
575 } 582 }
576 const bool is_async = !Settings::values.use_asynchronous_gpu_emulation.GetValue() || 583 Settings::values.use_asynchronous_gpu_emulation.SetValue(
577 Settings::values.use_multi_core.GetValue(); 584 !Settings::values.use_asynchronous_gpu_emulation.GetValue());
578 Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async);
579 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); 585 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
580 Settings::Apply(Core::System::GetInstance()); 586 Settings::Apply(Core::System::GetInstance());
581 }); 587 });
@@ -592,16 +598,13 @@ void GMainWindow::InitializeWidgets() {
592 return; 598 return;
593 } 599 }
594 Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue()); 600 Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue());
595 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue() ||
596 Settings::values.use_multi_core.GetValue();
597 Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async);
598 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
599 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); 601 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
600 Settings::Apply(Core::System::GetInstance()); 602 Settings::Apply(Core::System::GetInstance());
601 }); 603 });
602 multicore_status_button->setText(tr("MULTICORE")); 604 multicore_status_button->setText(tr("MULTICORE"));
603 multicore_status_button->setCheckable(true); 605 multicore_status_button->setCheckable(true);
604 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); 606 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
607
605 statusBar()->insertPermanentWidget(0, multicore_status_button); 608 statusBar()->insertPermanentWidget(0, multicore_status_button);
606 statusBar()->insertPermanentWidget(0, async_status_button); 609 statusBar()->insertPermanentWidget(0, async_status_button);
607 610
@@ -615,11 +618,6 @@ void GMainWindow::InitializeWidgets() {
615 }); 618 });
616 renderer_status_button->toggle(); 619 renderer_status_button->toggle();
617 620
618#ifndef HAS_VULKAN
619 renderer_status_button->setChecked(false);
620 renderer_status_button->setCheckable(false);
621 renderer_status_button->setDisabled(true);
622#else
623 renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == 621 renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() ==
624 Settings::RendererBackend::Vulkan); 622 Settings::RendererBackend::Vulkan);
625 connect(renderer_status_button, &QPushButton::clicked, [this] { 623 connect(renderer_status_button, &QPushButton::clicked, [this] {
@@ -634,7 +632,6 @@ void GMainWindow::InitializeWidgets() {
634 632
635 Settings::Apply(Core::System::GetInstance()); 633 Settings::Apply(Core::System::GetInstance());
636 }); 634 });
637#endif // HAS_VULKAN
638 statusBar()->insertPermanentWidget(0, renderer_status_button); 635 statusBar()->insertPermanentWidget(0, renderer_status_button);
639 636
640 statusBar()->setVisible(true); 637 statusBar()->setVisible(true);
@@ -670,7 +667,7 @@ void GMainWindow::InitializeRecentFileMenuActions() {
670 } 667 }
671 ui.menu_recent_files->addSeparator(); 668 ui.menu_recent_files->addSeparator();
672 QAction* action_clear_recent_files = new QAction(this); 669 QAction* action_clear_recent_files = new QAction(this);
673 action_clear_recent_files->setText(tr("Clear Recent Files")); 670 action_clear_recent_files->setText(tr("&Clear Recent Files"));
674 connect(action_clear_recent_files, &QAction::triggered, this, [this] { 671 connect(action_clear_recent_files, &QAction::triggered, this, [this] {
675 UISettings::values.recent_files.clear(); 672 UISettings::values.recent_files.clear();
676 UpdateRecentFiles(); 673 UpdateRecentFiles();
@@ -932,7 +929,10 @@ void GMainWindow::ConnectMenuEvents() {
932 &GMainWindow::OnDisplayTitleBars); 929 &GMainWindow::OnDisplayTitleBars);
933 connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar); 930 connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar);
934 connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible); 931 connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible);
935 connect(ui.action_Reset_Window_Size, &QAction::triggered, this, &GMainWindow::ResetWindowSize); 932 connect(ui.action_Reset_Window_Size_720, &QAction::triggered, this,
933 &GMainWindow::ResetWindowSize720);
934 connect(ui.action_Reset_Window_Size_1080, &QAction::triggered, this,
935 &GMainWindow::ResetWindowSize1080);
936 936
937 // Fullscreen 937 // Fullscreen
938 connect(ui.action_Fullscreen, &QAction::triggered, this, &GMainWindow::ToggleFullscreen); 938 connect(ui.action_Fullscreen, &QAction::triggered, this, &GMainWindow::ToggleFullscreen);
@@ -994,7 +994,6 @@ bool GMainWindow::LoadROM(const QString& filename, std::size_t program_index) {
994 994
995 system.SetAppletFrontendSet({ 995 system.SetAppletFrontendSet({
996 std::make_unique<QtControllerSelector>(*this), // Controller Selector 996 std::make_unique<QtControllerSelector>(*this), // Controller Selector
997 nullptr, // E-Commerce
998 std::make_unique<QtErrorDisplay>(*this), // Error Display 997 std::make_unique<QtErrorDisplay>(*this), // Error Display
999 nullptr, // Parental Controls 998 nullptr, // Parental Controls
1000 nullptr, // Photo Viewer 999 nullptr, // Photo Viewer
@@ -1107,6 +1106,11 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
1107 1106
1108 ConfigureVibration::SetAllVibrationDevices(); 1107 ConfigureVibration::SetAllVibrationDevices();
1109 1108
1109 // Save configurations
1110 UpdateUISettings();
1111 game_list->SaveInterfaceLayout();
1112 config->Save();
1113
1110 Settings::LogSettings(); 1114 Settings::LogSettings();
1111 1115
1112 if (UISettings::values.select_user_on_boot) { 1116 if (UISettings::values.select_user_on_boot) {
@@ -1240,9 +1244,7 @@ void GMainWindow::ShutdownGame() {
1240 emu_frametime_label->setVisible(false); 1244 emu_frametime_label->setVisible(false);
1241 async_status_button->setEnabled(true); 1245 async_status_button->setEnabled(true);
1242 multicore_status_button->setEnabled(true); 1246 multicore_status_button->setEnabled(true);
1243#ifdef HAS_VULKAN
1244 renderer_status_button->setEnabled(true); 1247 renderer_status_button->setEnabled(true);
1245#endif
1246 1248
1247 emulation_running = false; 1249 emulation_running = false;
1248 1250
@@ -1529,7 +1531,7 @@ void GMainWindow::RemoveAddOnContent(u64 program_id, const QString& entry_type)
1529 FileSys::TitleType::AOC, FileSys::ContentRecordType::Data); 1531 FileSys::TitleType::AOC, FileSys::ContentRecordType::Data);
1530 1532
1531 for (const auto& entry : dlc_entries) { 1533 for (const auto& entry : dlc_entries) {
1532 if ((entry.title_id & DLC_BASE_TITLE_ID_MASK) == program_id) { 1534 if (FileSys::GetBaseTitleID(entry.title_id) == program_id) {
1533 const auto res = 1535 const auto res =
1534 fs_controller.GetUserNANDContents()->RemoveExistingEntry(entry.title_id) || 1536 fs_controller.GetUserNANDContents()->RemoveExistingEntry(entry.title_id) ||
1535 fs_controller.GetSDMCContents()->RemoveExistingEntry(entry.title_id); 1537 fs_controller.GetSDMCContents()->RemoveExistingEntry(entry.title_id);
@@ -2103,11 +2105,12 @@ void GMainWindow::OnStartGame() {
2103 qRegisterMetaType<std::string>("std::string"); 2105 qRegisterMetaType<std::string>("std::string");
2104 qRegisterMetaType<std::optional<std::u16string>>("std::optional<std::u16string>"); 2106 qRegisterMetaType<std::optional<std::u16string>>("std::optional<std::u16string>");
2105 qRegisterMetaType<std::string_view>("std::string_view"); 2107 qRegisterMetaType<std::string_view>("std::string_view");
2108 qRegisterMetaType<Service::AM::Applets::WebExitReason>("Service::AM::Applets::WebExitReason");
2106 2109
2107 connect(emu_thread.get(), &EmuThread::ErrorThrown, this, &GMainWindow::OnCoreError); 2110 connect(emu_thread.get(), &EmuThread::ErrorThrown, this, &GMainWindow::OnCoreError);
2108 2111
2109 ui.action_Start->setEnabled(false); 2112 ui.action_Start->setEnabled(false);
2110 ui.action_Start->setText(tr("Continue")); 2113 ui.action_Start->setText(tr("&Continue"));
2111 2114
2112 ui.action_Pause->setEnabled(true); 2115 ui.action_Pause->setEnabled(true);
2113 ui.action_Stop->setEnabled(true); 2116 ui.action_Stop->setEnabled(true);
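Note: WebExitReason is registered above because the WebBrowserClosed signal crosses threads, and queued connections copy their arguments into the event payload; Qt needs a registered meta-type to perform that copy. In isolation (the include path for the enum's definition is an assumption):

    #include <QMetaType>

    #include "core/hle/service/am/applets/web_browser.h" // assumed home of WebExitReason

    // Must run before the first queued emission of a signal carrying the enum.
    void RegisterWebAppletMetaTypes() {
        qRegisterMetaType<Service::AM::Applets::WebExitReason>(
            "Service::AM::Applets::WebExitReason");
    }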
@@ -2251,7 +2254,7 @@ void GMainWindow::ToggleWindowMode() {
2251 } 2254 }
2252} 2255}
2253 2256
2254void GMainWindow::ResetWindowSize() { 2257void GMainWindow::ResetWindowSize720() {
2255 const auto aspect_ratio = Layout::EmulationAspectRatio( 2258 const auto aspect_ratio = Layout::EmulationAspectRatio(
2256 static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio.GetValue()), 2259 static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio.GetValue()),
2257 static_cast<float>(Layout::ScreenUndocked::Height) / Layout::ScreenUndocked::Width); 2260 static_cast<float>(Layout::ScreenUndocked::Height) / Layout::ScreenUndocked::Width);
@@ -2265,6 +2268,20 @@ void GMainWindow::ResetWindowSize() {
2265 } 2268 }
2266} 2269}
2267 2270
2271void GMainWindow::ResetWindowSize1080() {
2272 const auto aspect_ratio = Layout::EmulationAspectRatio(
2273 static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio.GetValue()),
2274 static_cast<float>(Layout::ScreenDocked::Height) / Layout::ScreenDocked::Width);
2275 if (!ui.action_Single_Window_Mode->isChecked()) {
2276 render_window->resize(Layout::ScreenDocked::Height / aspect_ratio,
2277 Layout::ScreenDocked::Height);
2278 } else {
2279 resize(Layout::ScreenDocked::Height / aspect_ratio,
2280 Layout::ScreenDocked::Height + menuBar()->height() +
2281 (ui.action_Show_Status_Bar->isChecked() ? statusBar()->height() : 0));
2282 }
2283}
2284
2268void GMainWindow::OnConfigure() { 2285void GMainWindow::OnConfigure() {
2269 const auto old_theme = UISettings::values.theme; 2286 const auto old_theme = UISettings::values.theme;
2270 const bool old_discord_presence = UISettings::values.enable_discord_presence; 2287 const bool old_discord_presence = UISettings::values.enable_discord_presence;
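Note: in ResetWindowSize1080() above, aspect_ratio is height over width, so the computed width is height / aspect_ratio. For concreteness, with the default 16:9 docked layout that is 1080 / (1080 / 1920) = 1920, and the menu bar (plus the status bar, when visible) is added to the client height so the rendered area itself stays 1920x1080.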
@@ -2512,14 +2529,27 @@ void GMainWindow::UpdateStatusBar() {
2512void GMainWindow::UpdateStatusButtons() { 2529void GMainWindow::UpdateStatusButtons() {
2513 dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); 2530 dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue());
2514 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); 2531 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
2515 Settings::values.use_asynchronous_gpu_emulation.SetValue(
2516 Settings::values.use_asynchronous_gpu_emulation.GetValue() ||
2517 Settings::values.use_multi_core.GetValue());
2518 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); 2532 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
2519#ifdef HAS_VULKAN
2520 renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == 2533 renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() ==
2521 Settings::RendererBackend::Vulkan); 2534 Settings::RendererBackend::Vulkan);
2535}
2536
2537void GMainWindow::UpdateUISettings() {
2538 if (!ui.action_Fullscreen->isChecked()) {
2539 UISettings::values.geometry = saveGeometry();
2540 UISettings::values.renderwindow_geometry = render_window->saveGeometry();
2541 }
2542 UISettings::values.state = saveState();
2543#if MICROPROFILE_ENABLED
2544 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry();
2545 UISettings::values.microprofile_visible = microProfileDialog->isVisible();
2522#endif 2546#endif
2547 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked();
2548 UISettings::values.fullscreen = ui.action_Fullscreen->isChecked();
2549 UISettings::values.display_titlebar = ui.action_Display_Dock_Widget_Headers->isChecked();
2550 UISettings::values.show_filter_bar = ui.action_Show_Filter_Bar->isChecked();
2551 UISettings::values.show_status_bar = ui.action_Show_Status_Bar->isChecked();
2552 UISettings::values.first_start = false;
2523} 2553}
2524 2554
2525void GMainWindow::HideMouseCursor() { 2555void GMainWindow::HideMouseCursor() {
@@ -2709,7 +2739,7 @@ std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProv
2709 dlc_match.reserve(dlc_entries.size()); 2739 dlc_match.reserve(dlc_entries.size());
2710 std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match), 2740 std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match),
2711 [&program_id, &installed](const FileSys::ContentProviderEntry& entry) { 2741 [&program_id, &installed](const FileSys::ContentProviderEntry& entry) {
2712 return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == program_id && 2742 return FileSys::GetBaseTitleID(entry.title_id) == program_id &&
2713 installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success; 2743 installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success;
2714 }); 2744 });
2715 2745
@@ -2755,22 +2785,7 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
2755 return; 2785 return;
2756 } 2786 }
2757 2787
2758 if (!ui.action_Fullscreen->isChecked()) { 2788 UpdateUISettings();
2759 UISettings::values.geometry = saveGeometry();
2760 UISettings::values.renderwindow_geometry = render_window->saveGeometry();
2761 }
2762 UISettings::values.state = saveState();
2763#if MICROPROFILE_ENABLED
2764 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry();
2765 UISettings::values.microprofile_visible = microProfileDialog->isVisible();
2766#endif
2767 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked();
2768 UISettings::values.fullscreen = ui.action_Fullscreen->isChecked();
2769 UISettings::values.display_titlebar = ui.action_Display_Dock_Widget_Headers->isChecked();
2770 UISettings::values.show_filter_bar = ui.action_Show_Filter_Bar->isChecked();
2771 UISettings::values.show_status_bar = ui.action_Show_Status_Bar->isChecked();
2772 UISettings::values.first_start = false;
2773
2774 game_list->SaveInterfaceLayout(); 2789 game_list->SaveInterfaceLayout();
2775 hotkey_registry.SaveHotkeys(); 2790 hotkey_registry.SaveHotkeys();
2776 2791
@@ -2946,7 +2961,7 @@ void GMainWindow::OnLanguageChanged(const QString& locale) {
     UpdateWindowTitle();
 
     if (emulation_running)
-        ui.action_Start->setText(tr("Continue"));
+        ui.action_Start->setText(tr("&Continue"));
 }
 
 void GMainWindow::SetDiscordEnabled([[maybe_unused]] bool state) {
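The added '&' marks the mnemonic character in Qt: menus render "&Continue" as "Continue" with the C underlined and bound to Alt+C, while "&&" would produce a literal ampersand. The same convention drives the many &amp; entities added throughout main.ui below. A minimal illustration:

    #include <QMenu>
    #include <QObject>

    // '&' before a letter makes it the Alt accelerator for the menu entry.
    void AddStartAction(QMenu& menu) {
        menu.addAction(QObject::tr("&Continue")); // shown as "Continue", Alt+C
    }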
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 6242341d1..ea6d2c30d 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -55,6 +55,10 @@ namespace InputCommon {
 class InputSubsystem;
 }
 
+namespace Service::AM::Applets {
+enum class WebExitReason : u32;
+}
+
 enum class EmulatedDirectoryTarget {
     NAND,
     SDMC,
@@ -126,8 +130,8 @@ signals:
     void SoftwareKeyboardFinishedText(std::optional<std::u16string> text);
     void SoftwareKeyboardFinishedCheckDialog();
 
-    void WebBrowserUnpackRomFS();
-    void WebBrowserFinishedBrowsing();
+    void WebBrowserExtractOfflineRomFS();
+    void WebBrowserClosed(Service::AM::Applets::WebExitReason exit_reason, std::string last_url);
 
 public slots:
     void OnLoadComplete();
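The reworked WebBrowserClosed signal reports why the web applet exited along with the last URL it visited. If the signal ever crosses threads via a queued connection, Qt must be able to copy its argument types through the meta-type system; a hedged sketch of the wiring (the function and handler are hypothetical, not part of this diff):

    #include <QObject>

    // Hypothetical wiring; registration is only needed for queued connections.
    void WireWebBrowserClosed(GMainWindow& main_window) {
        qRegisterMetaType<Service::AM::Applets::WebExitReason>(
            "Service::AM::Applets::WebExitReason");
        QObject::connect(&main_window, &GMainWindow::WebBrowserClosed,
                         [](Service::AM::Applets::WebExitReason reason, std::string last_url) {
                             // Hand the exit reason and final URL back to the web applet.
                         });
    }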
@@ -138,7 +142,8 @@ public slots:
     void ProfileSelectorSelectProfile();
     void SoftwareKeyboardGetText(const Core::Frontend::SoftwareKeyboardParameters& parameters);
     void SoftwareKeyboardInvokeCheckDialog(std::u16string error_message);
-    void WebBrowserOpenPage(std::string_view filename, std::string_view arguments);
+    void WebBrowserOpenWebPage(std::string_view main_url, std::string_view additional_args,
+                               bool is_local);
     void OnAppFocusStateChanged(Qt::ApplicationState state);
 
 private:
@@ -237,7 +242,8 @@ private slots:
     void ShowFullscreen();
     void HideFullscreen();
     void ToggleWindowMode();
-    void ResetWindowSize();
+    void ResetWindowSize720();
+    void ResetWindowSize1080();
     void OnCaptureScreenshot();
     void OnCoreError(Core::System::ResultStatus, std::string);
     void OnReinitializeKeys(ReinitializeKeyBehavior behavior);
@@ -257,6 +263,7 @@ private:
                           const std::string& title_version = {});
     void UpdateStatusBar();
     void UpdateStatusButtons();
+    void UpdateUISettings();
     void HideMouseCursor();
     void ShowMouseCursor();
     void OpenURL(const QUrl& url);
@@ -321,6 +328,9 @@ private:
     // Last game booted, used for multi-process apps
     QString last_filename_booted;
 
+    // Disables the web applet for the rest of the emulated session
+    bool disable_web_applet{};
+
 protected:
     void dropEvent(QDropEvent* event) override;
     void dragEnterEvent(QDragEnterEvent* event) override;
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 2f3792247..e2ad5baf6 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -25,16 +25,7 @@
  </property>
  <widget class="QWidget" name="centralwidget">
   <layout class="QHBoxLayout" name="horizontalLayout">
-   <property name="leftMargin">
-    <number>0</number>
-   </property>
-   <property name="topMargin">
-    <number>0</number>
-   </property>
-   <property name="rightMargin">
-    <number>0</number>
-   </property>
-   <property name="bottomMargin">
+   <property name="margin">
     <number>0</number>
    </property>
   </layout>
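The four per-side margin properties collapse into the single legacy margin property; at runtime both spellings amount to zeroing the layout's contents margins. The equivalent call in code, for reference:

    #include <QHBoxLayout>

    // Runtime equivalent of either form of the .ui property above.
    void ZeroLayoutMargins(QHBoxLayout& layout) {
        layout.setContentsMargins(0, 0, 0, 0); // left, top, right, bottom
    }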
@@ -45,7 +36,7 @@
     <x>0</x>
     <y>0</y>
     <width>1280</width>
-    <height>21</height>
+    <height>26</height>
    </rect>
   </property>
   <widget class="QMenu" name="menu_File">
@@ -54,7 +45,7 @@
   </property>
   <widget class="QMenu" name="menu_recent_files">
    <property name="title">
-    <string>Recent Files</string>
+    <string>&amp;Recent Files</string>
    </property>
   </widget>
   <addaction name="action_Install_File_NAND"/>
@@ -89,7 +80,7 @@
   </property>
   <widget class="QMenu" name="menu_View_Debugging">
    <property name="title">
-    <string>Debugging</string>
+    <string>&amp;Debugging</string>
    </property>
   </widget>
   <addaction name="action_Fullscreen"/>
@@ -97,13 +88,14 @@
97 <addaction name="action_Display_Dock_Widget_Headers"/> 88 <addaction name="action_Display_Dock_Widget_Headers"/>
98 <addaction name="action_Show_Filter_Bar"/> 89 <addaction name="action_Show_Filter_Bar"/>
99 <addaction name="action_Show_Status_Bar"/> 90 <addaction name="action_Show_Status_Bar"/>
100 <addaction name="action_Reset_Window_Size"/> 91 <addaction name="action_Reset_Window_Size_720"/>
92 <addaction name="action_Reset_Window_Size_1080"/>
101 <addaction name="separator"/> 93 <addaction name="separator"/>
102 <addaction name="menu_View_Debugging"/> 94 <addaction name="menu_View_Debugging"/>
103 </widget> 95 </widget>
104 <widget class="QMenu" name="menu_Tools"> 96 <widget class="QMenu" name="menu_Tools">
105 <property name="title"> 97 <property name="title">
106 <string>Tools</string> 98 <string>&amp;Tools</string>
107 </property> 99 </property>
108 <addaction name="action_Rederive"/> 100 <addaction name="action_Rederive"/>
109 <addaction name="separator"/> 101 <addaction name="separator"/>
@@ -131,17 +123,17 @@
    <bool>true</bool>
   </property>
   <property name="text">
-   <string>Install Files to NAND...</string>
+   <string>&amp;Install Files to NAND...</string>
   </property>
  </action>
  <action name="action_Load_File">
   <property name="text">
-   <string>Load File...</string>
+   <string>L&amp;oad File...</string>
   </property>
  </action>
  <action name="action_Load_Folder">
   <property name="text">
-   <string>Load Folder...</string>
+   <string>Load &amp;Folder...</string>
   </property>
  </action>
  <action name="action_Exit">
@@ -175,12 +167,12 @@
  </action>
  <action name="action_Rederive">
   <property name="text">
-   <string>Reinitialize keys...</string>
+   <string>&amp;Reinitialize keys...</string>
   </property>
  </action>
  <action name="action_About">
   <property name="text">
-   <string>About yuzu</string>
+   <string>&amp;About yuzu</string>
   </property>
  </action>
  <action name="action_Single_Window_Mode">
@@ -188,12 +180,12 @@
    <bool>true</bool>
   </property>
   <property name="text">
-   <string>Single Window Mode</string>
+   <string>Single &amp;Window Mode</string>
   </property>
  </action>
  <action name="action_Configure">
   <property name="text">
-   <string>Configure...</string>
+   <string>Con&amp;figure...</string>
   </property>
  </action>
199 <action name="action_Display_Dock_Widget_Headers"> 191 <action name="action_Display_Dock_Widget_Headers">
@@ -201,7 +193,7 @@
201 <bool>true</bool> 193 <bool>true</bool>
202 </property> 194 </property>
203 <property name="text"> 195 <property name="text">
204 <string>Display Dock Widget Headers</string> 196 <string>Display D&amp;ock Widget Headers</string>
205 </property> 197 </property>
206 </action> 198 </action>
207 <action name="action_Show_Filter_Bar"> 199 <action name="action_Show_Filter_Bar">
@@ -209,7 +201,7 @@
209 <bool>true</bool> 201 <bool>true</bool>
210 </property> 202 </property>
211 <property name="text"> 203 <property name="text">
212 <string>Show Filter Bar</string> 204 <string>Show &amp;Filter Bar</string>
213 </property> 205 </property>
214 </action> 206 </action>
215 <action name="action_Show_Status_Bar"> 207 <action name="action_Show_Status_Bar">
@@ -217,12 +209,26 @@
217 <bool>true</bool> 209 <bool>true</bool>
218 </property> 210 </property>
219 <property name="text"> 211 <property name="text">
212 <string>Show &amp;Status Bar</string>
213 </property>
214 <property name="iconText">
220 <string>Show Status Bar</string> 215 <string>Show Status Bar</string>
221 </property> 216 </property>
222 </action> 217 </action>
223 <action name="action_Reset_Window_Size"> 218 <action name="action_Reset_Window_Size_720">
219 <property name="text">
220 <string>Reset Window Size to &amp;720p</string>
221 </property>
222 <property name="iconText">
223 <string>Reset Window Size to 720p</string>
224 </property>
225 </action>
226 <action name="action_Reset_Window_Size_1080">
224 <property name="text"> 227 <property name="text">
225 <string>Reset Window Size</string> 228 <string>Reset Window Size to &amp;1080p</string>
229 </property>
230 <property name="iconText">
231 <string>Reset Window Size to 1080p</string>
226 </property> 232 </property>
227 </action> 233 </action>
228 <action name="action_Fullscreen"> 234 <action name="action_Fullscreen">
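Each renamed action now pairs a mnemonic-bearing text with a plain iconText. When iconText is not set, Qt derives it from text by stripping ampersands, so setting it explicitly here mainly pins the toolbar label and gives translators a separate string. In code the pair looks like:

    #include <QAction>
    #include <QObject>

    // text() carries the Alt mnemonic; iconText() is the ampersand-free label.
    void ConfigureResetAction(QAction& action) {
        action.setText(QObject::tr("Reset Window Size to &720p"));
        action.setIconText(QObject::tr("Reset Window Size to 720p"));
    }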
@@ -230,7 +236,7 @@
    <bool>true</bool>
   </property>
   <property name="text">
-   <string>Fullscreen</string>
+   <string>F&amp;ullscreen</string>
   </property>
  </action>
236 <action name="action_Restart"> 242 <action name="action_Restart">
@@ -238,7 +244,7 @@
238 <bool>false</bool> 244 <bool>false</bool>
239 </property> 245 </property>
240 <property name="text"> 246 <property name="text">
241 <string>Restart</string> 247 <string>&amp;Restart</string>
242 </property> 248 </property>
243 </action> 249 </action>
244 <action name="action_Load_Amiibo"> 250 <action name="action_Load_Amiibo">
@@ -246,7 +252,7 @@
246 <bool>false</bool> 252 <bool>false</bool>
247 </property> 253 </property>
248 <property name="text"> 254 <property name="text">
249 <string>Load Amiibo...</string> 255 <string>Load &amp;Amiibo...</string>
250 </property> 256 </property>
251 </action> 257 </action>
252 <action name="action_Report_Compatibility"> 258 <action name="action_Report_Compatibility">
@@ -254,7 +260,7 @@
254 <bool>false</bool> 260 <bool>false</bool>
255 </property> 261 </property>
256 <property name="text"> 262 <property name="text">
257 <string>Report Compatibility</string> 263 <string>&amp;Report Compatibility</string>
258 </property> 264 </property>
259 <property name="visible"> 265 <property name="visible">
260 <bool>false</bool> 266 <bool>false</bool>
@@ -262,22 +268,22 @@
  </action>
  <action name="action_Open_Mods_Page">
   <property name="text">
-   <string>Open Mods Page</string>
+   <string>Open &amp;Mods Page</string>
   </property>
  </action>
  <action name="action_Open_Quickstart_Guide">
   <property name="text">
-   <string>Open Quickstart Guide</string>
+   <string>Open &amp;Quickstart Guide</string>
   </property>
  </action>
  <action name="action_Open_FAQ">
   <property name="text">
-   <string>FAQ</string>
+   <string>&amp;FAQ</string>
   </property>
  </action>
  <action name="action_Open_yuzu_Folder">
   <property name="text">
-   <string>Open yuzu Folder</string>
+   <string>Open &amp;yuzu Folder</string>
   </property>
  </action>
  <action name="action_Capture_Screenshot">
283 <action name="action_Capture_Screenshot"> 289 <action name="action_Capture_Screenshot">
@@ -285,7 +291,7 @@
285 <bool>false</bool> 291 <bool>false</bool>
286 </property> 292 </property>
287 <property name="text"> 293 <property name="text">
288 <string>Capture Screenshot</string> 294 <string>&amp;Capture Screenshot</string>
289 </property> 295 </property>
290 </action> 296 </action>
291 <action name="action_Configure_Current_Game"> 297 <action name="action_Configure_Current_Game">
@@ -293,7 +299,7 @@
293 <bool>false</bool> 299 <bool>false</bool>
294 </property> 300 </property>
295 <property name="text"> 301 <property name="text">
296 <string>Configure Current Game...</string> 302 <string>Configure C&amp;urrent Game...</string>
297 </property> 303 </property>
298 </action> 304 </action>
299 </widget> 305 </widget>
diff --git a/src/yuzu/util/url_request_interceptor.cpp b/src/yuzu/util/url_request_interceptor.cpp
new file mode 100644
index 000000000..2d491d8c0
--- /dev/null
+++ b/src/yuzu/util/url_request_interceptor.cpp
@@ -0,0 +1,32 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#ifdef YUZU_USE_QT_WEB_ENGINE
+
+#include "yuzu/util/url_request_interceptor.h"
+
+UrlRequestInterceptor::UrlRequestInterceptor(QObject* p) : QWebEngineUrlRequestInterceptor(p) {}
+
+UrlRequestInterceptor::~UrlRequestInterceptor() = default;
+
+void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo& info) {
+    const auto resource_type = info.resourceType();
+
+    switch (resource_type) {
+    case QWebEngineUrlRequestInfo::ResourceTypeMainFrame:
+        requested_url = info.requestUrl();
+        emit FrameChanged();
+        break;
+    case QWebEngineUrlRequestInfo::ResourceTypeSubFrame:
+    case QWebEngineUrlRequestInfo::ResourceTypeXhr:
+        emit FrameChanged();
+        break;
+    }
+}
+
+QUrl UrlRequestInterceptor::GetRequestedURL() const {
+    return requested_url;
+}
+
+#endif
diff --git a/src/yuzu/util/url_request_interceptor.h b/src/yuzu/util/url_request_interceptor.h
new file mode 100644
index 000000000..8a7f7499f
--- /dev/null
+++ b/src/yuzu/util/url_request_interceptor.h
@@ -0,0 +1,30 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#ifdef YUZU_USE_QT_WEB_ENGINE
+
+#include <QObject>
+#include <QWebEngineUrlRequestInterceptor>
+
+class UrlRequestInterceptor : public QWebEngineUrlRequestInterceptor {
+    Q_OBJECT
+
+public:
+    explicit UrlRequestInterceptor(QObject* p = nullptr);
+    ~UrlRequestInterceptor() override;
+
+    void interceptRequest(QWebEngineUrlRequestInfo& info) override;
+
+    QUrl GetRequestedURL() const;
+
+signals:
+    void FrameChanged();
+
+private:
+    QUrl requested_url;
+};
+
+#endif
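Typical use of this interceptor (not shown in the diff) is to install it on the web view's profile and poll GetRequestedURL() whenever FrameChanged fires. A sketch assuming Qt 5.13 or newer, where QWebEngineProfile::setUrlRequestInterceptor is available:

    #ifdef YUZU_USE_QT_WEB_ENGINE
    #include <QWebEngineProfile>
    #include <QWebEngineView>

    #include "yuzu/util/url_request_interceptor.h"

    // Hypothetical wiring: watch main-frame navigation of a web view.
    void InstallInterceptor(QWebEngineView& view) {
        auto* interceptor = new UrlRequestInterceptor(&view);
        view.page()->profile()->setUrlRequestInterceptor(interceptor);
        QObject::connect(interceptor, &UrlRequestInterceptor::FrameChanged, [interceptor] {
            const QUrl current = interceptor->GetRequestedURL();
            // e.g. detect the web applet's exit-callback URL here.
        });
    }
    #endif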
diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt
index 57f9916f6..0b3f2cb54 100644
--- a/src/yuzu_cmd/CMakeLists.txt
+++ b/src/yuzu_cmd/CMakeLists.txt
@@ -4,26 +4,17 @@ add_executable(yuzu-cmd
     config.cpp
     config.h
     default_ini.h
-    emu_window/emu_window_sdl2_gl.cpp
-    emu_window/emu_window_sdl2_gl.h
     emu_window/emu_window_sdl2.cpp
     emu_window/emu_window_sdl2.h
     emu_window/emu_window_sdl2_gl.cpp
     emu_window/emu_window_sdl2_gl.h
+    emu_window/emu_window_sdl2_vk.cpp
+    emu_window/emu_window_sdl2_vk.h
     resource.h
     yuzu.cpp
     yuzu.rc
 )
 
-if (ENABLE_VULKAN)
-    target_sources(yuzu-cmd PRIVATE
-        emu_window/emu_window_sdl2_vk.cpp
-        emu_window/emu_window_sdl2_vk.h)
-
-    target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include)
-    target_compile_definitions(yuzu-cmd PRIVATE HAS_VULKAN)
-endif()
-
 create_target_directory_groups(yuzu-cmd)
 
 target_link_libraries(yuzu-cmd PRIVATE common core input_common)
@@ -33,6 +24,8 @@ if (MSVC)
 endif()
 target_link_libraries(yuzu-cmd PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads)
 
+target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include)
+
 if(UNIX AND NOT APPLE)
     install(TARGETS yuzu-cmd RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
 endif()
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 34c9673bc..38075c345 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -345,7 +345,6 @@ void Config::ReadValues() {
     // System
     Settings::values.use_docked_mode.SetValue(
         sdl2_config->GetBoolean("System", "use_docked_mode", false));
-    const auto size = sdl2_config->GetInteger("System", "users_size", 0);
 
     Settings::values.current_user = std::clamp<int>(
         sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1);
@@ -430,9 +429,6 @@ void Config::ReadValues() {
     // Debugging
     Settings::values.record_frame_times =
         sdl2_config->GetBoolean("Debugging", "record_frame_times", false);
-    Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false);
-    Settings::values.gdbstub_port =
-        static_cast<u16>(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689));
     Settings::values.program_args = sdl2_config->Get("Debugging", "program_args", "");
     Settings::values.dump_exefs = sdl2_config->GetBoolean("Debugging", "dump_exefs", false);
     Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index bcbbcd4ca..2d4b98d9a 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -318,9 +318,6 @@ log_filter = *:Trace
 [Debugging]
 # Record frame time data, can be found in the log directory. Boolean value
 record_frame_times =
-# Port for listening to GDB connections.
-use_gdbstub=false
-gdbstub_port=24689
 # Determines whether or not yuzu will dump the ExeFS of all games it attempts to load while loading them
 dump_exefs=false
 # Determines whether or not yuzu will dump all NSOs it attempts to load while loading them
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index c2efe1ee6..2497c71ae 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -35,9 +35,7 @@
35#include "yuzu_cmd/config.h" 35#include "yuzu_cmd/config.h"
36#include "yuzu_cmd/emu_window/emu_window_sdl2.h" 36#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
37#include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h" 37#include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h"
38#ifdef HAS_VULKAN
39#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" 38#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h"
40#endif
41 39
42#ifdef _WIN32 40#ifdef _WIN32
43// windows.h needs to be included before shellapi.h 41// windows.h needs to be included before shellapi.h
@@ -64,7 +62,6 @@ __declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
 static void PrintHelp(const char* argv0) {
     std::cout << "Usage: " << argv0
               << " [options] <filename>\n"
-                 "-g, --gdbport=NUMBER Enable gdb stub on port NUMBER\n"
                  "-f, --fullscreen     Start in fullscreen mode\n"
                  "-h, --help           Display this help and exit\n"
                  "-v, --version        Output version information and exit\n"
@@ -96,8 +93,6 @@ int main(int argc, char** argv) {
     Config config;
 
     int option_index = 0;
-    bool use_gdbstub = Settings::values.use_gdbstub;
-    u32 gdb_port = static_cast<u32>(Settings::values.gdbstub_port);
 
     InitializeLogging();
 
@@ -116,26 +111,17 @@ int main(int argc, char** argv) {
     bool fullscreen = false;
 
     static struct option long_options[] = {
-        {"gdbport", required_argument, 0, 'g'}, {"fullscreen", no_argument, 0, 'f'},
-        {"help", no_argument, 0, 'h'},          {"version", no_argument, 0, 'v'},
-        {"program", optional_argument, 0, 'p'}, {0, 0, 0, 0},
+        {"fullscreen", no_argument, 0, 'f'},
+        {"help", no_argument, 0, 'h'},
+        {"version", no_argument, 0, 'v'},
+        {"program", optional_argument, 0, 'p'},
+        {0, 0, 0, 0},
     };
 
     while (optind < argc) {
         int arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index);
         if (arg != -1) {
             switch (static_cast<char>(arg)) {
-            case 'g':
-                errno = 0;
-                gdb_port = strtoul(optarg, &endarg, 0);
-                use_gdbstub = true;
-                if (endarg == optarg)
-                    errno = EINVAL;
-                if (errno != 0) {
-                    perror("--gdbport");
-                    exit(1);
-                }
-                break;
             case 'f':
                 fullscreen = true;
                 LOG_INFO(Frontend, "Starting in fullscreen mode...");
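One leftover worth noting: the optstring in the unchanged getopt_long call above still contains "g:", so a stray -g <port> is now consumed and silently ignored (no case matches) rather than rejected with an error. A fully trimmed sketch would drop it from the optstring as well:

    #include <getopt.h>

    // Sketch of the option parsing after the gdbstub removal, with "g:" also
    // dropped so getopt rejects -g outright instead of swallowing it.
    int ParseArg(int argc, char** argv, int& option_index) {
        static struct option long_options[] = {
            {"fullscreen", no_argument, 0, 'f'},
            {"help", no_argument, 0, 'h'},
            {"version", no_argument, 0, 'v'},
            {"program", optional_argument, 0, 'p'},
            {0, 0, 0, 0},
        };
        return getopt_long(argc, argv, "fhvp::", long_options, &option_index);
    }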
@@ -177,8 +163,6 @@ int main(int argc, char** argv) {
     InputCommon::InputSubsystem input_subsystem;
 
     // Apply the command line arguments
-    Settings::values.gdbstub_port = gdb_port;
-    Settings::values.use_gdbstub = use_gdbstub;
     Settings::Apply(system);
 
     std::unique_ptr<EmuWindow_SDL2> emu_window;
@@ -187,13 +171,8 @@ int main(int argc, char** argv) {
         emu_window = std::make_unique<EmuWindow_SDL2_GL>(&input_subsystem, fullscreen);
         break;
     case Settings::RendererBackend::Vulkan:
-#ifdef HAS_VULKAN
         emu_window = std::make_unique<EmuWindow_SDL2_VK>(&input_subsystem);
         break;
-#else
-        LOG_CRITICAL(Frontend, "Vulkan backend has not been compiled!");
-        return 1;
-#endif
     }
 
     system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>());
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index b6cdc7c1c..91684e96e 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -158,7 +158,6 @@ void Config::ReadValues() {
     Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false);
 
     // Debugging
-    Settings::values.use_gdbstub = false;
     Settings::values.program_args = "";
     Settings::values.dump_exefs = sdl2_config->GetBoolean("Debugging", "dump_exefs", false);
     Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false);
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index 50bd7ae41..6435ffabb 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -162,7 +162,6 @@ int main(int argc, char** argv) {
 
     Core::System& system{Core::System::GetInstance()};
 
-    Settings::values.use_gdbstub = false;
     Settings::Apply(system);
 
     const auto emu_window{std::make_unique<EmuWindow_SDL2_Hide>()};